小彭老师大大，有关mpm的流体模拟函数优化

void Simulate() {

    // CLEAR GRID
    std::size_t grid_size = grid.size();

    // 确保 grid_size 不超出 int 的范围
#pragma omp parallel for
    for (int i = 0; i < static_cast<int>(grid_size); ++i) {
        grid[i].vel = glm::vec3(0.0f);
        grid[i].mass = 0.0f;
    }


    // P2G_1
    //质量与动量转移
     #pragma omp parallel for
    for (int i = 0; i < particles.size(); i++) {
        auto &p = particles[i];
        glm::uvec3 cell_idx = glm::uvec3(p.pos);
        glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

        glm::vec3 weights_[3];
        weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
        weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
        weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

        for (uint32_t gx = 0; gx < 3; ++gx) {
            for (uint32_t gy = 0; gy < 3; ++gy) {
                for (uint32_t gz = 0; gz < 3; ++gz) {
                    float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                    glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                     cell_idx.y + gy - 1,
                                                     cell_idx.z + gz - 1);
                    glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;
                    glm::vec3 Q = p.C * cell_dist;

                    int cell_index = (int) cell_pos.x +
                                     (int) cell_pos.y * grid_res +
                                     (int) cell_pos.z * grid_res * grid_res;

                    float mass_contrib = weight * particle_mass;
                    grid[cell_index].mass += mass_contrib;
                    grid[cell_index].vel += mass_contrib * (p.vel + Q);
                }

            }
        }
    }

    // P2G_2
    //密度计算和压力计算
     #pragma omp parallel for
    for (int i = 0; i < particles.size(); i++) {
        auto &p = particles[i];

        glm::uvec3 cell_idx = glm::uvec3(p.pos);
        glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

        glm::vec3 weights_[3];
        weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
        weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
        weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

        float density = 0.0f;
        for (uint32_t gx = 0; gx < 3; ++gx) {
            for (uint32_t gy = 0; gy < 3; ++gy) {
                for (uint32_t gz = 0; gz < 3; ++gz) {
                    float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                    glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                     cell_idx.y + gy - 1,
                                                     cell_idx.z + gz - 1);

                    int cell_index = (int) cell_pos.x +
                                     (int) cell_pos.y * grid_res +
                                     (int) cell_pos.z * grid_res * grid_res;

                    density += grid[cell_index].mass * weight;
                }
            }
        }

        float volume = particle_mass / density;
        float pressure = std::max(-0.1f, eos_stiffness *
                                         (std::pow(density / rest_density, eos_power) - 1.0f));

        glm::mat3 stress = glm::mat3(
                -pressure, 0, 0,
                0, -pressure, 0,
                0, 0, -pressure
        );

         glm::mat3 strain = p.C;

         //float trace = strain[0][0] + strain[1][0] + strain[2][0]; // DEBUG
         float trace = glm::determinant(strain);
         strain[0][0] = strain[1][0] = strain[2][0] = trace;

         glm::mat3 viscosity_term = dynamic_viscosity * strain;
         stress += viscosity_term;

        auto eq_16_term_0 = -volume * 4 * stress * dt;

        for (uint32_t gx = 0; gx < 3; ++gx) {
            for (uint32_t gy = 0; gy < 3; ++gy) {
                for (uint32_t gz = 0; gz < 3; ++gz) {
                    float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                    glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                     cell_idx.y + gy - 1,
                                                     cell_idx.z + gz - 1);

                    glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;

                    int cell_index = (int) cell_pos.x +
                                     (int) cell_pos.y * grid_res +
                                     (int) cell_pos.z * grid_res * grid_res;

                    glm::vec3 momentum = (eq_16_term_0 * weight) * cell_dist;
                    grid[cell_index].vel += momentum;
                }
            }
        }
    }

    // GRID UPDATE
    // 获取 grid 的大小
    grid_size = grid.size();

    // GRID UPDATE
#pragma omp parallel for
    for (int i = 0; i < static_cast<int>(grid_size); ++i) {
        auto& cell = grid[static_cast<std::size_t>(i)]; // 使用 static_cast 将索引转换为 std::size_t
        if (cell.mass > 0) {
            cell.vel /= cell.mass;
            cell.vel += dt * glm::vec3(0.0f, gravity, 0.0f);

            int index = cell.index;
            int x = index / (grid_res * grid_res);
            index /= grid_res;
            int y = (index / grid_res) % grid_res;
            index /= grid_res;
            int z = index;

            if (x < 1 || x > grid_res - 2) { cell.vel.x = 0.0f; }
            if (y < 1 || y > grid_res - 2) { cell.vel.y = 0.0f; }
            if (z < 1 || z > grid_res - 2) { cell.vel.z = 0.0f; }
        }
    }


    // G2P
     #pragma omp parallel for
    for (int i = 0; i < static_cast<int>(particles.size()); ++i) {
        auto& p = particles[static_cast<std::size_t>(i)]; // 使用索引访问粒子

        p.vel = glm::vec3(0.0f);

        glm::uvec3 cell_idx = glm::uvec3(p.pos);
        glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

        glm::vec3 weights_[3];

        weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
        weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
        weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

        glm::mat3 B = glm::mat3(0.0f);
        for (uint32_t gx = 0; gx < 3; ++gx) {
            for (uint32_t gy = 0; gy < 3; ++gy) {
                for (uint32_t gz = 0; gz < 3; ++gz) {
                    float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;
                    // std::cout << weight << std::endl;
                    glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                        cell_idx.y + gy - 1,
                        cell_idx.z + gz - 1);

                    glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;

                    int cell_index = (int)cell_pos.x +
                        (int)cell_pos.y * grid_res +
                        (int)cell_pos.z * grid_res * grid_res;

                    glm::vec3 weighted_velocity = grid[cell_index].vel * weight;

                    B += glm::mat3(weighted_velocity * cell_dist.x,
                        weighted_velocity * cell_dist.y,
                        weighted_velocity * cell_dist.z);

                    p.vel += weighted_velocity;
                }
            }
        }

        p.C = B * 4.0f;
        p.vel *= damping;
        p.pos += p.vel * dt;
        p.pos = glm::clamp(p.pos, 1.0f, grid_res - 2.0f);

        glm::vec3 x_n = p.pos + p.vel;
        const float wall_min = 3.0f;
        const float wall_max = grid_res - 4.0f;
        if (x_n.x < wall_min) p.vel.x += (wall_min - x_n.x);
        if (x_n.x > wall_max) p.vel.x += (wall_max - x_n.x);
        if (x_n.y < wall_min) p.vel.y += (wall_min - x_n.y);
        if (x_n.y > wall_max) p.vel.y += (wall_max - x_n.y);
        if (x_n.z < wall_min) p.vel.z += (wall_min - x_n.z);
        if (x_n.z > wall_max) p.vel.z += (wall_max - x_n.z);
    }

}这是使用openmp的进行了加速，但是我有在youtube上看到可以对其用simd指令优化到单线程的也能流畅运行的程度，但本人对此一窍不通（悲），所以来探讨一下是否有可行性。（本人尝试过使用simd进行改写，但是debug的太痛苦了，模板元什么的最讨厌了）https://github.com/Hab5/mls-mpm这是对应的完整代码仓库，https://www.youtube.com/watch?v=4Y58Pg9tpSo这是对应的YouTube视频介绍（梦开始的地方）

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

小彭老师大大，有关mpm的流体模拟函数优化 #10

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

小彭老师大大，有关mpm的流体模拟函数优化 #10

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions