Skip to content

小彭老师大大,有关mpm的流体模拟函数优化 #10

@dd123-a

Description

@dd123-a

void Simulate() {

// CLEAR GRID
std::size_t grid_size = grid.size();

// 确保 grid_size 不超出 int 的范围

#pragma omp parallel for
for (int i = 0; i < static_cast(grid_size); ++i) {
grid[i].vel = glm::vec3(0.0f);
grid[i].mass = 0.0f;
}

// P2G_1
//质量与动量转移
 #pragma omp parallel for
for (int i = 0; i < particles.size(); i++) {
    auto &p = particles[i];
    glm::uvec3 cell_idx = glm::uvec3(p.pos);
    glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

    glm::vec3 weights_[3];
    weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
    weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
    weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                 cell_idx.y + gy - 1,
                                                 cell_idx.z + gz - 1);
                glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;
                glm::vec3 Q = p.C * cell_dist;

                int cell_index = (int) cell_pos.x +
                                 (int) cell_pos.y * grid_res +
                                 (int) cell_pos.z * grid_res * grid_res;

                float mass_contrib = weight * particle_mass;
                grid[cell_index].mass += mass_contrib;
                grid[cell_index].vel += mass_contrib * (p.vel + Q);
            }

        }
    }
}

// P2G_2
//密度计算和压力计算
 #pragma omp parallel for
for (int i = 0; i < particles.size(); i++) {
    auto &p = particles[i];

    glm::uvec3 cell_idx = glm::uvec3(p.pos);
    glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

    glm::vec3 weights_[3];
    weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
    weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
    weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

    float density = 0.0f;
    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                 cell_idx.y + gy - 1,
                                                 cell_idx.z + gz - 1);

                int cell_index = (int) cell_pos.x +
                                 (int) cell_pos.y * grid_res +
                                 (int) cell_pos.z * grid_res * grid_res;

                density += grid[cell_index].mass * weight;
            }
        }
    }

    float volume = particle_mass / density;
    float pressure = std::max(-0.1f, eos_stiffness *
                                     (std::pow(density / rest_density, eos_power) - 1.0f));

    glm::mat3 stress = glm::mat3(
            -pressure, 0, 0,
            0, -pressure, 0,
            0, 0, -pressure
    );

     glm::mat3 strain = p.C;

     //float trace = strain[0][0] + strain[1][0] + strain[2][0]; // DEBUG
     float trace = glm::determinant(strain);
     strain[0][0] = strain[1][0] = strain[2][0] = trace;

     glm::mat3 viscosity_term = dynamic_viscosity * strain;
     stress += viscosity_term;

    auto eq_16_term_0 = -volume * 4 * stress * dt;

    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;

                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                                                 cell_idx.y + gy - 1,
                                                 cell_idx.z + gz - 1);

                glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;

                int cell_index = (int) cell_pos.x +
                                 (int) cell_pos.y * grid_res +
                                 (int) cell_pos.z * grid_res * grid_res;

                glm::vec3 momentum = (eq_16_term_0 * weight) * cell_dist;
                grid[cell_index].vel += momentum;
            }
        }
    }
}

// GRID UPDATE
// 获取 grid 的大小
grid_size = grid.size();

// GRID UPDATE

#pragma omp parallel for
for (int i = 0; i < static_cast(grid_size); ++i) {
auto& cell = grid[static_caststd::size_t(i)]; // 使用 static_cast 将索引转换为 std::size_t
if (cell.mass > 0) {
cell.vel /= cell.mass;
cell.vel += dt * glm::vec3(0.0f, gravity, 0.0f);

        int index = cell.index;
        int x = index / (grid_res * grid_res);
        index /= grid_res;
        int y = (index / grid_res) % grid_res;
        index /= grid_res;
        int z = index;

        if (x < 1 || x > grid_res - 2) { cell.vel.x = 0.0f; }
        if (y < 1 || y > grid_res - 2) { cell.vel.y = 0.0f; }
        if (z < 1 || z > grid_res - 2) { cell.vel.z = 0.0f; }
    }
}


// G2P
 #pragma omp parallel for
for (int i = 0; i < static_cast<int>(particles.size()); ++i) {
    auto& p = particles[static_cast<std::size_t>(i)]; // 使用索引访问粒子

    p.vel = glm::vec3(0.0f);

    glm::uvec3 cell_idx = glm::uvec3(p.pos);
    glm::vec3 cell_diff = (p.pos - glm::vec3(cell_idx)) - 0.5f;

    glm::vec3 weights_[3];

    weights_[0] = 0.5f * glm::pow(0.5f - cell_diff, glm::vec3(2.0f));
    weights_[1] = 0.75f - glm::pow(cell_diff, glm::vec3(2.0f));
    weights_[2] = 0.5f * glm::pow(0.5f + cell_diff, glm::vec3(2.0f));

    glm::mat3 B = glm::mat3(0.0f);
    for (uint32_t gx = 0; gx < 3; ++gx) {
        for (uint32_t gy = 0; gy < 3; ++gy) {
            for (uint32_t gz = 0; gz < 3; ++gz) {
                float weight = weights_[gx].x * weights_[gy].y * weights_[gz].z;
                // std::cout << weight << std::endl;
                glm::uvec3 cell_pos = glm::uvec3(cell_idx.x + gx - 1,
                    cell_idx.y + gy - 1,
                    cell_idx.z + gz - 1);

                glm::vec3 cell_dist = (glm::vec3(cell_pos) - p.pos) + 0.5f;

                int cell_index = (int)cell_pos.x +
                    (int)cell_pos.y * grid_res +
                    (int)cell_pos.z * grid_res * grid_res;

                glm::vec3 weighted_velocity = grid[cell_index].vel * weight;

                B += glm::mat3(weighted_velocity * cell_dist.x,
                    weighted_velocity * cell_dist.y,
                    weighted_velocity * cell_dist.z);

                p.vel += weighted_velocity;
            }
        }
    }

    p.C = B * 4.0f;
    p.vel *= damping;
    p.pos += p.vel * dt;
    p.pos = glm::clamp(p.pos, 1.0f, grid_res - 2.0f);

    glm::vec3 x_n = p.pos + p.vel;
    const float wall_min = 3.0f;
    const float wall_max = grid_res - 4.0f;
    if (x_n.x < wall_min) p.vel.x += (wall_min - x_n.x);
    if (x_n.x > wall_max) p.vel.x += (wall_max - x_n.x);
    if (x_n.y < wall_min) p.vel.y += (wall_min - x_n.y);
    if (x_n.y > wall_max) p.vel.y += (wall_max - x_n.y);
    if (x_n.z < wall_min) p.vel.z += (wall_min - x_n.z);
    if (x_n.z > wall_max) p.vel.z += (wall_max - x_n.z);
}

}这是使用openmp的进行了加速,但是我有在youtube上看到可以对其用simd指令优化到单线程的也能流畅运行的程度,但本人对此一窍不通(悲),所以来探讨一下是否有可行性。(本人尝试过使用simd进行改写,但是debug的太痛苦了,模板元什么的最讨厌了)https://github.com/Hab5/mls-mpm这是对应的完整代码仓库,https://www.youtube.com/watch?v=4Y58Pg9tpSo这是对应的YouTube视频介绍(梦开始的地方)

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions