gempy-project · Leguark · May 3, 2025 · May 3, 2025 · May 3, 2025 · May 3, 2025
diff --git a/docs/dev_logs/2025_May.md b/docs/dev_logs/2025_May.md
@@ -0,0 +1,27 @@
+# TODO:
+
+--- General
+- [ ] Create a proper suit of tests
+  - [ ] Keep extracting code into modules and functions 
+  - [ ] Posterior analysis for gempy 
+- [ ] Investigate speed
+- [ ] Investigate segmentation function 
+- [ ] Create a robust api to make sure we do not mess up assigning priors to gempy parameters
+- [ ] do we need to use index_put or just normal indexing?
+--- Vector examples
+- [ ] Bring the Vector example here...
+- [ ] Using categories as likelihood function (maybe I need to check out with Sam)
+--- MinEye
+- [ ] Revive magnetics
+- [ ] Add Simon's example
+
+### Doing:
+- [ ] Compressing code
+- [ ] GemPy posterior visualization
+
+### Possible Design:
+- Geological Model
+- Priors
+- Likelihood
+----
+- Having a runner or something like that?
diff --git a/tests/test_prob_modeling/__init__.py → ...lity/modules/model_definition/__init__.py b/tests/test_prob_modeling/__init__.py → ...lity/modules/model_definition/__init__.py
diff --git a/gempy_probability/modules/model_definition/model_examples.py b/gempy_probability/modules/model_definition/model_examples.py
@@ -0,0 +1,80 @@
+import gempy as gp
+import gempy.core.data
+import gempy_engine
+from gempy.modules.data_manipulation.engine_factory import interpolation_input_from_structural_frame
+
+import pyro
+import pyro.distributions as dist
+import torch
+
+
+def model(geo_model: gempy.core.data.GeoModel, normal, y_obs_list):
+    """
+    This Pyro model represents the probabilistic aspects of the geological model.
+    It defines a prior distribution for the top layer's location and 
+    computes the thickness of the geological layer as an observed variable.
+    """
+    # Define prior for the top layer's location:
+    # region Prior definition
+
+    mu_top = pyro.sample(
+        name=r'$\mu_{top}$',
+        fn=normal
+    )
+    # endregion
+
+    # region Prior injection into the gempy model
+
+    # * Update the model with the new top layer's location
+    interpolation_input = interpolation_input_from_structural_frame(geo_model)
+
+    if True: # ?? I need to figure out if we need the index_put or not
+        indices__ = (torch.tensor([0]), torch.tensor([2]))  # * This has to be Tensors
+        new_tensor: torch.Tensor = torch.index_put(
+            input=interpolation_input.surface_points.sp_coords,
+            indices=indices__,
+            values=mu_top
+        )
+        interpolation_input.surface_points.sp_coords = new_tensor
+    else:
+        interpolation_input.surface_points.sp_coords[0, 2] = mu_top
+
+    # endregion
+
+    # region Forward model computation
+
+    # * Compute the geological model
+    geo_model.solutions = gempy_engine.compute_model(
+        interpolation_input=interpolation_input,
+        options=geo_model.interpolation_options,
+        data_descriptor=geo_model.input_data_descriptor,
+        geophysics_input=geo_model.geophysics_input,
+    )
+
+    # Compute and observe the thickness of the geological layer
+    model_solutions: gp.data.Solutions = geo_model.solutions
+    thickness = _likelihood(model_solutions)
+
+    # endregion
+
+    posterior_dist_normal = dist.Normal(thickness, 25)
+
+    # * This is used automagically by pyro to compute the log-probability
+    y_thickness = pyro.sample(
+        name=r'$y_{thickness}$',
+        fn=posterior_dist_normal,
+        obs=y_obs_list
+    )
+
+
+def _likelihood(model_solutions: gp.data.Solutions) -> torch.Tensor:
+    """
+    This function computes the thickness of the geological layer.
+    """
+    simulated_well = model_solutions.octrees_output[0].last_output_center.custom_grid_values
+    thickness = simulated_well.sum()
+    pyro.deterministic(
+        name=r'$\mu_{thickness}$',
+        value=thickness.detach()  # * This is only for az to track progress
+    )
+    return thickness
diff --git a/tests/test_prob_model/__init__.py b/tests/test_prob_model/__init__.py
diff --git a/tests/test_prob_model/test_prob_I.py b/tests/test_prob_model/test_prob_I.py
@@ -0,0 +1,144 @@
+import numpy as np
+import os
+import pyro.distributions as dist
+import torch
+
+import gempy as gp
+from gempy_engine.core.backend_tensor import BackendTensor
+
+
+def test_basic_gempy_I() -> None:
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    data_path = os.path.abspath(os.path.join(current_dir, '..', '..', 'examples', 'tutorials', 'data'))
+    geo_model = gp.create_geomodel(
+        project_name='Wells',
+        extent=[0, 12000, -500, 500, 0, 4000],
+        refinement=3,
+        importer_helper=gp.data.ImporterHelper(
+            path_to_orientations=os.path.join(data_path, "2-layers", "2-layers_orientations.csv"),
+            path_to_surface_points=os.path.join(data_path, "2-layers", "2-layers_surface_points.csv")
+        )
+    )
+
+    # TODO: Convert this into an options preset
+    geo_model.interpolation_options.uni_degree = 0
+    geo_model.interpolation_options.mesh_extraction = False
+    geo_model.interpolation_options.sigmoid_slope = 1100.
+
+    # region Minimal grid for the specific likelihood function
+    x_loc = 6000
+    y_loc = 0
+    z_loc = np.linspace(0, 4000, 100)
+    xyz_coord = np.array([[x_loc, y_loc, z] for z in z_loc])
+    gp.set_custom_grid(geo_model.grid, xyz_coord=xyz_coord)
+    # endregion
+
+    geo_model.grid.active_grids = gp.data.Grid.GridTypes.CUSTOM
+    assert geo_model.grid.values.shape[0] == 100, "Custom grid should have 100 cells"
+    gp.compute_model(gempy_model=geo_model)
+
+    # TODO: Make this a more elegant way 
+    BackendTensor.change_backend_gempy(engine_backend=gp.data.AvailableBackends.PYTORCH)
+
+    normal = dist.Normal(
+        loc=(geo_model.surface_points_copy_transformed.xyz[0, 2]),
+        scale=torch.tensor(0.1, dtype=torch.float64)
+    )
+
+    from gempy_probability.modules.model_definition.model_examples import model
+    _prob_run(
+        geo_model=geo_model,
+        prob_model=model,
+        normal=normal,
+        y_obs_list=torch.tensor([200, 210, 190])
+    )
+
+
+def _prob_run(geo_model: gp.data.GeoModel, prob_model: callable,
+              normal: torch.distributions.Distribution, y_obs_list: torch.Tensor) -> None:
+    # Run prior sampling and visualization
+    from pyro.infer import Predictive
+    import pyro
+    import arviz as az
+    import matplotlib.pyplot as plt
+
+    from pyro.infer import NUTS
+    from pyro.infer import MCMC
+    from pyro.infer.autoguide import init_to_mean
+
+    # region prior sampling
+    predictive = Predictive(
+        model=prob_model,
+        num_samples=50
+    )
+    prior = predictive(geo_model, normal, y_obs_list)
+
+    data = az.from_pyro(prior=prior)
+    az.plot_trace(data.prior)
+    plt.show()
+
+    # endregion
+
+    # region inference
+    pyro.primitives.enable_validation(is_validate=True)
+    nuts_kernel = NUTS(
+        prob_model,
+        step_size=0.0085,
+        adapt_step_size=True,
+        target_accept_prob=0.9,
+        max_tree_depth=10,
+        init_strategy=init_to_mean
+    )
+    mcmc = MCMC(
+        kernel=nuts_kernel,
+        num_samples=200,
+        warmup_steps=50,
+        disable_validation=False
+    )
+    mcmc.run(geo_model, normal, y_obs_list)
+    posterior_samples = mcmc.get_samples()
+    posterior_predictive_fn = Predictive(
+        model=prob_model,
+        posterior_samples=posterior_samples
+    )
+    posterior_predictive = posterior_predictive_fn(geo_model, normal, y_obs_list)
+    data = az.from_pyro(posterior=mcmc, prior=prior, posterior_predictive=posterior_predictive)
+    # endregion
+
+    az.plot_trace(data)
+    plt.show()
+    from gempy_probability.modules.plot.plot_posterior import default_red, default_blue
+    az.plot_density(
+        data=[data.posterior_predictive, data.prior_predictive],
+        shade=.9,
+        var_names=[r'$\mu_{thickness}$'],
+        data_labels=["Posterior Predictive", "Prior Predictive"],
+        colors=[default_red, default_blue],
+    )
+    plt.show()
+    az.plot_density(
+        data=[data, data.prior],
+        shade=.9,
+        hdi_prob=.99,
+        data_labels=["Posterior", "Prior"],
+        colors=[default_red, default_blue],
+    )
+    plt.show()
+
+    # Test posterior mean values
+    posterior_top_mean = float(data.posterior[r'$\mu_{top}$'].mean())
+    target_top = 0.00875
+    target_thickness = 223
+    assert abs(posterior_top_mean - target_top) < 0.0200, f"Top layer mean {posterior_top_mean} outside expected range"
+    posterior_thickness_mean = float(data.posterior_predictive[r'$\mu_{thickness}$'].mean())
+    assert abs(posterior_thickness_mean - target_thickness) < 5, f"Thickness mean {posterior_thickness_mean} outside expected range"
+    # Test convergence diagnostics
+    assert float(data.sample_stats.diverging.sum()) == 0, "MCMC sampling has divergences"
+
+    print("Posterior mean values:")
+    print(f"Top layer mean: {posterior_top_mean}")
+    print(f"Thickness mean: {posterior_thickness_mean}")
+    print("MCMC convergence diagnostics:")
+    print(f"Divergences: {float(data.sample_stats.diverging.sum())}")
+    print(f"Acceptance rate: {float(data.sample_stats.acceptance_rate.mean())}")
+    print(f"Mean tree depth: {float(data.sample_stats.mean_tree_depth.mean())}")