Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 159 additions & 17 deletions dimsim/compute/apps.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import openmm
import openmm.app
from openff.toolkit import Topology
from parsl import File
from parsl.app.app import python_app

from dimsim.configs.compute.density import DensityConfig
from dimsim.configs.compute.equilibration import EquilibrationConfig
from dimsim.configs.compute.minimization import MinimizationConfig, default_minimization_config


@python_app
Expand Down Expand Up @@ -104,30 +108,28 @@ def prepare_openmm_system(


@python_app
def minimize_energy(
system_future: dict[str, DensityConfig | openmm.System], job_dir: str
) -> dict[str, DensityConfig | float]:
def prepare_openmm_simulation(
system_future: dict[str, DensityConfig | openmm.System],
job_dir: str,
) -> dict[str, DensityConfig | tuple[File, File, File, File]]:
import logging

import openmm
import openmm.app
import openmm.unit
from openff.toolkit import Molecule, Topology

logging.basicConfig(
filename=f"{job_dir}/simulation.log",
level=logging.INFO,
)
logging.info("Starting energy minimization app")
logging.info("Starting OpenMM simulation creation app")
system = system_future["openmm_system"]

data_entry = system_future["compute_config"]["target"]
compute_config: DensityConfig = system_future["compute_config"]
data_entry = compute_config["target"]

# this should be in Kelvin
temperature = system_future["compute_config"]["target"]["temperature"]
temperature = compute_config["target"]["temperature"]

packed_topology_file = f"{job_dir}/packed_topology.pdb"
minimized_topology_file = f"{job_dir}/minimized_topology.pdb"

molecules = [Molecule.from_smiles(smiles) for smiles in data_entry["smiles"]]

Expand All @@ -142,20 +144,75 @@ def minimize_energy(
integrator=openmm.LangevinMiddleIntegrator(
temperature * openmm.unit.kelvin,
1.0 / openmm.unit.picosecond,
1.0 * openmm.unit.femtoseconds,
1.0 * openmm.unit.femtoseconds, # TODO: This should be wired to user-valuable
),
)

simulation.context.setPositions(topology.get_positions().to_openmm())

original_state = simulation.context.getState(energy=True)
simulation_files = [
File(f"{job_dir}/simulation_toology.pdb"),
File(f"{job_dir}/simulation_system.xml"),
File(f"{job_dir}/simulation_integrator.xml"),
File(f"{job_dir}/simulation_checkpoint.chk"),
]

with open(simulation_files[0].filepath, "w") as f:
openmm.app.PDBFile.writeFile(
topology=simulation.topology,
positions=simulation.context.getState(getPositions=True).getPositions(),
file=f,
)

with open(simulation_files[1].filepath, "w") as f:
f.write(openmm.XmlSerializer.serialize(simulation.system))

with open(simulation_files[2].filepath, "w") as f:
f.write(openmm.XmlSerializer.serialize(simulation.integrator))

simulation.saveCheckpoint(simulation_files[3].filepath)

return {
"compute_config": compute_config,
"simulation_files": tuple(simulation_files),
}


@python_app
def minimize_energy(
simulation_future: dict[str, DensityConfig | tuple[File, File, File, File]],
job_dir: str,
minimization_config: MinimizationConfig = default_minimization_config,
) -> dict[str, DensityConfig | float | tuple[File]]:
import logging

import openmm
import openmm.app
import openmm.unit

minimized_topology_file = f"{job_dir}/minimized_topology.pdb"

simulation_files: tuple[File, File, File, File] = simulation_future["simulation_files"]

simulation.minimizeEnergy()
with open(simulation_files[0].filepath) as f:
topology = openmm.app.PDBFile(f).getTopology()

# simulation.reporters.append(openmm.app.DCDReporter(outputs[0].filepath, 100))
with open(simulation_files[1].filepath) as f:
system = openmm.XmlSerializer.deserialize(f.read())

# print("Minimized energy, now running 10,000 steps of dynamics...")
# simulation.step(10_000)
with open(simulation_files[2].filepath) as f:
integrator = openmm.XmlSerializer.deserialize(f.read())

with open(simulation_files[3].filepath, "rb") as f:
simulation = openmm.app.Simulation(topology, system, integrator)
simulation.loadCheckpoint(f)

original_state = simulation.context.getState(energy=True)

simulation.minimizeEnergy(
tolerance=minimization_config["tolerance"],
maxIterations=minimization_config["max_iterations"],
)

final_state = simulation.context.getState(energy=True, positions=True)

Expand All @@ -167,7 +224,92 @@ def minimize_energy(

logging.info(f"Minimized energy from {original:.2f} to {final:.2f}")

return original, final
minimized_files: list[File] = [
File(f"{job_dir}/minimized_topology.pdb"),
File(f"{job_dir}/minimized_system.xml"),
File(f"{job_dir}/minimized_integrator.xml"),
File(f"{job_dir}/minimized_checkpoint.chk"),
]

with open(minimized_files[0].filepath, "w") as f:
openmm.app.PDBFile.writeFile(
topology=simulation.topology,
positions=simulation.context.getState(getPositions=True).getPositions(),
file=f,
)

with open(minimized_files[1].filepath, "w") as f:
f.write(openmm.XmlSerializer.serialize(simulation.system))

with open(minimized_files[2].filepath, "w") as f:
f.write(openmm.XmlSerializer.serialize(simulation.integrator))

simulation.saveCheckpoint(minimized_files[3].filepath)

return {
"compute_config": simulation_future["compute_config"],
"original_energy": original,
"final_energy": final,
"simulation_files": tuple(minimized_files),
}


@python_app
def run_equilibration(
compute_config: DensityConfig,
equilibration_config: EquilibrationConfig,
minimization_future: dict[str, DensityConfig | float | tuple[File]],
job_dir: str,
) -> dict[str, File]:
# TODO: Expose barostat (+ thermostat?) to user

minimized_files: tuple[File, File, File, File] = minimization_future["simulation_files"]

with open(minimized_files[0].filepath) as f:
topology = openmm.app.PDBFile(f).getTopology()

with open(minimized_files[1].filepath) as f:
system = openmm.XmlSerializer.deserialize(f.read())

with open(minimized_files[2].filepath) as f:
integrator = openmm.XmlSerializer.deserialize(f.read())

with open(minimized_files[3].filepath, "rb") as f:
simulation = openmm.app.Simulation(topology, system, integrator)
simulation.loadCheckpoint(f)

simulation.system.addForce(
openmm.MonteCarloBarostat(
1.0 * openmm.unit.atmosphere,
300.0 * openmm.unit.kelvin,
)
)

simulation.context.reinitialize(preserveState=True)

trajectory_file = File(f"{job_dir}/trajectory.dcd")
data_file = File(f"{job_dir}/simulation_data.log")

simulation.reporters.append(
openmm.app.StateDataReporter(
file=data_file.filepath,
reportInterval=1000,
step=True,
potentialEnergy=True,
kineticEnergy=True,
totalEnergy=True,
temperature=True,
volume=True,
density=True,
speed=True,
)
)

simulation.reporters.append(openmm.app.DCDReporter(trajectory_file.filepath, 1000))

simulation.step(equilibration_config["steps_per_iteration"])

return {"trajectory_file": trajectory_file}


@python_app
Expand Down
13 changes: 13 additions & 0 deletions dimsim/compute/equilibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import numpy
import polars
import red


def assert_equilibrated(simulation_data: polars.DataFrame) -> bool:
idx, _g, ess = red.detect_equilibration_window(
numpy.array(simulation_data["Potential Energy (kJ/mole)"]),
method="min_sse",
plot=True,
)

return (ess > 50) and (idx < len(simulation_data) * 0.5)
41 changes: 33 additions & 8 deletions dimsim/compute/workflow.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
import json
import pathlib

import parsl

from dimsim.compute.apps import (
minimize_energy,
prepare_openmm_simulation,
prepare_openmm_system,
prepare_packed_topology,
run_equilibration,
)
from dimsim.compute.jobs import get_job_paths, is_complete, make_job_id
from dimsim.configs.compute.density import DensityConfig
from dimsim.configs.compute.ensemble import Ensemble
from dimsim.configs.compute.equilibration import EquilibrationConfig


class SimulationWorkflow:
Expand All @@ -24,6 +29,11 @@ def submit(self, compute_config: DensityConfig):
job_id = make_job_id(compute_config)
job_dir = get_job_paths(self.base_dir, job_id)["root"]

pathlib.Path(job_dir).mkdir(exist_ok=True)

with open(f"{job_dir}/compute_config_{compute_config['tag']}.json", "w") as f:
f.write(json.dumps(compute_config, indent=4))

if is_complete(self.base_dir, job_id):
return None # already done, skip

Expand All @@ -37,17 +47,32 @@ def submit(self, compute_config: DensityConfig):
# 2. set up openmm system
setup_future = prepare_openmm_system(pack_future, job_dir)

# 3 (for now ...) get minimized energy
minimize_future = minimize_energy(setup_future, job_dir)

# 3. run equilibration step
# 4. run "production" step
# 3. set up openmm simulation
simulation_future = prepare_openmm_simulation(setup_future, job_dir)

# TODO: How to wire settings for individual apps into the public-facing run()?
# 4 (for now ...) get minimized energy
minimize_future = minimize_energy(simulation_future, job_dir)

equilibration_future = run_equilibration(
compute_config=compute_config,
equilibration_config=EquilibrationConfig(
ensemble=Ensemble.NVT,
steps_per_iteration=1_000_000,
step_size=1.0,
),
minimization_future=minimize_future,
job_dir=job_dir,
)

# 1. run equilibration step
# 1. run "production" step
# sim_future = run_simulation(config_future, job_dir)
# 5. analyze trajectory
# 6. check for convergence, if not converged, run more production and repeat
# 1. analyze trajectory
# 1. check for convergence, if not converged, run more production and repeat
# analysis_future = analyze_trajectory(sim_future, job_dir)

return {"job_id": job_id, "future": minimize_future}
return {"job_id": job_id, "future": equilibration_future}

def submit_batch(self, compute_configs: list[DensityConfig]):
"""Submit many jobs, skipping already-complete ones."""
Expand Down
5 changes: 4 additions & 1 deletion dimsim/configs/compute/density.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import typing

from dimsim.configs.compute.minimization import MinimizationConfig
from dimsim.configs.targets.thermo import DataEntry


Expand All @@ -12,6 +13,8 @@ class DensityConfig(typing.TypedDict):

n_molecules: int

minimization_config: MinimizationConfig

"""
Maybe add an RNG seed?
Maybe add an RNG seed? - this would be wrapped into equilibration and/or production MD runs
"""
6 changes: 6 additions & 0 deletions dimsim/configs/compute/ensemble.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from enum import Enum


class Ensemble(Enum):
NVT = "nvt"
NPT = "npt"
11 changes: 11 additions & 0 deletions dimsim/configs/compute/equilibration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import typing

from dimsim.configs.compute.ensemble import Ensemble


class EquilibrationConfig(typing.TypedDict):
ensemble: Ensemble

steps_per_iteration: int

step_size: float
15 changes: 15 additions & 0 deletions dimsim/configs/compute/minimization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import typing


class MinimizationConfig(typing.TypedDict):
tolerance: float
"""Energy tolerance, passed directly to LocalEnergyMinimizer.minimize()"""

max_iterations: int
"""Maximum number of iterations, passed directly to LocalEnergyMinimizer.minimize()"""


default_minimization_config = MinimizationConfig(
tolerance=10.0, # kJ/mol
max_iterations=0,
)
7 changes: 6 additions & 1 deletion examples/run_density.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
from dimsim.compute.configs import local_config, slurm_config
from dimsim.compute.workflow import SimulationWorkflow
from dimsim.configs.compute.density import DensityConfig
from dimsim.configs.compute.minimization import MinimizationConfig
from dimsim.datasets.thermoml import ThermoMLDataSet

dataset = ThermoMLDataSet.from_xml(open("dimsim/_tests/data/thermoml/single_density.xml").read())

job_specs = list()

for target in dataset.properties:
for extra_n_molecules in range(20):
for extra_n_molecules in range(0, 20, 10):
job_specs.append(
DensityConfig(
tag="density",
target=target,
force_field="openff-2.3.0.offxml",
n_molecules=200 + extra_n_molecules,
minimization_config=MinimizationConfig(
tolerance=0.0,
max_iterations=100,
),
)
)

Expand Down
Loading