from pathlib import Path
import sys
import numpy as np
from typing import Optional, Any, Sequence
from .basic import nn, nl
from veloxchem.outputstream import OutputStream
from veloxchem.veloxchemlib import mpi_master
import mpi4py.MPI as MPI
from veloxchem.errorhandler import assert_msg_critical
[docs]
class GroWriter:
"""
Write atomic structure data to GROMACS .gro coordinate files.
Handles formatting, unit conversion, residue management, and MPI-based output stream selection.
Supports both orthorhombic and triclinic box specification.
Attributes:
comm (MPI.Comm): MPI communicator.
rank (int): MPI process rank.
nodes (int): Number of MPI processes.
ostream (OutputStream): Output stream for printing/logging.
filepath (Optional[str]): Path to the target .gro file (default None, set by user).
_debug (bool): Toggle debug printouts.
file_dir (Optional[Path]): Directory containing the output file (set during write).
Methods:
write: Write data to a .gro file. Handles formatting and box conversion.
"""
def __init__(
self,
comm: Optional[Any] = None,
ostream: Optional[OutputStream] = None,
filepath: Optional[str] = None,
debug: bool = False,
) -> None:
"""
Initialize a GroWriter instance.
Args:
comm (Optional[Any]): MPI communicator. Defaults to MPI.COMM_WORLD.
ostream (Optional[OutputStream]): Output stream for logging. Defaults to rank-aware OutputStream.
filepath (Optional[str]): Target file path for writing. Can be set per-write.
debug (bool): Toggle debug output. Default is False.
"""
if comm is None:
comm = MPI.COMM_WORLD
if ostream is None:
if comm.Get_rank() == mpi_master():
ostream = OutputStream(sys.stdout)
else:
ostream = OutputStream(None)
self.comm: Any = comm
self.rank: int = self.comm.Get_rank()
self.nodes: int = self.comm.Get_size()
self.ostream: OutputStream = ostream
self.filepath: Optional[str] = filepath
self._debug: bool = debug
self.file_dir: Optional[Path] = None
[docs]
def write(
self,
filepath: Optional[str] = None,
header: str = "Generated by MofBuilder\n",
lines: Sequence[Sequence[Any]] = [],
box: Optional[Sequence[float]] = None,
triclinic: bool = False,
) -> None:
"""
Write atomic coordinates and box information to a .gro coordinate file.
The input 'lines' should be a list of lists/arrays, one per atom, following this order:
[atom_type, atom_label, atom_number, residue_name, residue_number, x, y, z, spin, charge, note]
Args:
filepath (Optional[str]): Output .gro file path (overrides instance attribute, required if not set).
header (str): Header string to write at the top of the .gro file. Defaults to "Generated by MofBuilder".
lines (Sequence[Sequence[Any]]): List of atom data lines.
box (Optional[Sequence[float]]): Box vector or box+extra terms for triclinic (see Note). Units: angstroms.
triclinic (bool): If True, expects box to have 6 elements.
Raises:
AssertionError: If filepath is not provided.
AssertionError: If triclinic=True and box does not have 6 elements.
Note:
Positions (x, y, z) are converted from angstroms to nanometers.
Box lengths are also written in nanometers.
Triclinic box format: [lx, ly, lz, xy, xz, yz].
Example:
>>> writer = GroWriter()
>>> writer.write("output.gro", lines=atom_list, box=[30.0, 30.0, 30.0])
"""
filepath_resolved = Path(filepath) if filepath is not None else Path(self.filepath)
assert_msg_critical(
filepath_resolved is not None, "gro filepath is not specified"
)
# Check if the file directory exists and create it if it doesn't
self.file_dir = filepath_resolved.parent
if self._debug:
self.ostream.print_info(f"targeting directory: {self.file_dir}")
self.file_dir.mkdir(parents=True, exist_ok=True)
if filepath_resolved.suffix != ".gro":
filepath_resolved = filepath_resolved.with_suffix(".gro")
newgro = []
newgro.append(header)
newgro.append(str(len(lines)) + "\n")
last_name = ""
last_residue_number = 0
residue_count = 0
# Collect coordinates and compute center and box for recentering
coords = np.vstack((lines))[:, 5:8].astype(float)
center = np.mean(coords, axis=0)
x_min, y_min, z_min = np.min(coords, axis=0)
x_max, y_max, z_max = np.max(coords, axis=0)
box_size = np.array([x_max - x_min, y_max - y_min, z_max - z_min])
translation = box_size / 2 - center[:3]
# If original box size is big enough, use it instead
if box is not None:
if (box[0] >= box_size[0]) and (box[1] >= box_size[1]) and (box[2] >= box_size[2]):
box_size = np.array(box)
if triclinic:
assert_msg_critical(
box is not None and len(box) == 6, "Box should have 6 values for triclinic box"
)
with open(filepath_resolved, "w") as fp:
# Iterate over each line (atom) and format as GROMACS expects
for i, values in enumerate(lines):
if values[3] != last_name or values[4] != last_residue_number:
residue_count += 1
last_name = values[3]
last_residue_number = values[4]
j = 0
atom_type = values[0]
atom_label = values[1]
atom_number = i + 1
residue_name = values[3].split("_")[0][:3]
residue_number = residue_count
x = (float(values[5]) + translation[0]) / 10 # Angstroms to nm
y = (float(values[6]) + translation[1]) / 10
z = (float(values[7]) + translation[2]) / 10
spin = values[8]
charge = values[9]
note = values[10]
formatted_line = "%5d%-5s%5s%5s%8.3f%8.3f%8.3f" % (
residue_number,
residue_name[:5],
(nn(atom_label[:5]) + str(j + 1))[:5],
str(atom_number)[-5:],
x,
y,
z,
)
j += 1
newgro.append(formatted_line + "\n")
if triclinic and box is not None:
tail = (
f"{float(box[0]) / 10:.6f} {float(box[1]) / 10:.6f} {float(box[2]) / 10:.6f} "
f"{float(box[3]):.2f} {float(box[4]):.2f} {float(box[5]):.2f}\n"
)
else:
tail = (
f"{float(box_size[0]) / 10:.6f} {float(box_size[1]) / 10:.6f} {float(box_size[2]) / 10:.6f}\n"
)
newgro.append(tail)
fp.writelines(newgro)