from pathlib import Path
import sys
from typing import Optional, Any, Sequence
from veloxchem.outputstream import OutputStream
from veloxchem.veloxchemlib import mpi_master
import mpi4py.MPI as MPI
from veloxchem.errorhandler import assert_msg_critical
from .basic import nn
# PDB structure-file writer.
class PdbWriter:
    """
    Writer for PDB format structure files.

    The MPI communicator is only used to pick a default output stream
    (stdout on the master rank, a null stream elsewhere); ``write`` itself
    performs the file output on every rank that calls it.

    Attributes:
        comm (Any): MPI communicator.
        rank (int): Rank of this process in ``comm``.
        nodes (int): Number of processes in ``comm``.
        ostream (OutputStream): Output stream for printing/logging.
        filepath (Optional[str or Path]): Default path of the PDB file.
        _debug (bool): Flag for debug printout.
        file_dir (Optional[Path]): Output file directory, set during write.

    Methods:
        write(filepath, header, lines): Write molecular structure to PDB file.
    """

    # Fixed-column ATOM record layout. Every blank column is spelled out as
    # its own literal so the column positions are explicit and cannot be
    # lost when the source is reformatted.
    _ATOM_RECORD_FORMAT = (
        "%-6s"             # 1-6   record name
        "%5d"              # 7-11  atom serial number
        " "                # 12    blank
        "%-4s"             # 13-16 atom name (left-justified, <= 3 chars used)
        "%1s"              # 17    alternate location indicator
        "%3s"              # 18-20 residue name
        " "                # 21    blank
        "%1s"              # 22    chain identifier
        "%4d"              # 23-26 residue sequence number
        "%1s"              # 27    insertion code
        "   "              # 28-30 blank
        "%8.3f%8.3f%8.3f"  # 31-54 x, y, z coordinates
        "%6.2f"            # 55-60 occupancy
        "%6.2f"            # 61-66 temperature factor
        "          "       # 67-76 blank (10 columns)
        "%2s"              # 77-78 element symbol
    )

    def __init__(
        self,
        comm: Optional[Any] = None,
        ostream: Optional["OutputStream"] = None,
        filepath: Optional[str] = None,
        debug: bool = False
    ):
        """
        Initializes a PdbWriter instance.

        Args:
            comm (Optional[Any]): MPI communicator. Defaults to MPI.COMM_WORLD.
            ostream (Optional[OutputStream]): Output stream for logging/info.
                If None, the master rank gets a stdout stream and every
                other rank gets ``OutputStream(None)``.
            filepath (Optional[str]): Default file path for writing PDBs.
            debug (bool): Enable debug logging if True.
        """
        if comm is None:
            comm = MPI.COMM_WORLD

        if ostream is None:
            if comm.Get_rank() == mpi_master():
                ostream = OutputStream(sys.stdout)
            else:
                ostream = OutputStream(None)

        self.comm = comm
        self.rank = self.comm.Get_rank()
        self.nodes = self.comm.Get_size()
        self.ostream = ostream
        self.filepath = filepath
        self._debug = debug
        self.file_dir: Optional[Path] = None

    def write(
        self,
        filepath: Optional[str] = None,
        header: str = "",
        lines: Sequence[Sequence[Any]] = ()
    ) -> None:
        """
        Write a molecular structure to a PDB file.

        The parent directory is created if needed and the file suffix is
        forced to ``.pdb``. All records are formatted before the file is
        opened, so malformed input does not leave a truncated file behind.

        Args:
            filepath (Optional[str]): Output PDB file path. Uses
                self.filepath if None.
            header (str): Optional header line to add at the top of the PDB
                file. A trailing newline is added when it is missing.
            lines (Sequence[Sequence[Any]]): Sequence of atom record lists.
                Each line should have:
                [atom_type, atom_label, atom_number, residue_name,
                 residue_number, x, y, z, spin, charge, note]
                Only atom_label, residue_name, residue_number, x, y, z and
                note are used. The record name is always "ATOM", atoms are
                renumbered from 1 in input order, and residues are
                renumbered from 1 each time (residue_name, residue_number)
                changes.

        Raises:
            AssertionError: If the output file path is not specified
                (reported through assert_msg_critical).

        Example:
            writer.write(
                filepath="output.pdb",
                header="MODEL GENERATED BY MOFbuilder",
                lines=[
                    ["ATOM", "C", 1, "BENZENE", 1, 0.0, 0.0, 0.0, 1.0, 0.0, "C1"],
                    ...
                ]
            )
        """
        # Validate BEFORE converting: Path(None) would raise TypeError and
        # hide the intended error message.
        target = filepath if filepath is not None else self.filepath
        assert_msg_critical(target is not None, "pdb filepath is not specified")
        target = Path(target)

        # Make sure the output directory exists.
        self.file_dir = target.parent
        if self._debug:
            self.ostream.print_info(f"targeting directory: {self.file_dir}")
        self.file_dir.mkdir(parents=True, exist_ok=True)

        if target.suffix != ".pdb":
            target = target.with_suffix(".pdb")

        content = self._build_records(header, lines)
        with open(target, "w") as fp:
            fp.writelines(content)

    def _build_records(self, header: str, lines: Sequence[Sequence[Any]]) -> list:
        """Return the newline-terminated text lines (header + ATOM records)."""
        records = []
        if header:
            # Keep the header on its own line even if the caller omitted
            # the line terminator.
            records.append(header if header.endswith("\n") else header + "\n")

        current_residue = None  # None guarantees the first atom opens a residue
        residue_count = 0
        atom_in_residue = 0

        for serial, values in enumerate(lines, start=1):
            residue_key = (values[3], values[4])
            if residue_key != current_residue:
                current_residue = residue_key
                residue_count += 1
                atom_in_residue = 0
            atom_in_residue += 1

            # Anything after the first underscore is dropped from the
            # residue name and from the note.
            residue_name = values[3].split('_')[0]
            note = values[10].split('_')[0]

            records.append(
                self._format_atom_record(
                    serial,
                    values[1] + str(atom_in_residue),
                    residue_name,
                    residue_count,
                    values[5],
                    values[6],
                    values[7],
                    # nn comes from .basic; presumably it strips digits so
                    # only the element symbol remains -- TODO confirm.
                    nn(note),
                ) + "\n"
            )
        return records

    @classmethod
    def _format_atom_record(
        cls,
        serial: Any,
        atom_name: str,
        residue_name: str,
        residue_seq: Any,
        x: Any,
        y: Any,
        z: Any,
        element: str
    ) -> str:
        """Format one fixed-column ATOM record (without line terminator)."""
        return cls._ATOM_RECORD_FORMAT % (
            "ATOM",
            int(serial),
            atom_name[:3],      # names are truncated to three characters
            " ",                # alternate location indicator: unused
            residue_name[:3],
            "A",                # all atoms are placed in chain A
            int(residue_seq),
            " ",                # insertion code: unused
            float(x), float(y), float(z),
            1.00,               # occupancy
            0.00,               # temperature factor
            element,
        )