Compare commits

...

2 Commits

4 changed files with 28995 additions and 17 deletions

View File

@ -4,6 +4,7 @@
from __future__ import annotations from __future__ import annotations
import json import json
import sys
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
@ -45,14 +46,20 @@ class Point3D:
z: float z: float
@dataclass
class WaterMoleculesCategorizedByDistance:
    """Water molecules of one structure, split by distance from the protein.

    Returned by ``MoleculeCategorizer.categorize_water_molecules``.
    """

    # Water molecules that are not further than the configured maximum
    # distance from protein atoms.
    close_water_molecules: list[Atom]
    # Every water molecule that is not in ``close_water_molecules``.
    far_water_molecules: list[Atom]
class MoleculeCategorizer:
def __init__(
    self, protein_pdb_file_path: str, maximum_distance_from_protein: float
) -> None:
    """Store the configuration used when categorizing water molecules.

    Args:
        protein_pdb_file_path: Path of the protein PDB file.
        maximum_distance_from_protein: Distance threshold (reported in
            angstrom by the log output) up to which a water molecule is
            still considered close to the protein.
    """
    self.protein_pdb_file_path = protein_pdb_file_path
    self.maximum_distance_from_protein = maximum_distance_from_protein
def find_close_water_molecules(self) -> list[Atom]: def categorize_water_molecules(self) -> WaterMoleculesCategorizedByDistance:
pdb_entries = self._read_protein_pdb() pdb_entries = self._read_protein_pdb()
protein_atoms: list[Atom] = self._get_protein_atoms(pdb_entries=pdb_entries) protein_atoms: list[Atom] = self._get_protein_atoms(pdb_entries=pdb_entries)
water_molecules: list[Atom] = self._get_water_molecules(pdb_entries=pdb_entries) water_molecules: list[Atom] = self._get_water_molecules(pdb_entries=pdb_entries)
@ -64,12 +71,19 @@ class CloseWaterMoleculesFinder:
close_water_molecules: list[Atom] = self._get_close_water_molecules( close_water_molecules: list[Atom] = self._get_close_water_molecules(
protein_atoms=protein_atoms, water_molecules=water_molecules protein_atoms=protein_atoms, water_molecules=water_molecules
) )
far_water_molecules: list[Atom] = self._get_far_water_molecules(
water_molecules=water_molecules, close_water_molecules=close_water_molecules
)
logger.info( logger.info(
f"There are '{len(close_water_molecules)}' water molecules " f"There are '{len(water_molecules)}' total water molecules, from which '{len(close_water_molecules)}' "
f"that are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms" f"are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms "
f"and '{len(far_water_molecules)}' that are further"
)
return WaterMoleculesCategorizedByDistance(
close_water_molecules=close_water_molecules,
far_water_molecules=far_water_molecules,
) )
return close_water_molecules
def _get_close_water_molecules( def _get_close_water_molecules(
self, protein_atoms: list[Atom], water_molecules: list[Atom] self, protein_atoms: list[Atom], water_molecules: list[Atom]
@ -87,6 +101,19 @@ class CloseWaterMoleculesFinder:
break break
return close_water_molecules return close_water_molecules
@staticmethod
def _get_far_water_molecules(
water_molecules: list[Atom], close_water_molecules: list[Atom]
) -> list[Atom]:
close_water_molecule_ids = {
close_water_molecule.id for close_water_molecule in close_water_molecules
}
return [
water_molecule
for water_molecule in water_molecules
if water_molecule.id not in close_water_molecule_ids
]
@staticmethod @staticmethod
def _get_water_molecules(pdb_entries: list[PDBEntry]) -> list[Atom]: def _get_water_molecules(pdb_entries: list[PDBEntry]) -> list[Atom]:
water_molecules: list[Atom] = [] water_molecules: list[Atom] = []
@ -127,18 +154,36 @@ class CloseWaterMoleculesFinder:
return (p2.x - p1.x) ** 2 + (p2.y - p1.y) ** 2 + (p2.z - p1.z) ** 2 return (p2.x - p1.x) ** 2 + (p2.y - p1.y) ** 2 + (p2.z - p1.z) ** 2
def save_atom_ids_to_file(atoms: list[Atom], filename: str) -> None:
    """Write the unique IDs of ``atoms`` to ``filename`` as a JSON array.

    Duplicate IDs are written once. IDs appear in the order in which they
    are first seen in ``atoms``, so the file content is reproducible.

    Args:
        atoms: Objects exposing an ``id`` attribute.
        filename: Path of the file to (over)write.
    """
    # dict.fromkeys de-duplicates like a set but keeps first-seen order;
    # a plain set would make the order of the saved IDs arbitrary.
    atom_ids = list(dict.fromkeys(atom.id for atom in atoms))
    with open(filename, "w", encoding="utf-8") as f:
        f.write(json.dumps(atom_ids))
def main(pdb_filename: str, maximum_distance_from_protein: float = 5.0) -> None:
    """Categorize the water molecules of a PDB file and save their IDs.

    Close and far water molecule IDs are written to
    ``close_water_molecules.json`` and ``far_water_molecules.json``.

    Args:
        pdb_filename: Path of the protein PDB file to process.
        maximum_distance_from_protein: Distance threshold handed to
            ``MoleculeCategorizer``; defaults to the previously hard-coded
            value of 5.0.
    """
    logging.basicConfig(
        level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s"
    )

    result = MoleculeCategorizer(
        protein_pdb_file_path=pdb_filename,
        maximum_distance_from_protein=maximum_distance_from_protein,
    ).categorize_water_molecules()

    # Keep each output path in one place so the log line and the file that
    # is actually written cannot drift apart.
    close_fpath = "close_water_molecules.json"
    far_fpath = "far_water_molecules.json"

    logger.info(f"Saving IDs of a close water molecules to '{close_fpath}'")
    save_atom_ids_to_file(atoms=result.close_water_molecules, filename=close_fpath)

    logger.info(f"Saving IDs of a far water molecules to '{far_fpath}'")
    save_atom_ids_to_file(atoms=result.far_water_molecules, filename=far_fpath)
if __name__ == "__main__":
    # The first command-line argument, when given, is the PDB file to
    # process; otherwise fall back to the default file name.
    pdb_filename = sys.argv[1] if len(sys.argv) > 1 else "prot_ligand.pdb"
    main(pdb_filename=pdb_filename)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff