molecule categorizer: far water molecules are also saved + unit test + refactoring

This commit is contained in:
KoroLion 2024-05-19 17:38:07 +04:00
parent fbf8e4dd27
commit 6239879802
4 changed files with 4356 additions and 17 deletions

View File

@ -4,6 +4,7 @@
from __future__ import annotations
import json
import sys
import logging
from dataclasses import dataclass
@ -45,14 +46,20 @@ class Point3D:
z: float
class CloseWaterMoleculesFinder:
@dataclass
class WaterMoleculesCategorizedByDistance:
    """Water molecules of a PDB file split into two groups by distance.

    Instances are built by ``categorize_water_molecules``, which fills the two
    lists from the same collection of water molecules.
    """

    # Water molecules that are not further than the categorizer's
    # ``maximum_distance_from_protein`` from protein atoms.
    close_water_molecules: list[Atom]
    # The remaining water molecules, i.e. those whose ID is not among the
    # close ones.
    far_water_molecules: list[Atom]
class MoleculeCategorizer:
def __init__(
self, protein_pdb_file_path: str, maximum_distance_from_protein: float
):
self.protein_pdb_file_path = protein_pdb_file_path
self.maximum_distance_from_protein = maximum_distance_from_protein
def find_close_water_molecules(self) -> list[Atom]:
def categorize_water_molecules(self) -> WaterMoleculesCategorizedByDistance:
pdb_entries = self._read_protein_pdb()
protein_atoms: list[Atom] = self._get_protein_atoms(pdb_entries=pdb_entries)
water_molecules: list[Atom] = self._get_water_molecules(pdb_entries=pdb_entries)
@ -64,12 +71,19 @@ class CloseWaterMoleculesFinder:
close_water_molecules: list[Atom] = self._get_close_water_molecules(
protein_atoms=protein_atoms, water_molecules=water_molecules
)
far_water_molecules: list[Atom] = self._get_far_water_molecules(
water_molecules=water_molecules, close_water_molecules=close_water_molecules
)
logger.info(
f"There are '{len(close_water_molecules)}' water molecules "
f"that are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms"
f"There are '{len(water_molecules)}' total water molecules, from which '{len(close_water_molecules)}' "
f"are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms "
f"and '{len(far_water_molecules)}' that are further"
)
return WaterMoleculesCategorizedByDistance(
close_water_molecules=close_water_molecules,
far_water_molecules=far_water_molecules,
)
return close_water_molecules
def _get_close_water_molecules(
self, protein_atoms: list[Atom], water_molecules: list[Atom]
@ -87,6 +101,19 @@ class CloseWaterMoleculesFinder:
break
return close_water_molecules
@staticmethod
def _get_far_water_molecules(
    water_molecules: list[Atom], close_water_molecules: list[Atom]
) -> list[Atom]:
    """Return the water molecules that are not listed as close ones.

    Molecules are matched by their ``id`` attribute. The result keeps the
    relative order of ``water_molecules``.

    Args:
        water_molecules: All water molecules to consider.
        close_water_molecules: Molecules to exclude from the result.

    Returns:
        Every entry of ``water_molecules`` whose ``id`` does not occur in
        ``close_water_molecules``.
    """
    # Gather the IDs once so that each membership test below is a set lookup.
    ids_of_close_molecules = set()
    for molecule in close_water_molecules:
        ids_of_close_molecules.add(molecule.id)

    remaining_molecules = []
    for molecule in water_molecules:
        if molecule.id in ids_of_close_molecules:
            continue
        remaining_molecules.append(molecule)
    return remaining_molecules
@staticmethod
def _get_water_molecules(pdb_entries: list[PDBEntry]) -> list[Atom]:
water_molecules: list[Atom] = []
@ -127,18 +154,36 @@ class CloseWaterMoleculesFinder:
return (p2.x - p1.x) ** 2 + (p2.y - p1.y) ** 2 + (p2.z - p1.z) ** 2
def main() -> None:
logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s")
def save_atom_ids_to_file(atoms: list[Atom], filename: str) -> None:
    """Write the unique IDs of ``atoms`` to ``filename`` as a JSON array.

    Duplicate IDs are written only once. IDs appear in the order in which
    they are first seen in ``atoms``, so the same input always produces the
    same file content.

    Args:
        atoms: Atoms whose ``id`` attributes are saved.
        filename: Path of the file to create or overwrite.
    """
    # dict.fromkeys de-duplicates like a set but keeps insertion order; going
    # through a plain set made the order of the written IDs arbitrary.
    unique_atom_ids = list(dict.fromkeys(atom.id for atom in atoms))
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(unique_atom_ids, f)
close_water_molecules = CloseWaterMoleculesFinder(
protein_pdb_file_path="prot_ligand.pdb", maximum_distance_from_protein=5.0
).find_close_water_molecules()
result = json.dumps([water_molecule.id for water_molecule in close_water_molecules])
result_fpath = "close_water_molecules.json"
logger.info(f"Saving IDs of a close water molecules to '{result_fpath}'")
with open(result_fpath, "w") as f:
f.write(result)
def main(pdb_filename: str) -> None:
    """Categorize the water molecules of a PDB file and save their IDs.

    Configures logging, splits the water molecules of ``pdb_filename`` into
    those not further than 5.0 angstrom from protein atoms and those further
    away, and writes the atom IDs of each group to its own JSON file
    (``close_water_molecules.json`` and ``far_water_molecules.json``).

    Args:
        pdb_filename: Path to the protein PDB file to analyse.
    """
    logging.basicConfig(
        level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s"
    )

    result = MoleculeCategorizer(
        protein_pdb_file_path=pdb_filename, maximum_distance_from_protein=5.0
    ).categorize_water_molecules()

    logger.info("Saving IDs of a close water molecules to 'close_water_molecules.json'")
    save_atom_ids_to_file(
        atoms=result.close_water_molecules, filename="close_water_molecules.json"
    )

    logger.info("Saving IDs of a far water molecules to 'far_water_molecules.json'")
    # The far file must receive the far molecules; passing the close ones here
    # would make both output files contain the same IDs.
    save_atom_ids_to_file(
        atoms=result.far_water_molecules, filename="far_water_molecules.json"
    )
if __name__ == "__main__":
main()
if len(sys.argv) <= 1:
pdb_filename = "prot_ligand.pdb"
else:
pdb_filename = sys.argv[1]
main(pdb_filename=pdb_filename)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff