molecule categorizer: far water molecules are also saved + unit test + refactoring
This commit is contained in:
parent
fbf8e4dd27
commit
6239879802
|
@ -4,6 +4,7 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import sys
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
@ -45,14 +46,20 @@ class Point3D:
|
|||
z: float
|
||||
|
||||
|
||||
class CloseWaterMoleculesFinder:
|
||||
@dataclass
class WaterMoleculesCategorizedByDistance:
    """Result container pairing the two groups of water molecules.

    Returned by ``MoleculeCategorizer.categorize_water_molecules``.
    """

    # Water molecules selected as "close" by the categorizer.
    close_water_molecules: list[Atom]
    # Every remaining water molecule, i.e. those not in the close group.
    far_water_molecules: list[Atom]
|
||||
|
||||
|
||||
class MoleculeCategorizer:
|
||||
def __init__(
|
||||
self, protein_pdb_file_path: str, maximum_distance_from_protein: float
|
||||
):
|
||||
self.protein_pdb_file_path = protein_pdb_file_path
|
||||
self.maximum_distance_from_protein = maximum_distance_from_protein
|
||||
|
||||
def find_close_water_molecules(self) -> list[Atom]:
|
||||
def categorize_water_molecules(self) -> WaterMoleculesCategorizedByDistance:
|
||||
pdb_entries = self._read_protein_pdb()
|
||||
protein_atoms: list[Atom] = self._get_protein_atoms(pdb_entries=pdb_entries)
|
||||
water_molecules: list[Atom] = self._get_water_molecules(pdb_entries=pdb_entries)
|
||||
|
@ -64,12 +71,19 @@ class CloseWaterMoleculesFinder:
|
|||
close_water_molecules: list[Atom] = self._get_close_water_molecules(
|
||||
protein_atoms=protein_atoms, water_molecules=water_molecules
|
||||
)
|
||||
far_water_molecules: list[Atom] = self._get_far_water_molecules(
|
||||
water_molecules=water_molecules, close_water_molecules=close_water_molecules
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"There are '{len(close_water_molecules)}' water molecules "
|
||||
f"that are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms"
|
||||
f"There are '{len(water_molecules)}' total water molecules, from which '{len(close_water_molecules)}' "
|
||||
f"are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms "
|
||||
f"and '{len(far_water_molecules)}' that are further"
|
||||
)
|
||||
return WaterMoleculesCategorizedByDistance(
|
||||
close_water_molecules=close_water_molecules,
|
||||
far_water_molecules=far_water_molecules,
|
||||
)
|
||||
return close_water_molecules
|
||||
|
||||
def _get_close_water_molecules(
|
||||
self, protein_atoms: list[Atom], water_molecules: list[Atom]
|
||||
|
@ -87,6 +101,19 @@ class CloseWaterMoleculesFinder:
|
|||
break
|
||||
return close_water_molecules
|
||||
|
||||
@staticmethod
|
||||
def _get_far_water_molecules(
|
||||
water_molecules: list[Atom], close_water_molecules: list[Atom]
|
||||
) -> list[Atom]:
|
||||
close_water_molecule_ids = {
|
||||
close_water_molecule.id for close_water_molecule in close_water_molecules
|
||||
}
|
||||
return [
|
||||
water_molecule
|
||||
for water_molecule in water_molecules
|
||||
if water_molecule.id not in close_water_molecule_ids
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _get_water_molecules(pdb_entries: list[PDBEntry]) -> list[Atom]:
|
||||
water_molecules: list[Atom] = []
|
||||
|
@ -127,18 +154,36 @@ class CloseWaterMoleculesFinder:
|
|||
return (p2.x - p1.x) ** 2 + (p2.y - p1.y) ** 2 + (p2.z - p1.z) ** 2
|
||||
|
||||
|
||||
def main() -> None:
|
||||
logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s")
|
||||
def save_atom_ids_to_file(atoms: list[Atom], filename: str) -> None:
    """Write the unique IDs of ``atoms`` to ``filename`` as a JSON array.

    Each distinct ``id`` is written once. IDs keep the order of their first
    appearance in ``atoms``, so identical input always produces an identical
    file; routing them through a plain ``set`` gives no such guarantee.

    Args:
        atoms: Objects exposing a hashable, JSON-serialisable ``id``.
        filename: Destination path; an existing file is overwritten.
    """
    # dict.fromkeys removes duplicates while preserving insertion order.
    unique_ids = list(dict.fromkeys(atom.id for atom in atoms))
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(unique_ids, f)
|
||||
|
||||
close_water_molecules = CloseWaterMoleculesFinder(
|
||||
protein_pdb_file_path="prot_ligand.pdb", maximum_distance_from_protein=5.0
|
||||
).find_close_water_molecules()
|
||||
result = json.dumps([water_molecule.id for water_molecule in close_water_molecules])
|
||||
result_fpath = "close_water_molecules.json"
|
||||
logger.info(f"Saving IDs of a close water molecules to '{result_fpath}'")
|
||||
with open(result_fpath, "w") as f:
|
||||
f.write(result)
|
||||
|
||||
def main(pdb_filename: str) -> None:
    """Categorize the water molecules of a PDB file and save both ID lists.

    Configures INFO-level logging, runs ``MoleculeCategorizer`` with a
    maximum distance of 5.0, then writes the IDs of the close molecules to
    ``close_water_molecules.json`` and the IDs of the far molecules to
    ``far_water_molecules.json`` in the current working directory.

    Args:
        pdb_filename: Path of the PDB file to analyse.
    """
    logging.basicConfig(
        level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s"
    )

    result = MoleculeCategorizer(
        protein_pdb_file_path=pdb_filename, maximum_distance_from_protein=5.0
    ).categorize_water_molecules()

    logger.info("Saving IDs of a close water molecules to 'close_water_molecules.json'")
    save_atom_ids_to_file(
        atoms=result.close_water_molecules, filename="close_water_molecules.json"
    )

    logger.info("Saving IDs of a far water molecules to 'far_water_molecules.json'")
    # The far file must receive the far group; passing the close group here
    # would make both output files identical.
    save_atom_ids_to_file(
        atoms=result.far_water_molecules, filename="far_water_molecules.json"
    )
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # main() requires the PDB path, so it is called exactly once, with the
    # first command-line argument or the default file when none is given.
    pdb_filename = sys.argv[1] if len(sys.argv) > 1 else "prot_ligand.pdb"

    main(pdb_filename=pdb_filename)
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user