sem1 pfme task

2023-12-09 22:11:24 +04:00 · 2023-12-09 22:11:24 +04:00 · fbf8e4dd27
commit fbf8e4dd27
3 changed files with 31755 additions and 0 deletions
--- a/sem1/PhysicalFoundationsOfMolecularElectronics/close_water_molecules.json
+++ b/sem1/PhysicalFoundationsOfMolecularElectronics/close_water_molecules.json
--- a/sem1/PhysicalFoundationsOfMolecularElectronics/find_water_molecules_that_are_close_to_protein_atoms.py
+++ b/sem1/PhysicalFoundationsOfMolecularElectronics/find_water_molecules_that_are_close_to_protein_atoms.py
@ -0,0 +1,144 @@
 # PDB file format specification:
 # https://www.biostat.jhsph.edu/~iruczins/teaching/260.655/links/pdbformat.pdf
 from __future__ import annotations
 import json
 import logging
 from dataclasses import dataclass
 logger = logging.getLogger(__name__)
@dataclass
class PDBEntry:
    """Base record for one entry read from a PDB file."""

    # Identifier of the entry, stored as a string.
    id: str
@dataclass
class Atom(PDBEntry):
    """One ``ATOM`` row of a PDB file: serial number, residue name, position."""

    # Record-name prefix used to recognise this kind of row. It is left
    # un-annotated on purpose so that @dataclass keeps it a plain class
    # attribute instead of turning it into a constructor field.
    type = "ATOM"

    residue_name: str
    position: Point3D

    @classmethod
    def parse(cls, row: str) -> Atom:
        """Build an entry from one fixed-width coordinate row.

        The slices follow the PDB column layout (1-based, inclusive):
        serial number 7-11, residue name 18-20, X 31-38, Y 39-46, Z 47-54.

        Args:
            row: A single ATOM/HETATM line of a PDB file.

        Returns:
            An instance of ``cls`` (so subclasses get their own type back).

        Raises:
            ValueError: If a coordinate field cannot be converted to float,
                e.g. because the row is too short.
        """
        return cls(
            id=row[6:11].strip(),
            residue_name=row[17:20].strip(),
            position=Point3D(
                x=float(row[30:38]),
                # The Y field starts at column 39, i.e. index 38. Starting at
                # index 39 would drop the sign or leading digit of values that
                # fill the whole 8-character field (e.g. "-124.430").
                y=float(row[38:46]),
                z=float(row[46:54]),
            ),
        )
 class Hetatm(Atom):
     """A ``HETATM`` row; it reuses the fields and parsing of :class:`Atom`."""

     # Record-name prefix that marks a row as a HETATM entry.
     type = "HETATM"
@dataclass
class Point3D:
    """A point in three-dimensional space given by its Cartesian coordinates."""

    x: float  # first coordinate
    y: float  # second coordinate
    z: float  # third coordinate
 class CloseWaterMoleculesFinder:
     """Find the water molecules of a PDB file that lie near the protein.

     Nothing is read at construction time; the file is opened only when
     :meth:`find_close_water_molecules` is called.
     """

     def __init__(
         self, protein_pdb_file_path: str, maximum_distance_from_protein: float
     ):
         """Store the search parameters.

         Args:
             protein_pdb_file_path: Path of the PDB file to analyse.
             maximum_distance_from_protein: Inclusive distance cutoff, in the
                 same units as the coordinates of the file (the log output
                 calls them angstrom).
         """
         self.protein_pdb_file_path = protein_pdb_file_path
         self.maximum_distance_from_protein = maximum_distance_from_protein

     def find_close_water_molecules(self) -> list[Atom]:
         """Run the whole pipeline and return the nearby water molecules.

         Reads the file, splits the entries into protein atoms and water
         molecules, logs the size of every group and returns the water
         molecules that are within the configured cutoff of a protein atom.
         """
         pdb_entries = self._read_protein_pdb()
         logger.info(f"Total amount of pdb entries: '{len(pdb_entries)}'")

         protein_atoms = self._get_protein_atoms(pdb_entries=pdb_entries)
         logger.info(f"Amount of protein's atoms: '{len(protein_atoms)}'")

         water_molecules = self._get_water_molecules(pdb_entries=pdb_entries)
         logger.info(f"Amount of water molecules: '{len(water_molecules)}'")

         close_water_molecules = self._get_close_water_molecules(
             protein_atoms=protein_atoms, water_molecules=water_molecules
         )
         logger.info(
             f"There are '{len(close_water_molecules)}' water molecules "
             f"that are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms"
         )
         return close_water_molecules

     def _get_close_water_molecules(
         self, protein_atoms: list[Atom], water_molecules: list[Atom]
     ) -> list[Atom]:
         """Keep the water molecules that have a protein atom within the cutoff.

         Squared distances are compared against the squared cutoff, so no
         square root is needed. Input order is preserved and every water
         molecule appears at most once.
         """
         cutoff_square = self.maximum_distance_from_protein**2
         return [
             water
             for water in water_molecules
             # any() stops at the first protein atom that is close enough.
             if any(
                 self._calc_distance_square(water.position, atom.position)
                 <= cutoff_square
                 for atom in protein_atoms
             )
         ]

     @staticmethod
     def _get_water_molecules(pdb_entries: list[PDBEntry]) -> list[Atom]:
         """Return, in file order, the atom entries whose residue is ``HOH``."""
         return [
             entry
             for entry in pdb_entries
             if isinstance(entry, (Atom, Hetatm)) and entry.residue_name == "HOH"
         ]

     @staticmethod
     def _get_protein_atoms(pdb_entries: list[PDBEntry]) -> list[Atom]:
         """Return the leading run of entries that precedes the ``BNZ`` residue.

         Collection stops for good at the first entry that is not an atom
         record or that belongs to residue ``BNZ``; everything after that
         point is ignored.
         NOTE(review): presumably BNZ is the ligand and the file lists the
         protein before it - confirm against the input file.
         """
         leading_atoms: list[Atom] = []
         for entry in pdb_entries:
             if not isinstance(entry, (Atom, Hetatm)) or entry.residue_name == "BNZ":
                 break
             leading_atoms.append(entry)
         return leading_atoms

     def _read_protein_pdb(self) -> list[PDBEntry]:
         """Parse every ATOM and HETATM row of the configured PDB file.

         Rows that start with neither record name are skipped.
         """
         with open(self.protein_pdb_file_path) as pdb_file:
             rows = pdb_file.read().split("\n")

         parsed: list[PDBEntry] = []
         for row in rows:
             # Atom is tried before Hetatm; the first matching prefix wins.
             for entry_cls in (Atom, Hetatm):
                 if row.startswith(entry_cls.type):
                     parsed.append(entry_cls.parse(row))
                     break
         return parsed

     @staticmethod
     def _calc_distance_square(p1: Point3D, p2: Point3D) -> float:
         """Return the squared Euclidean distance between ``p1`` and ``p2``."""
         dx = p2.x - p1.x
         dy = p2.y - p1.y
         dz = p2.z - p1.z
         return dx**2 + dy**2 + dz**2
 def main() -> None:
     """Find the water molecules close to the protein and save their IDs.

     Configures INFO-level logging, searches ``prot_ligand.pdb`` with a
     cutoff of 5.0 and writes the IDs of the matching water molecules as a
     JSON list to ``close_water_molecules.json``. Both paths are relative to
     the current working directory.
     """
     logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s")

     finder = CloseWaterMoleculesFinder(
         protein_pdb_file_path="prot_ligand.pdb", maximum_distance_from_protein=5.0
     )
     close_ids = [molecule.id for molecule in finder.find_close_water_molecules()]
     serialized_ids = json.dumps(close_ids)

     result_fpath = "close_water_molecules.json"
     logger.info(f"Saving IDs of a close water molecules to '{result_fpath}'")
     with open(result_fpath, "w") as output_file:
         output_file.write(serialized_ids)


 # Run the search only when the file is executed as a script, not on import.
 if __name__ == "__main__":
     main()
--- a/sem1/PhysicalFoundationsOfMolecularElectronics/prot_ligand.pdb
+++ b/sem1/PhysicalFoundationsOfMolecularElectronics/prot_ligand.pdb