sem1 pfme task
This commit is contained in:
commit
fbf8e4dd27
File diff suppressed because one or more lines are too long
|
@ -0,0 +1,144 @@
|
|||
# pdb spec
|
||||
# https://www.biostat.jhsph.edu/~iruczins/teaching/260.655/links/pdbformat.pdf
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
from dataclasses import dataclass
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
class PDBEntry:
    """Common base for a single record taken from a PDB file."""

    # Identifier of the record, stored as text.
    id: str
|
||||
|
||||
|
||||
@dataclass
class Atom(PDBEntry):
    """A coordinate record of a PDB file whose row starts with ``ATOM``."""

    # Record name that prefixes rows of this kind. It carries no annotation,
    # so ``@dataclass`` treats it as a plain class attribute, not a field.
    type = "ATOM"

    # Residue name taken from the row (e.g. "HOH").
    residue_name: str

    # Cartesian coordinates of the atom.
    position: Point3D

    @classmethod
    def parse(cls, row: str) -> Atom:
        """Build an instance from one fixed-width coordinate row.

        PDB coordinate records are column based. Using 1-indexed, inclusive
        column numbers the fields read here are: serial 7-11, residue name
        18-20, x 31-38, y 39-46 and z 47-54.

        Args:
            row: A single ``ATOM``/``HETATM`` line of a PDB file.

        Returns:
            An instance of ``cls`` with the serial number as ``id``.

        Raises:
            ValueError: If a coordinate field cannot be converted to a float,
                for example because the row is truncated.
        """
        return cls(
            id=row[6:11].strip(),
            residue_name=row[17:20].strip(),
            position=Point3D(
                x=float(row[30:38]),
                # Columns 39-46: the slice has to start at index 38, otherwise
                # the first character (sign or leading digit) of a full-width
                # value is silently dropped.
                y=float(row[38:46]),
                z=float(row[46:54]),
            ),
        )
|
||||
|
||||
|
||||
class Hetatm(Atom):
    """A coordinate record whose row starts with ``HETATM``.

    Fields and parsing are inherited unchanged from ``Atom``; only the record
    name differs.
    """

    # Record name that prefixes rows of this kind.
    type = "HETATM"
|
||||
|
||||
|
||||
@dataclass
class Point3D:
    """A point in three-dimensional space given by its x, y and z components."""

    x: float
    y: float
    z: float
|
||||
|
||||
|
||||
class CloseWaterMoleculesFinder:
    """Find water entries of a PDB file that lie close to the protein.

    The file is read into ``Atom``/``Hetatm`` entries. Entries that precede
    the ligand are treated as the protein, entries whose residue name is the
    water residue name are treated as water, and a water entry is reported
    when it is within ``maximum_distance_from_protein`` of at least one
    protein atom.
    """

    # Residue names used to classify the parsed entries.
    WATER_RESIDUE_NAME = "HOH"
    LIGAND_RESIDUE_NAME = "BNZ"

    def __init__(
        self, protein_pdb_file_path: str, maximum_distance_from_protein: float
    ):
        """Store the search parameters; no file access happens here.

        Args:
            protein_pdb_file_path: Path of the PDB file to analyse.
            maximum_distance_from_protein: Largest distance (inclusive) between
                a water entry and a protein atom for the water to be reported.
        """
        self.protein_pdb_file_path = protein_pdb_file_path
        self.maximum_distance_from_protein = maximum_distance_from_protein

    def find_close_water_molecules(self) -> list[Atom]:
        """Read the PDB file and return the water entries near the protein.

        Counts of the parsed entries, protein atoms, water entries and
        matches are logged at INFO level.

        Returns:
            The matching water entries, in the order they appear in the file.
        """
        pdb_entries = self._read_protein_pdb()
        protein_atoms: list[Atom] = self._get_protein_atoms(pdb_entries=pdb_entries)
        water_molecules: list[Atom] = self._get_water_molecules(pdb_entries=pdb_entries)

        logger.info(f"Total amount of pdb entries: '{len(pdb_entries)}'")
        logger.info(f"Amount of protein's atoms: '{len(protein_atoms)}'")
        logger.info(f"Amount of water molecules: '{len(water_molecules)}'")

        close_water_molecules: list[Atom] = self._get_close_water_molecules(
            protein_atoms=protein_atoms, water_molecules=water_molecules
        )

        logger.info(
            f"There are '{len(close_water_molecules)}' water molecules "
            f"that are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms"
        )
        return close_water_molecules

    def _get_close_water_molecules(
        self, protein_atoms: list[Atom], water_molecules: list[Atom]
    ) -> list[Atom]:
        """Return the waters within the maximum distance of any protein atom.

        Squared distances are compared so no square root is needed. Each
        water is reported at most once; the scan over protein atoms stops at
        the first atom that is close enough.
        """
        max_distance = self.maximum_distance_from_protein
        if max_distance < 0:
            # No distance is <= a negative threshold. Squaring it would
            # silently turn it into a positive one.
            return []

        # Loop invariant: compute the squared threshold once.
        max_distance_square = max_distance**2
        return [
            water
            for water in water_molecules
            if any(
                self._calc_distance_square(water.position, atom.position)
                <= max_distance_square
                for atom in protein_atoms
            )
        ]

    @staticmethod
    def _get_water_molecules(pdb_entries: list[PDBEntry]) -> list[Atom]:
        """Return the atom entries whose residue name is the water residue."""
        water_name = CloseWaterMoleculesFinder.WATER_RESIDUE_NAME
        # ``Hetatm`` subclasses ``Atom``, so one isinstance check covers both.
        return [
            entry
            for entry in pdb_entries
            if isinstance(entry, Atom) and entry.residue_name == water_name
        ]

    @staticmethod
    def _get_protein_atoms(pdb_entries: list[PDBEntry]) -> list[Atom]:
        """Return the non-water atom entries that precede the ligand.

        Collection stops at the first entry that is not an atom record or
        that belongs to the ligand residue, so anything listed after the
        ligand is never treated as protein.
        """
        water_name = CloseWaterMoleculesFinder.WATER_RESIDUE_NAME
        ligand_name = CloseWaterMoleculesFinder.LIGAND_RESIDUE_NAME

        protein_atoms: list[Atom] = []
        for entry in pdb_entries:
            if not isinstance(entry, Atom):
                break
            if entry.residue_name == ligand_name:
                break
            if entry.residue_name == water_name:
                # Water is never part of the protein. Counting it here would
                # make every such water trivially "close" (distance 0 to
                # itself) when waters precede the ligand or no ligand exists.
                continue
            protein_atoms.append(entry)
        return protein_atoms

    def _read_protein_pdb(self) -> list[PDBEntry]:
        """Parse every ``ATOM`` and ``HETATM`` row of the PDB file.

        Rows of any other record type are ignored.

        Raises:
            OSError: If the file cannot be opened.
            ValueError: If a coordinate field of a row is not a valid float.
        """
        pdb_entries: list[PDBEntry] = []
        with open(self.protein_pdb_file_path) as f:
            # Stream the rows; the trailing newline lies outside the parsed
            # columns or is ignored by ``float``.
            for row in f:
                if row.startswith(Atom.type):
                    pdb_entries.append(Atom.parse(row))
                elif row.startswith(Hetatm.type):
                    pdb_entries.append(Hetatm.parse(row))
        return pdb_entries

    @staticmethod
    def _calc_distance_square(p1: Point3D, p2: Point3D) -> float:
        """Return the squared Euclidean distance between ``p1`` and ``p2``."""
        dx = p2.x - p1.x
        dy = p2.y - p1.y
        dz = p2.z - p1.z
        return dx**2 + dy**2 + dz**2
|
||||
|
||||
|
||||
def main() -> None:
    """Run the search on ``prot_ligand.pdb`` and save the matching IDs.

    Configures INFO-level logging, looks for water entries within 5.0 of the
    protein and writes their IDs as a JSON list to
    ``close_water_molecules.json`` in the current working directory.
    """
    logging.basicConfig(
        format="%(levelname)s | %(asctime)s | %(message)s",
        level=logging.INFO,
    )

    finder = CloseWaterMoleculesFinder(
        protein_pdb_file_path="prot_ligand.pdb",
        maximum_distance_from_protein=5.0,
    )
    found_ids = [entry.id for entry in finder.find_close_water_molecules()]
    result = json.dumps(found_ids)

    result_fpath = "close_water_molecules.json"
    logger.info(f"Saving IDs of a close water molecules to '{result_fpath}'")
    with open(result_fpath, "w") as out:
        out.write(result)
|
||||
|
||||
|
||||
# Run the pipeline only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
31610
sem1/PhysicalFoundationsOfMolecularElectronics/prot_ligand.pdb
Normal file
31610
sem1/PhysicalFoundationsOfMolecularElectronics/prot_ligand.pdb
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user