Compare commits
1 Commits
e935617894
...
cae5a1bee8
Author | SHA1 | Date | |
---|---|---|---|
|
cae5a1bee8 |
|
@ -24,6 +24,8 @@ class Atom(PDBEntry):
|
||||||
|
|
||||||
position: Point3D
|
position: Point3D
|
||||||
|
|
||||||
|
_row: str
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def parse(cls, row: str) -> Atom:
|
def parse(cls, row: str) -> Atom:
|
||||||
return cls(
|
return cls(
|
||||||
|
@ -32,8 +34,12 @@ class Atom(PDBEntry):
|
||||||
position=Point3D(
|
position=Point3D(
|
||||||
x=float(row[30:38]), y=float(row[39:46]), z=float(row[46:54])
|
x=float(row[30:38]), y=float(row[39:46]), z=float(row[46:54])
|
||||||
),
|
),
|
||||||
|
_row=row,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def __str__(self) -> str:
|
||||||
|
return self._row
|
||||||
|
|
||||||
|
|
||||||
class Hetatm(Atom):
|
class Hetatm(Atom):
|
||||||
type = "HETATM"
|
type = "HETATM"
|
||||||
|
@ -47,82 +53,84 @@ class Point3D:
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class WaterMoleculesCategorizedByDistance:
|
class WaterAtomsCategorizedByDistance:
|
||||||
close_water_molecules: list[Atom]
|
protein_atoms: list[Atom]
|
||||||
far_water_molecules: list[Atom]
|
close_water_atoms: list[Atom]
|
||||||
|
far_water_atoms: list[Atom]
|
||||||
|
|
||||||
|
|
||||||
class MoleculeCategorizer:
|
class AtomCategorizer:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, protein_pdb_file_path: str, maximum_distance_from_protein: float
|
self, protein_pdb_file_path: str, maximum_distance_from_protein: float
|
||||||
):
|
):
|
||||||
self.protein_pdb_file_path = protein_pdb_file_path
|
self.protein_pdb_file_path = protein_pdb_file_path
|
||||||
self.maximum_distance_from_protein = maximum_distance_from_protein
|
self.maximum_distance_from_protein = maximum_distance_from_protein
|
||||||
|
|
||||||
def categorize_water_molecules(self) -> WaterMoleculesCategorizedByDistance:
|
def categorize_water_atoms(self) -> WaterAtomsCategorizedByDistance:
|
||||||
pdb_entries = self._read_protein_pdb()
|
pdb_entries = self._read_protein_pdb()
|
||||||
protein_atoms: list[Atom] = self._get_protein_atoms(pdb_entries=pdb_entries)
|
protein_atoms: list[Atom] = self._get_protein_atoms(pdb_entries=pdb_entries)
|
||||||
water_molecules: list[Atom] = self._get_water_molecules(pdb_entries=pdb_entries)
|
water_atoms: list[Atom] = self._get_water_atoms(pdb_entries=pdb_entries)
|
||||||
|
|
||||||
logger.info(f"Total amount of pdb entries: '{len(pdb_entries)}'")
|
logger.info(f"Total amount of pdb entries: '{len(pdb_entries)}'")
|
||||||
logger.info(f"Amount of protein's atoms: '{len(protein_atoms)}'")
|
logger.info(f"Amount of protein's atoms: '{len(protein_atoms)}'")
|
||||||
logger.info(f"Amount of water molecules: '{len(water_molecules)}'")
|
logger.info(f"Amount of water atom: '{len(water_atoms)}'")
|
||||||
|
|
||||||
close_water_molecules: list[Atom] = self._get_close_water_molecules(
|
close_water_atoms: list[Atom] = self._get_close_water_atoms(
|
||||||
protein_atoms=protein_atoms, water_molecules=water_molecules
|
protein_atoms=protein_atoms, water_atoms=water_atoms
|
||||||
)
|
)
|
||||||
far_water_molecules: list[Atom] = self._get_far_water_molecules(
|
far_water_atoms: list[Atom] = self._get_far_water_atoms(
|
||||||
water_molecules=water_molecules, close_water_molecules=close_water_molecules
|
water_atoms=water_atoms, close_water_atoms=close_water_atoms
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"There are '{len(water_molecules)}' total water molecules, from which '{len(close_water_molecules)}' "
|
f"There are '{len(water_atoms)}' total water atoms, from which '{len(close_water_atoms)}' "
|
||||||
f"are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms "
|
f"are not further than '{self.maximum_distance_from_protein}' angstrom from protein atoms "
|
||||||
f"and '{len(far_water_molecules)}' that are further"
|
f"and '{len(far_water_atoms)}' that are further"
|
||||||
)
|
)
|
||||||
return WaterMoleculesCategorizedByDistance(
|
return WaterAtomsCategorizedByDistance(
|
||||||
close_water_molecules=close_water_molecules,
|
protein_atoms=protein_atoms,
|
||||||
far_water_molecules=far_water_molecules,
|
close_water_atoms=close_water_atoms,
|
||||||
|
far_water_atoms=far_water_atoms,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _get_close_water_molecules(
|
def _get_close_water_atoms(
|
||||||
self, protein_atoms: list[Atom], water_molecules: list[Atom]
|
self, protein_atoms: list[Atom], water_atoms: list[Atom]
|
||||||
) -> list[Atom]:
|
) -> list[Atom]:
|
||||||
close_water_molecules: list[Atom] = []
|
close_water_atoms: list[Atom] = []
|
||||||
for water_molecule in water_molecules:
|
for water_atoms in water_atoms:
|
||||||
for protein_atom in protein_atoms:
|
for protein_atom in protein_atoms:
|
||||||
if (
|
if (
|
||||||
self._calc_distance_square(
|
self._calc_distance_square(
|
||||||
water_molecule.position, protein_atom.position
|
water_atoms.position, protein_atom.position
|
||||||
)
|
)
|
||||||
<= self.maximum_distance_from_protein**2
|
<= self.maximum_distance_from_protein**2
|
||||||
):
|
):
|
||||||
close_water_molecules.append(water_molecule)
|
close_water_atoms.append(water_atoms)
|
||||||
break
|
break
|
||||||
return close_water_molecules
|
return close_water_atoms
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_far_water_molecules(
|
def _get_far_water_atoms(
|
||||||
water_molecules: list[Atom], close_water_molecules: list[Atom]
|
water_atoms: list[Atom], close_water_atoms: list[Atom]
|
||||||
) -> list[Atom]:
|
) -> list[Atom]:
|
||||||
close_water_molecule_ids = {
|
close_water_atom_ids = {
|
||||||
close_water_molecule.id for close_water_molecule in close_water_molecules
|
close_water_atom.id for close_water_atom in close_water_atoms
|
||||||
}
|
}
|
||||||
return [
|
return [
|
||||||
water_molecule
|
water_atom
|
||||||
for water_molecule in water_molecules
|
for water_atom in water_atoms
|
||||||
if water_molecule.id not in close_water_molecule_ids
|
if water_atom.id not in close_water_atom_ids
|
||||||
]
|
]
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_water_molecules(pdb_entries: list[PDBEntry]) -> list[Atom]:
|
def _get_water_atoms(pdb_entries: list[PDBEntry]) -> list[Atom]:
|
||||||
water_molecules: list[Atom] = []
|
water_atoms: list[Atom] = []
|
||||||
for entry in pdb_entries:
|
for entry in pdb_entries:
|
||||||
if not (isinstance(entry, Atom) or isinstance(entry, Hetatm)):
|
if not (isinstance(entry, Atom) or isinstance(entry, Hetatm)):
|
||||||
continue
|
continue
|
||||||
if entry.residue_name == "HOH":
|
if entry.residue_name == "HOH":
|
||||||
water_molecules.append(entry)
|
water_atoms.append(entry)
|
||||||
return water_molecules
|
return water_atoms
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_protein_atoms(pdb_entries: list[PDBEntry]) -> list[Atom]:
|
def _get_protein_atoms(pdb_entries: list[PDBEntry]) -> list[Atom]:
|
||||||
|
@ -155,28 +163,47 @@ class MoleculeCategorizer:
|
||||||
|
|
||||||
|
|
||||||
def save_atom_ids_to_file(atoms: list[Atom], filename: str) -> None:
|
def save_atom_ids_to_file(atoms: list[Atom], filename: str) -> None:
|
||||||
|
logger.info(f"Saving '{len(atoms)}' atoms ids to '{filename}'.")
|
||||||
atom_ids = {atom.id for atom in atoms}
|
atom_ids = {atom.id for atom in atoms}
|
||||||
with open(filename, "w") as f:
|
with open(filename, "w") as f:
|
||||||
f.write(json.dumps(list(atom_ids)))
|
f.write(json.dumps(list(atom_ids)))
|
||||||
|
|
||||||
|
|
||||||
|
def save_atom_rows_to_file(atoms: list[Atom], filename: str) -> None:
|
||||||
|
data = ""
|
||||||
|
for water_atom in atoms:
|
||||||
|
data += str(water_atom) + "\n"
|
||||||
|
with open(filename, "w") as f:
|
||||||
|
f.write(data)
|
||||||
|
|
||||||
|
|
||||||
def main(pdb_filename: str) -> None:
|
def main(pdb_filename: str) -> None:
|
||||||
logging.basicConfig(
|
logging.basicConfig(
|
||||||
level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s"
|
level=logging.INFO, format="%(levelname)s | %(asctime)s | %(message)s"
|
||||||
)
|
)
|
||||||
|
|
||||||
result = MoleculeCategorizer(
|
result = AtomCategorizer(
|
||||||
protein_pdb_file_path=pdb_filename, maximum_distance_from_protein=5.0
|
protein_pdb_file_path=pdb_filename, maximum_distance_from_protein=5.0
|
||||||
).categorize_water_molecules()
|
).categorize_water_atoms()
|
||||||
|
|
||||||
logger.info("Saving IDs of a close water molecules to 'close_water_molecules.json'")
|
|
||||||
save_atom_ids_to_file(
|
save_atom_ids_to_file(
|
||||||
atoms=result.close_water_molecules, filename="close_water_molecules.json"
|
atoms=result.close_water_atoms, filename="close_water_atom_ids.json"
|
||||||
|
)
|
||||||
|
save_atom_rows_to_file(
|
||||||
|
atoms=result.close_water_atoms, filename="close_water_atoms.pdb"
|
||||||
|
)
|
||||||
|
save_atom_rows_to_file(
|
||||||
|
atoms=result.close_water_atoms + result.protein_atoms,
|
||||||
|
filename="close_water_atoms_with_protein_atoms.pdb",
|
||||||
)
|
)
|
||||||
|
|
||||||
logger.info("Saving IDs of a far water molecules to 'far_water_molecules.json'")
|
|
||||||
save_atom_ids_to_file(
|
save_atom_ids_to_file(
|
||||||
atoms=result.far_water_molecules, filename="far_water_molecules.json"
|
atoms=result.far_water_atoms, filename="far_water_atom_ids.json"
|
||||||
|
)
|
||||||
|
save_atom_rows_to_file(atoms=result.far_water_atoms, filename="far_water_atoms.pdb")
|
||||||
|
save_atom_rows_to_file(
|
||||||
|
atoms=result.far_water_atoms + result.protein_atoms,
|
||||||
|
filename="far_water_atoms_with_protein_atoms.pdb",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
File diff suppressed because one or more lines are too long
2139
sem1/PhysicalFoundationsOfMolecularElectronics/close_water_atoms.pdb
Normal file
2139
sem1/PhysicalFoundationsOfMolecularElectronics/close_water_atoms.pdb
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
26778
sem1/PhysicalFoundationsOfMolecularElectronics/far_water_atoms.pdb
Normal file
26778
sem1/PhysicalFoundationsOfMolecularElectronics/far_water_atoms.pdb
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
|
@ -1,13 +1,13 @@
|
||||||
from categorize_water_molecules_by_distance import MoleculeCategorizer
|
from categorize_water_molecules_by_distance import AtomCategorizer
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
molecule_categorizer = MoleculeCategorizer(
|
molecule_categorizer = AtomCategorizer(
|
||||||
protein_pdb_file_path="prot_ligand.pdb", maximum_distance_from_protein=5.0
|
protein_pdb_file_path="prot_ligand.pdb", maximum_distance_from_protein=5.0
|
||||||
)
|
)
|
||||||
result = molecule_categorizer.categorize_water_molecules()
|
result = molecule_categorizer.categorize_water_atoms()
|
||||||
|
|
||||||
assert {m.id for m in result.close_water_molecules} == {
|
assert {m.id for m in result.close_water_atoms} == {
|
||||||
"2715",
|
"2715",
|
||||||
"2716",
|
"2716",
|
||||||
"2717",
|
"2717",
|
||||||
|
@ -2148,7 +2148,7 @@ if __name__ == "__main__":
|
||||||
"25342",
|
"25342",
|
||||||
"25343",
|
"25343",
|
||||||
}
|
}
|
||||||
assert {m.id for m in result.far_water_molecules} == {
|
assert {m.id for m in result.far_water_atoms} == {
|
||||||
"8993",
|
"8993",
|
||||||
"18608",
|
"18608",
|
||||||
"13848",
|
"13848",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user