Source code for pyCADD.Dock.common
from pyCADD.utils.common import File
from .const import AMINO_ACIDS, ATOM_RECORDS
from .utils import check_pdb
[docs]
class PDBLine:
    """One fixed-column record (line) of a PDB file.

    The raw line is cut at the offsets listed in ``_slice_define`` and every
    field is stored, stripped, as a string attribute.  ``line`` renders the
    current attribute values back into a record using the same column layout,
    so a rendered line can be parsed again by this class.
    """

    def __init__(self, line: str) -> None:
        """One line in a pdb file

        Args:
            line (str): one line in a pdb file
        """
        self._line = line.strip()
        self.record_name = ""  # record name
        self.atom_idx = ""  # atom serial number
        self.atom_name = ""  # atom type
        self.alt_loc = ""  # alternate location indicator
        self.res_name = ""  # residue name
        self.chain_id = ""  # chainID
        self.res_id = ""  # resSeq
        self.insertion_code = ""  # iCode
        self.coord_x = ""  # x
        self.coord_y = ""  # y
        self.coord_z = ""  # z
        self.occupancy = ""  # occupancy
        self.temp_factor = ""  # tempFactor
        self.element = ""  # element symbol, right-justified
        self.charge = ""  # charge on the atom
        self._parse()

    @property
    def _slice_define(self) -> dict:
        """Map each attribute name to its ``(start, end)`` slice of the raw line."""
        return {
            "record_name": (0, 6),
            "atom_idx": (6, 11),
            "atom_name": (12, 16),
            "alt_loc": (16, 17),
            "res_name": (17, 20),
            "chain_id": (21, 22),
            "res_id": (22, 26),
            "insertion_code": (26, 27),
            "coord_x": (30, 38),
            "coord_y": (38, 46),
            "coord_z": (46, 54),
            "occupancy": (54, 60),
            "temp_factor": (60, 66),
            "element": (76, 78),
            "charge": (78, 80),
        }

    def _get_line_slice(self, start: int, end: int, strip: bool = True) -> str:
        """Return ``self._line[start:end]``, stripped unless ``strip`` is False.

        Slicing past the end of a string yields ``""`` rather than raising, so
        short lines simply produce empty fields.
        """
        info = self._line[start:end]
        return info.strip() if strip else info

    def _parse(self) -> None:
        """Fill every attribute named in ``_slice_define`` from the raw line."""
        for key, (start, end) in self._slice_define.items():
            setattr(self, key, self._get_line_slice(start, end))

    def __str__(self) -> str:
        return f"<PDBLine {self.line} >"

    def __repr__(self) -> str:
        return self.__str__()

    @property
    def is_atom_line(self) -> bool:
        """True when the raw line starts with ``ATOM`` or ``HETATM``."""
        return self._line.startswith(("ATOM", "HETATM"))

    @property
    def is_amino(self) -> bool:
        """True when the record name is in ATOM_RECORDS and the residue name is in AMINO_ACIDS."""
        return self.record_name in ATOM_RECORDS and self.res_name in AMINO_ACIDS

    @property
    def is_hetatm(self) -> bool:
        """True when the raw line starts with ``HETATM``."""
        return self._line.startswith("HETATM")

    @property
    def is_conect(self) -> bool:
        """True when the raw line starts with ``CONECT``."""
        return self._line.startswith("CONECT")

    @property
    def is_ter(self) -> bool:
        """True when the raw line starts with ``TER``."""
        return self._line.startswith("TER")

    @property
    def is_end(self) -> bool:
        """True when the raw line starts with ``END``."""
        return self._line.startswith("END")

    @property
    def _formatter(self) -> str:
        """Format template whose columns match ``_slice_define``.

        The gap widths are written as ``" " * n`` so that the layout stays
        explicit: 3 blanks between iCode (index 26) and x (index 30), and
        10 blanks between tempFactor (ends at 66) and element (index 76).
        """
        tail = (
            "{:1}{:<3} {:1}{:>4}{:1}"  # altLoc, resName, chainID, resSeq, iCode
            + " " * 3
            + "{:>8}{:>8}{:>8}"  # x, y, z
            + "{:>6}{:>6}"  # occupancy, tempFactor
            + " " * 10
            + "{:>2}{:>2}"  # element, charge
        )
        if len(self.get_atom_name()) == 4:
            # a 4-character atom name fills the whole name field (index 12-15)
            return "{:<6}{:>5} {:<4}" + tail
        # start writing atom name at 14th column
        return "{:<6}{:>5}" + " " * 2 + "{:<3}" + tail

    @property
    def line(self) -> str:
        """Record text rendered from the current attribute values."""
        return self._formatter.format(
            self.record_name,
            self.atom_idx,
            self.atom_name,
            self.alt_loc,
            self.res_name,
            self.chain_id,
            self.res_id,
            self.insertion_code,
            self.coord_x,
            self.coord_y,
            self.coord_z,
            self.occupancy,
            self.temp_factor,
            self.element,
            self.charge,
        )

    def get_line(self) -> str:
        """Get the line string from current attributes

        Returns:
            str: line string
        """
        return self.line

    def get_atom_name(self) -> str:
        """Get the atom name from the line

        A leading digit is moved to the end of the name, e.g. ``1HG2``
        becomes ``HG21``.  An empty name is returned unchanged.

        Returns:
            str: atom name
        """
        atom_name = self.atom_name
        if atom_name and atom_name[0].isdigit():  # for name such as: 1HB, 1HG2
            atom_name = atom_name[1:] + atom_name[0]
        return atom_name
[docs]
class PDBLineParser:
[docs]
def __init__(self, pdb_str: str = None, pdb_file: str = None) -> None:
"""Parse pdb file string or file.
Args:
pdb_str (str, optional): pdb string. Defaults to None.
pdb_file (str, optional): pdb file path. Required if pdb_str is not provided. Defaults to None.
Raises:
ValueError: Either pdb_str or pdb_file must be provided.
"""
if pdb_str is None and pdb_file is None:
raise ValueError("Either pdb_str or pdb_file must be provided")
self.pdb_file = pdb_file
self.pdb_str = pdb_str if pdb_str is not None else self._read_pdb_file()
self.pdb_lines = []
self._idx_map = {}
self._parse_lines()
def __str__(self) -> str:
return "\n".join(self.get_line_str_list())
def __repr__(self) -> str:
return self.__str__()
def _parse_lines(self):
self.pdb_lines = [PDBLine(line) for line in self.pdb_str.splitlines() if line.strip()]
self._idx_map = {line.atom_idx: i for i, line in enumerate(self.pdb_lines)}
def _read_pdb_file(self):
"""Read the pdb file if pdb_str is not provided
Returns:
str: pdb file content
"""
with open(self.pdb_file) as f:
_pdb_str = f.read()
return _pdb_str
[docs]
def get_lines(self):
"""Get a list of all lines parsed from the pdb file.
Returns:
list[PDBLine]: pdb line object list
"""
return self.pdb_lines
[docs]
def get_atom_lines(self):
"""Get a list of atom line objects parsed from the pdb file.
Returns:
list[PDBLine]: atom line object list
"""
return [line for line in self.pdb_lines if line.is_atom_line]
[docs]
def get_amino_lines(self):
"""Get a list of amino acid line objects parsed from the pdb file.
Amino acid line is defined as the line with record name 'ATOM' and res_name in AMINO_ACIDS
Returns:
list[PDBLine]: amino acid line objects list
"""
return [line for line in self.pdb_lines if line.is_amino]
[docs]
def get_hetatm_lines(self):
"""Get a list of HETATM line objects parsed from the pdb file.
Returns:
list[PDBLine]: HETATM line objects list
"""
return [line for line in self.pdb_lines if line.is_hetatm]
[docs]
def get_str_list(self):
"""Get a list of pdb line strings.
Returns:
list[str]: pdb line strings list
"""
return [line.get_line() for line in self.pdb_lines]
[docs]
def save_pdb(self, file_path):
"""Save the pdb file to the file_path
Args:
file_path (str): file path to save the pdb file
"""
with open(file_path, "w") as f:
f.write("\n".join(self.get_line_str_list()))
[docs]
class PDBFile(File):
    """A ``File`` holding PDB content, parsed through ``PDBLineParser``."""

    def __init__(self, path: str) -> None:
        """PDB file class

        Args:
            path (str): file path string
        """
        super().__init__(path)
        # the first four characters of the file prefix are kept as the PDB ID
        # only when check_pdb accepts them
        if check_pdb(self.file_prefix[:4]):
            self.pdbid = self.file_prefix[:4]
        else:
            self.pdbid = None
        self.pdb_parser = PDBLineParser(pdb_file=self.file_path)

    def _catch_lig(self) -> list:
        """Collect one dict per line that starts with ``HET `` in the file.

        The whitespace-separated tokens after the record name are re-split on
        commas and paired, in order, with the keys ``id``, ``chain``,
        ``resid`` and ``atom_num``.

        Returns:
            list: list of dicts, one per matching line
        """
        keys = ["id", "chain", "resid", "atom_num"]
        with open(self.file_path, "r") as handle:
            rows = handle.read().splitlines()

        found = []
        for row in rows:
            if not row.startswith("HET "):
                continue
            tokens = ",".join(row.split()[1:]).split(",")
            found.append(dict(zip(keys, tokens)))
        return found

    def get_lines(self, return_str: bool = False) -> list | str:
        """Get the pdb file content as a list of lines or a string

        Args:
            return_str (bool, optional): get the string instead of list[str]. Defaults to False.

        Returns:
            list|str: pdb file content
        """
        if return_str:
            return "\n".join(self.pdb_parser.get_str_list())
        return self.pdb_parser.get_str_list()

    def get_chain(self, chain_id: str, return_str: bool = False) -> list | str:
        """Get the atom lines that belong to a single chain

        Args:
            chain_id (str): chain id
            return_str (bool, optional): get the string instead of list[str]. Defaults to False.

        Returns:
            list|str: atom lines of the requested chain
        """
        selected = []
        for atom_line in self.pdb_parser.get_atom_lines():
            if atom_line.chain_id == chain_id:
                selected.append(atom_line.get_line())
        if return_str:
            return "\n".join(selected)
        return selected
[docs]
class EnsembleInputFile(File):
    """Input file for ensemble docking.

    ``mappings`` is a list of dicts with the keys ``receptor``, ``pdb`` and
    ``ligand``.  It is filled by one of the ``from_*`` constructors, or lazily
    by ``read`` the first time one of the ``get_*`` methods is called.
    """

    def __init__(self, path: str) -> None:
        """Input file class for ensemble docking

        Args:
            path (str): file path
        """
        super().__init__(path)
        self.mappings = None
        self._pdbid_list = None
        self._ligand_list = None

    @property
    def pdbid_list(self) -> list:
        """Sorted unique PDB IDs, computed once and then cached."""
        if self._pdbid_list is None:
            self._pdbid_list = self.get_pdbid_list()
        return self._pdbid_list

    @property
    def ligand_list(self) -> list:
        """Sorted unique ligand names, computed once and then cached."""
        if self._ligand_list is None:
            self._ligand_list = self.get_ligand_list()
        return self._ligand_list

    @classmethod
    def from_csv(cls, file_path: str, sep: str = ",", header: bool = False) -> "EnsembleInputFile":
        """
        Parse input file as csv format

        Each non-blank row gives a PDB ID and, optionally, a ligand name in
        the second column; a row with a single column gets an empty ligand
        name.  The file prefix is used as the receptor name.

        Args:
            file_path (str): csv file path
            sep (str, optional): separator. Defaults to ','.
            header (bool, optional): whether the csv file has header. Defaults to False.

        csv examples:
        ```
        1XJ7,DHT
        1XQ3,R18
        2AM9,TES
        2AM9,DTT
        2YLP,TES
        2YLP,056
        ```

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        csv_file = File(file_path)
        with open(csv_file.file_path, "r") as f:
            raw_list = f.read().splitlines()
        if header:
            raw_list = raw_list[1:]

        mappings = []
        for line in raw_list:
            if not line.strip():
                # blank rows (e.g. a trailing empty line) carry no entry
                continue
            item = line.split(sep)
            # the fields of the split row are used, not characters of the raw line
            pdbid = item[0].strip()
            ligand_name = item[1].strip() if len(item) >= 2 else ""
            mappings.append(
                {"receptor": csv_file.file_prefix, "pdb": pdbid, "ligand": ligand_name}
            )

        ins = cls(file_path)
        ins.mappings = mappings
        return ins

    @classmethod
    def from_ini(cls, config_file: str) -> "EnsembleInputFile":
        """Parse input file as ini format

        Every section is a receptor; every option is a PDB ID whose value is a
        comma-separated list of ligand names.

        Args:
            config_file (str): ini file path

        ini examples:
        ```
        [P10275]
        1XJ7: DHT
        1XQ3: R18
        2AM9: TES,DTT
        2YLP: TES,056
        ```

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        from pyCADD.utils.common import FixedConfig

        config = FixedConfig()
        config.read(config_file)

        mappings = []
        for receptor in config.sections():
            for pdb, lig_field in config.items(receptor):
                for lig in lig_field.split(","):
                    # stripped so "TES, DTT" gives the same names as "TES,DTT"
                    mappings.append({"receptor": receptor, "pdb": pdb, "ligand": lig.strip()})

        ins = cls(config_file)
        ins.mappings = mappings
        return ins

    @classmethod
    def from_yaml(cls, yaml_file: str) -> "EnsembleInputFile":
        """Parse input file as yaml format

        The document is read as ``{receptor: {pdb: ligand-or-list-of-ligands}}``.

        Args:
            yaml_file (str): yaml file path

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        import yaml

        with open(yaml_file, "r") as f:
            # NOTE(review): FullLoader can construct more than plain data types;
            # consider yaml.safe_load if input files may come from untrusted sources.
            # An empty document loads as None, treated here as "no receptors".
            yaml_dict = yaml.load(f, Loader=yaml.FullLoader) or {}

        mappings = []
        for receptor, entries in yaml_dict.items():
            for pdb, ligs in entries.items():
                if isinstance(ligs, str):
                    # a single ligand may be written as a bare string
                    ligs = [ligs]
                for lig in ligs:
                    mappings.append({"receptor": receptor, "pdb": pdb, "ligand": lig})

        ins = cls(yaml_file)
        ins.mappings = mappings
        return ins

    @classmethod
    def parse_file(cls, path: str, header: bool = False) -> "EnsembleInputFile":
        """Parse input file, choosing the parser from the file extension

        Args:
            path (str): file path
            header (bool, optional): whether the file has header. Only for csv file. Defaults to False.

        Raises:
            ValueError: Unsupported file type

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        file = File(path)
        ext = file.file_ext.lower()
        if ext in ("csv", "txt"):
            return cls.from_csv(path, header=header)
        if ext == "ini":
            return cls.from_ini(path)
        if ext in ("yaml", "yml"):
            return cls.from_yaml(path)
        raise ValueError(f"Unsupported file type: {file.file_path}")

    def read(self, file_path: str) -> None:
        """Read and parse the input file, replacing ``self.mappings``

        Args:
            file_path (str): file path
        """
        self.mappings = self.parse_file(file_path).mappings

    def get_pairs_list(self) -> list[tuple]:
        """Get the list of pairs. Pairs are defined as (pdb, ligand)

        Returns:
            list: list of pairs
        """
        if self.mappings is None:
            self.read(self.file_path)
        return [(item["pdb"], item["ligand"]) for item in self.mappings]

    def get_pdbid_list(self) -> list[str]:
        """Get the sorted list of unique pdb ids

        Returns:
            list: list of pdb ids
        """
        if self.mappings is None:
            self.read(self.file_path)
        return sorted({item["pdb"] for item in self.mappings})

    def get_ligand_list(self) -> list[str]:
        """Get the sorted list of unique ligands

        Returns:
            list: list of ligands
        """
        if self.mappings is None:
            self.read(self.file_path)
        return sorted({item["ligand"] for item in self.mappings})