Source code for pyCADD.Dock.common

from pyCADD.utils.common import File

from .const import AMINO_ACIDS, ATOM_RECORDS
from .utils import check_pdb


class PDBLine:
    """One fixed-column record (line) of a PDB file.

    The line is cut into named fields according to the column ranges in
    ``_slice_define``; every field is stored as a stripped string attribute.
    ``line`` / ``get_line()`` rebuild a record string from the *current*
    attribute values, so fields can be edited and written back.
    """

    # Half-open [start, end) character ranges of each field within a record.
    _FIELD_SLICES = {
        "record_name": (0, 6),
        "atom_idx": (6, 11),
        "atom_name": (12, 16),
        "alt_loc": (16, 17),
        "res_name": (17, 20),
        "chain_id": (21, 22),
        "res_id": (22, 26),
        "insertion_code": (26, 27),
        "coord_x": (30, 38),
        "coord_y": (38, 46),
        "coord_z": (46, 54),
        "occupancy": (54, 60),
        "temp_factor": (60, 66),
        "element": (76, 78),
        "charge": (78, 80),
    }

    # Output templates. The blank runs are spelled out explicitly because they
    # must reproduce the gaps between the ranges above (columns not covered by
    # any field); otherwise a formatted line could not be parsed back into the
    # same fields.
    _HEAD_FORMAT_WIDE = "{:<6}{:>5}" + " " + "{:<4}"  # 4-char atom name fills 12..16
    _HEAD_FORMAT_NARROW = "{:<6}{:>5}" + " " * 2 + "{:<3}"  # shorter name starts at 13
    _TAIL_FORMAT = (
        "{:1}{:<3}"  # alt_loc, res_name
        + " "  # index 20 is not part of any field
        + "{:1}{:>4}{:1}"  # chain_id, res_id, insertion_code
        + " " * 3  # indices 27..29 are not part of any field
        + "{:>8}{:>8}{:>8}"  # coord_x, coord_y, coord_z
        + "{:>6}{:>6}"  # occupancy, temp_factor
        + " " * 10  # indices 66..75 are not part of any field
        + "{:>2}{:>2}"  # element, charge
    )

    def __init__(self, line: str) -> None:
        """One line in a pdb file

        Args:
            line (str): one line in a pdb file
        """
        self._line = line.strip()
        self.record_name = ""  # record name
        self.atom_idx = ""  # atom serial number
        self.atom_name = ""  # atom type
        self.alt_loc = ""  # alternate location indicator
        self.res_name = ""  # residue name
        self.chain_id = ""  # chainID
        self.res_id = ""  # resSeq
        self.insertion_code = ""  # iCode
        self.coord_x = ""  # x
        self.coord_y = ""  # y
        self.coord_z = ""  # z
        self.occupancy = ""  # occupancy
        self.temp_factor = ""  # tempFactor
        self.element = ""  # element symbol, right-justified
        self.charge = ""  # charge on the atom
        self._parse()

    @property
    def _slice_define(self):
        """Mapping of attribute name -> (start, end) slice bounds in the line."""
        return self._FIELD_SLICES

    def _get_line_slice(self, start: int, end: int, strip: bool = True):
        """Return ``self._line[start:end]``, stripped unless ``strip`` is False.

        Slicing past the end of a string yields a shorter (possibly empty)
        string rather than raising, so short lines simply give empty fields.
        """
        info = self._line[start:end]
        return info.strip() if strip else info

    def _parse(self):
        """Fill every field attribute from its column range of the stored line."""
        for key, (start, end) in self._slice_define.items():
            setattr(self, key, self._get_line_slice(start, end))

    def __str__(self) -> str:
        return f"<PDBLine {self.line} >"

    def __repr__(self) -> str:
        return self.__str__()

    @property
    def is_atom_line(self) -> bool:
        """True when the stored line starts with ``ATOM`` or ``HETATM``."""
        return self._line.startswith(("ATOM", "HETATM"))

    @property
    def is_amino(self) -> bool:
        """True when the record name is in ATOM_RECORDS and the residue in AMINO_ACIDS."""
        return self.record_name in ATOM_RECORDS and self.res_name in AMINO_ACIDS

    @property
    def is_hetatm(self) -> bool:
        """True when the stored line starts with ``HETATM``."""
        return self._line.startswith("HETATM")

    @property
    def is_conect(self) -> bool:
        """True when the stored line starts with ``CONECT``."""
        return self._line.startswith("CONECT")

    @property
    def is_ter(self) -> bool:
        """True when the stored line starts with ``TER``."""
        return self._line.startswith("TER")

    @property
    def is_end(self) -> bool:
        """True when the stored line starts with ``END``."""
        return self._line.startswith("END")

    @property
    def _formatter(self) -> str:
        """Format template used by ``line``, chosen by the atom name length."""
        if len(self.get_atom_name()) == 4:
            return self._HEAD_FORMAT_WIDE + self._TAIL_FORMAT
        # start writing atom name at 14th column
        return self._HEAD_FORMAT_NARROW + self._TAIL_FORMAT

    @property
    def line(self) -> str:
        """Record string rebuilt from the current attribute values."""
        return self._formatter.format(
            self.record_name,
            self.atom_idx,
            self.atom_name,
            self.alt_loc,
            self.res_name,
            self.chain_id,
            self.res_id,
            self.insertion_code,
            self.coord_x,
            self.coord_y,
            self.coord_z,
            self.occupancy,
            self.temp_factor,
            self.element,
            self.charge,
        )

    def get_line(self) -> str:
        """Get the line string from current attributes

        Returns:
            str: line string
        """
        return self.line

    def get_atom_name(self) -> str:
        """Get the atom name from the line

        A name with a leading digit has that digit moved to the end
        (``1HB`` -> ``HB1``); the ``atom_name`` attribute itself is unchanged.

        Returns:
            str: atom name
        """
        atom_name = self.atom_name
        if atom_name and atom_name[0].isdigit():
            # for name such as: 1HB, 1HG2
            atom_name = atom_name[1:] + atom_name[0]
        return atom_name
[docs] class PDBLineParser:
[docs] def __init__(self, pdb_str: str = None, pdb_file: str = None) -> None: """Parse pdb file string or file. Args: pdb_str (str, optional): pdb string. Defaults to None. pdb_file (str, optional): pdb file path. Required if pdb_str is not provided. Defaults to None. Raises: ValueError: Either pdb_str or pdb_file must be provided. """ if pdb_str is None and pdb_file is None: raise ValueError("Either pdb_str or pdb_file must be provided") self.pdb_file = pdb_file self.pdb_str = pdb_str if pdb_str is not None else self._read_pdb_file() self.pdb_lines = [] self._idx_map = {} self._parse_lines()
def __str__(self) -> str: return "\n".join(self.get_line_str_list()) def __repr__(self) -> str: return self.__str__() def _parse_lines(self): self.pdb_lines = [PDBLine(line) for line in self.pdb_str.splitlines() if line.strip()] self._idx_map = {line.atom_idx: i for i, line in enumerate(self.pdb_lines)} def _read_pdb_file(self): """Read the pdb file if pdb_str is not provided Returns: str: pdb file content """ with open(self.pdb_file) as f: _pdb_str = f.read() return _pdb_str
[docs] def get_lines(self): """Get a list of all lines parsed from the pdb file. Returns: list[PDBLine]: pdb line object list """ return self.pdb_lines
[docs] def get_atom_lines(self): """Get a list of atom line objects parsed from the pdb file. Returns: list[PDBLine]: atom line object list """ return [line for line in self.pdb_lines if line.is_atom_line]
[docs] def get_amino_lines(self): """Get a list of amino acid line objects parsed from the pdb file. Amino acid line is defined as the line with record name 'ATOM' and res_name in AMINO_ACIDS Returns: list[PDBLine]: amino acid line objects list """ return [line for line in self.pdb_lines if line.is_amino]
[docs] def get_hetatm_lines(self): """Get a list of HETATM line objects parsed from the pdb file. Returns: list[PDBLine]: HETATM line objects list """ return [line for line in self.pdb_lines if line.is_hetatm]
[docs] def get_str_list(self): """Get a list of pdb line strings. Returns: list[str]: pdb line strings list """ return [line.get_line() for line in self.pdb_lines]
[docs] def save_pdb(self, file_path): """Save the pdb file to the file_path Args: file_path (str): file path to save the pdb file """ with open(file_path, "w") as f: f.write("\n".join(self.get_line_str_list()))
class PDBFile(File):
    """A PDB file on disk together with a line parser over its content."""

    def __init__(self, path: str) -> None:
        """PDB file class

        Args:
            path (str): file path string
        """
        super().__init__(path)
        # The first four characters of the file prefix are kept as the PDB ID
        # only when check_pdb() accepts them.
        self.pdbid = self.file_prefix[:4] if check_pdb(self.file_prefix[:4]) else None
        self.pdb_parser = PDBLineParser(pdb_file=self.file_path)

    def _catch_lig(self) -> list:
        """Collect one dict per line of the file that starts with ``"HET "``.

        The whitespace-separated tokens after the first one are paired, in
        order, with the keys ``id``, ``chain``, ``resid`` and ``atom_num``.

        Returns:
            list: list of dicts, in file order
        """
        keys = ["id", "chain", "resid", "atom_num"]
        with open(self.file_path, "r") as handle:
            records = handle.read().splitlines()

        ligands = []
        for record in records:
            if not record.startswith("HET "):
                continue
            # Tokens are re-joined and split on commas before pairing.
            values = ",".join(record.split()[1:]).split(",")
            ligands.append(dict(zip(keys, values)))
        return ligands

    def get_lines(self, return_str: bool = False) -> list | str:
        """Get the pdb file content as a list of lines or a string

        Args:
            return_str (bool, optional): get the string instead of list[str]. Defaults to False.

        Returns:
            list|str: pdb file content
        """
        if return_str:
            return "\n".join(self.pdb_parser.get_str_list())
        return self.pdb_parser.get_str_list()

    def get_chain(self, chain_id: str, return_str: bool = False) -> list | str:
        """Get the atom lines (ATOM/HETATM) that belong to a single chain

        Args:
            chain_id (str): chain id
            return_str (bool, optional): get the string instead of list[str]. Defaults to False.

        Returns:
            list|str: pdb file content of a single chain
        """
        selected = []
        for entry in self.pdb_parser.get_atom_lines():
            if entry.chain_id == chain_id:
                selected.append(entry.get_line())
        if return_str:
            return "\n".join(selected)
        return selected
class EnsembleInputFile(File):
    """Input file for ensemble docking.

    ``mappings`` is a list of ``{"receptor": ..., "pdb": ..., "ligand": ...}``
    dicts. It is filled by one of the ``from_*`` constructors, or lazily by
    ``read()`` the first time one of the ``get_*`` methods needs it.
    """

    def __init__(self, path: str) -> None:
        """Input file class for ensemble docking

        Args:
            path (str): file path
        """
        super().__init__(path)
        self.mappings = None
        self._pdbid_list = None
        self._ligand_list = None

    @property
    def pdbid_list(self) -> list:
        """Sorted unique PDB IDs, computed on first access and then cached."""
        if self._pdbid_list is None:
            self._pdbid_list = self.get_pdbid_list()
        return self._pdbid_list

    @property
    def ligand_list(self) -> list:
        """Sorted unique ligand names, computed on first access and then cached."""
        if self._ligand_list is None:
            self._ligand_list = self.get_ligand_list()
        return self._ligand_list

    @classmethod
    def from_csv(cls, file_path: str, sep: str = ",", header: bool = False) -> "EnsembleInputFile":
        """
        Parse input file as csv format

        Args:
            file_path (str): csv file path
            sep (str, optional): separator. Defaults to ','.
            header (bool, optional): whether the csv file has header. Defaults to False.

        csv examples:
        ```
        1XJ7,DHT
        1XQ3,R18
        2AM9,TES
        2AM9,DTT
        2YLP,TES
        2YLP,056
        ```

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        csv_file = File(file_path)
        with open(csv_file.file_path, "r") as f:
            raw_list = f.read().splitlines()
        if header:
            raw_list = raw_list[1:]

        mappings = []
        for line in raw_list:
            if not line.strip():
                # Blank lines (e.g. a trailing newline) carry no pair.
                continue
            fields = [field.strip() for field in line.split(sep)]
            # The first column is the PDB ID; a missing second column means
            # "no ligand given" and is stored as an empty string. Columns
            # beyond the second are ignored.
            pdbid = fields[0]
            ligand_name = fields[1] if len(fields) >= 2 else ""
            mappings.append({"receptor": csv_file.file_prefix, "pdb": pdbid, "ligand": ligand_name})

        ins = cls(file_path)
        ins.mappings = mappings
        return ins

    @classmethod
    def from_ini(cls, config_file: str) -> "EnsembleInputFile":
        """Parse input file as ini format

        Args:
            config_file (str): ini file path

        ini examples:
        ```
        [P10275]
        1XJ7: DHT
        1XQ3: R18
        2AM9: TES,DTT
        2YLP: TES,056
        ```

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        from pyCADD.utils.common import FixedConfig

        config = FixedConfig()
        config.read(config_file)

        mappings = []
        for receptor in config.sections():
            for pdb, value in config.items(receptor):
                # Strip each name so "TES, DTT" and "TES,DTT" give the same ligands.
                for lig in value.split(","):
                    mappings.append({"receptor": receptor, "pdb": pdb, "ligand": lig.strip()})

        ins = cls(config_file)
        ins.mappings = mappings
        return ins

    @classmethod
    def from_yaml(cls, yaml_file: str) -> "EnsembleInputFile":
        """Parse input file as yaml format

        The document is read as ``{receptor: {pdb: ligand-or-list-of-ligands}}``.

        Args:
            yaml_file (str): yaml file path

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        import yaml

        with open(yaml_file, "r") as f:
            # NOTE(review): FullLoader can construct more than plain data types;
            # if input files may come from untrusted sources, consider
            # yaml.safe_load here. Left unchanged to keep accepted input the same.
            yaml_dict = yaml.load(f, Loader=yaml.FullLoader) or {}  # empty file -> None

        mappings = []
        for receptor, entries in yaml_dict.items():
            for pdb, ligs in entries.items():
                if ligs is None:
                    # "PDB:" with no value: no ligand given, stored as an empty
                    # string like the csv parser does.
                    ligs = [""]
                elif isinstance(ligs, str):
                    ligs = [ligs]
                for lig in ligs:
                    mappings.append({"receptor": receptor, "pdb": pdb, "ligand": lig})

        ins = cls(yaml_file)
        ins.mappings = mappings
        return ins

    @classmethod
    def parse_file(cls, path: str, header: bool = False) -> "EnsembleInputFile":
        """Parse input file, choosing the parser from the file extension

        Args:
            path (str): file path
            header (bool, optional): whether the file has header. Only for csv file. Defaults to False.

        Raises:
            ValueError: Unsupported file type

        Returns:
            EnsembleInputFile: instance of EnsembleInputFile
        """
        file = File(path)
        ext = file.file_ext.lower()
        if ext in ("csv", "txt"):
            return cls.from_csv(path, header=header)
        if ext == "ini":
            return cls.from_ini(path)
        if ext in ("yaml", "yml"):
            return cls.from_yaml(path)
        raise ValueError(f"Unsupported file type: {file.file_path}")

    def read(self, file_path: str) -> None:
        """Read and parse the input file

        Args:
            file_path (str): file path
        """
        self.mappings = self.parse_file(file_path).mappings

    def get_pairs_list(self) -> list[tuple]:
        """Get the list of pairs. Pairs are defined as (pdb, ligand)

        Returns:
            list: list of pairs
        """
        if self.mappings is None:
            self.read(self.file_path)
        return [(item["pdb"], item["ligand"]) for item in self.mappings]

    def get_pdbid_list(self) -> list[str]:
        """Get the list of unique pdb ids

        Returns:
            list: sorted list of pdb ids
        """
        if self.mappings is None:
            self.read(self.file_path)
        return sorted({item["pdb"] for item in self.mappings})

    def get_ligand_list(self) -> list[str]:
        """Get the list of unique ligands

        Returns:
            list: sorted list of ligands
        """
        if self.mappings is None:
            self.read(self.file_path)
        return sorted({item["ligand"] for item in self.mappings})