Skip to content

Commit 7ba559f

Browse files
authored Sep 12, 2023
Add a library of all CCD residues separate from modified amino/nucleic acids (ParmEd#1315)
* Add templates for all CCD residues other than currently stored amino/nucleic acids
* Fix file name
* Fix issues in RDKit interconversions
* Fix copying of qualitative bond type
* Update dependencies.
* Do not arbitrarily override residue number
* Fix tests to work with newer OpenMM versions.
* Fix for newer scipy
1 parent bf0790f commit 7ba559f

17 files changed: +244 -180 lines changed
 

‎.github/workflows/Test.yaml

+4-4
Original file line number | Diff line number | Diff line change
@@ -13,11 +13,11 @@ jobs:
1313
strategy:
1414
matrix:
1515
os: ["ubuntu-latest"]
16-
python-version: ["3.8", "3.9"]
16+
python-version: ["3.9", "3.10"]
1717

1818
steps:
1919
- name: Check out source code
20-
uses: actions/checkout@v2
20+
uses: actions/checkout@v3
2121
with:
2222
fetch-depth: 0
2323

@@ -29,7 +29,7 @@ jobs:
2929
environment-file: devtools/environment-dev.yaml
3030
activate-environment: parmed-dev
3131
channels: conda-forge,bioconda
32-
# mamba-version: "*"
32+
mamba-version: "*"
3333

3434
- name: Environment Information
3535
shell: bash -l {0}
@@ -38,7 +38,7 @@ jobs:
3838
conda list
3939
4040
- name: Install and test
41-
shell: bash -lex {0}
41+
shell: bash -l {0}
4242
run: |
4343
bash -ex devtools/ci/install.sh
4444

‎ccd.js

+38-8
Original file line number | Diff line number | Diff line change
@@ -7,20 +7,39 @@ node ccd.js components.cif > components.json
77

88
const { CIF } = require('molstar/lib/commonjs/mol-io/reader/cif');
99
const fs = require('fs');
10+
const JSONStream = require('JSONStream');
1011

11-
function isValidComponent(block) {
12+
function isLinkingMonomer(block) {
1213
const { chem_comp } = block.categories;
1314
const type = chem_comp.getField('type')?.str(0).toLowerCase() ?? '';
1415

15-
return type.endsWith('linking')
16-
&& (type.includes('dna') || type.includes('rna') || type.includes('peptide'));
16+
return type.endsWith('linking') && type.includes('peptide');
17+
}
18+
19+
function isNotLinkingMonomer(block) {
20+
return !isLinkingMonomer(block);
1721
}
1822

1923
function formatComponent(block) {
2024
const { chem_comp, chem_comp_atom, chem_comp_bond } = block.categories;
2125

2226
// Atoms info
2327
const atoms = [];
28+
const bonds = [];
29+
let atomCount = chem_comp_atom?.rowCount ?? 0;
30+
if (atomCount === 0) {
31+
console.info("No atoms found for", chem_comp.getField('id')?.str(0));
32+
return {
33+
name: chem_comp.getField('id')?.str(0),
34+
one_letter_code: chem_comp.getField('one_letter_code')?.str(0),
35+
full_name: chem_comp.getField('name')?.str(0),
36+
synonyms: chem_comp.getField('pdbx_synonyms')?.str(0),
37+
type: chem_comp.getField('type')?.str(0),
38+
formal_charge: chem_comp.getField('pdbx_formal_charge')?.int(0),
39+
atoms,
40+
bonds,
41+
}
42+
}
2443
for (let i = 0; i < chem_comp_atom.rowCount; i++) {
2544
atoms.push({
2645
name: chem_comp_atom.getField('atom_id').str(i),
@@ -33,9 +52,8 @@ function formatComponent(block) {
3352
}
3453

3554
// Bonds info
36-
const bonds = [];
3755
if (chem_comp_bond) {
38-
for (let i = 0; i < chem_comp_bond.rowCount; i++) {
56+
for (let i = 0; i < chem_comp_bond?.rowCount ?? 0; i++) {
3957
let bond = {
4058
name_a: chem_comp_bond.getField('atom_id_1').str(i),
4159
name_b: chem_comp_bond.getField('atom_id_2').str(i),
@@ -51,6 +69,7 @@ function formatComponent(block) {
5169
name: chem_comp.getField('id')?.str(0),
5270
one_letter_code: chem_comp.getField('one_letter_code')?.str(0),
5371
full_name: chem_comp.getField('name')?.str(0),
72+
synonyms: chem_comp.getField('pdbx_synonyms')?.str(0),
5473
type: chem_comp.getField('type')?.str(0),
5574
formal_charge: chem_comp.getField('pdbx_formal_charge')?.int(0),
5675
atoms,
@@ -67,10 +86,21 @@ async function run() {
6786
}
6887

6988
const components = parsed.result.blocks
70-
.filter(isValidComponent)
89+
.filter(isLinkingMonomer)
7190
.map(formatComponent);
7291

73-
console.log(JSON.stringify(components, null, 2));
92+
var transformStream = JSONStream.stringify();
93+
var outputStream = fs.createWriteStream('processed-components.json');
94+
transformStream.pipe(outputStream);
95+
components.forEach(transformStream.write);
96+
transformStream.end();
97+
98+
outputStream.on(
99+
"finish",
100+
function handleFinish() {
101+
console.log("Done");
102+
}
103+
);
74104
}
75105

76-
run();
106+
run();

‎devtools/environment-dev.yaml

+3-6
Original file line number | Diff line number | Diff line change
@@ -4,17 +4,14 @@ channels:
44
- https://conda.anaconda.org/bioconda
55
dependencies:
66
- pandas
7-
- nose
8-
- openmm >=7.6.0,<7.7.0a
7+
- openmm >=7.6.0
98
- coverage
10-
- nose-timer
119
- netCDF4
12-
- rdkit ==2020.09.4
10+
- rdkit >=2020.09.4
1311
- nglview
14-
- ambertools ==20.15
12+
- ambertools >=20.15
1513
- networkx
1614
- lxml
17-
- nose-timer
1815
- gromacs
1916
- pytest
2017
- pytest-cov

‎get_hybridization_from_rdkit.py

+7-6
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,14 @@
33
extract the hybridization for each templated residue
44
"""
55
import json
6-
from pathlib import Path
76
from rdkit import Chem
7+
from tqdm import tqdm
88
from pathlib import Path
99
from parmed.rdkit import RDKit
10-
from parmed.modeller.standardtemplates import get_standard_residue_template_library
10+
from parmed.modeller.standardtemplates import get_standard_residue_template_library, get_nonstandard_ccd_residues
1111

12-
residues = get_standard_residue_template_library()
12+
# residues = get_standard_residue_template_library()
13+
residues = get_nonstandard_ccd_residues()
1314

1415
rdkit_map = dict()
1516

@@ -18,16 +19,16 @@
1819
Chem.SanitizeMol(mol, Chem.SANITIZE_SETHYBRIDIZATION)
1920
rdkit_map[name] = mol
2021

21-
with Path("ccd_residue_templates.json").open("r") as f:
22+
with Path("nonstandard_ccd_residue_templates.json").open("r") as f:
2223
data = json.load(f)
2324

24-
for res in data:
25+
for res in tqdm(data):
2526
if res["name"] not in rdkit_map:
2627
print(f"Residue {res['name']} is not in the RDKit map")
2728
continue
2829
assert len(res["atoms"]) == rdkit_map[res["name"]].GetNumAtoms()
2930
for atom_data, atom in zip(res["atoms"], rdkit_map[res["name"]].GetAtoms()):
3031
atom_data["hybridization"] = atom.GetHybridization()
3132

32-
with Path("ccd_residue_templates_with_hybridization.json").open("w") as f:
33+
with Path("nonstandard_ccd_residue_templates_with_hybridization.json").open("w") as f:
3334
json.dump(data, f)

0 commit comments

Comments
 (0)
Please sign in to comment.