1 package uk.ac.cam.spectra.cif;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.util.List;
6
7 import nu.xom.Document;
8 import nu.xom.Node;
9 import nu.xom.Nodes;
10 import nu.xom.Text;
11
12 import org.apache.log4j.Logger;
13 import org.xmlcml.cml.base.CMLElement;
14 import org.xmlcml.cml.base.CMLRuntimeException;
15 import org.xmlcml.cml.element.CMLMolecule;
16 import org.xmlcml.cml.legacy2cml.cif.CIFConverter;
17 import org.xmlcml.cml.tools.CrystalTool;
18 import org.xmlcml.cml.tools.DisorderTool;
19 import org.xmlcml.cml.tools.MoleculeTool;
20 import org.xmlcml.cml.tools.ValencyTool;
21 import org.xmlcml.euclid.RealRange;
22
23 import uk.ac.cam.spectra.Constants;
24 import uk.ac.cam.spectra.DataFileException;
25 import uk.ac.cam.spectra.DataFileType;
26 import uk.ac.cam.spectra.DataProcess;
27 import uk.ac.cam.spectra.SpectraMetadata;
28 import uk.ac.cam.spectra.Util;
29 import uk.ac.cam.spectra.Validation;
30 import uk.co.demon.ursus.cif.CIFException;
31 import uk.co.demon.ursus.cif.CIFParser;
32
33 import com.hp.hpl.jena.rdf.model.Model;
34 import com.hp.hpl.jena.rdf.model.ModelFactory;
35 import com.hp.hpl.jena.rdf.model.Resource;
36
37
38
39
40
41
42
43 public class CIF implements DataFileType {
44
45
46
47
48 private static final long serialVersionUID = 7900865193773288717L;
49
50 private static final double SYMMETRY_CONTACT_DISTANCE = 2.8;
51
52 private static final int ZERO = 0;
53
54 private static final Logger LOG = Logger.getLogger(CIF.class);
55
56
57
58
59
60
61
62
63
64 public void process(DataProcess process) {
65 File source = process.getSourceFile();
66 if (source == null) {
67 throw new IllegalArgumentException("No source file given");
68 }
69
70 CIFParser parser = new CIFParser();
71 uk.co.demon.ursus.cif.CIF cif = null;
72 Document cifdom = null;
73 File data = process.getSourceFile();
74
75 try {
76 cifdom = parser.parse(data);
77 cif = new uk.co.demon.ursus.cif.CIF(cifdom, true);
78 } catch (CIFException e) {
79 Validation v = new Validation();
80 v.getErrors().add(e.getMessage());
81 } catch (IOException e) {
82 throw new RuntimeException(e);
83 }
84
85 boolean validationSuccess = true;
86 if (process.getValidation() != null) {
87 validationSuccess = !process.getValidation().hasErrors();
88 }
89 if (validationSuccess && process.getPerformConversion()) {
90 CIFConverter converter = new CIFConverter();
91 converter.setControls("NO_GLOBAL", "SKIP_ERRORS", "SKIP_HEADER");
92 converter.setCIF(cif);
93 converter.processCIF();
94 List<CMLElement> cmlList = converter.getCMLElementList();
95 if (cmlList.size() > 1) {
96 throw new DataFileException(
97 "Multiple crystals (data blocks) per CIF are not supported. In "
98 + data);
99 } else if (cmlList.size() == 0) {
100 throw new DataFileException("No crystals found in " + data);
101 }
102 CMLElement element = cmlList.get(0);
103
104 improveCML(element, process);
105 process.setConvertedCml(new Document(element));
106 }
107
108 if (validationSuccess && process.getPerformExtraction()) {
109 Model model = ModelFactory.createDefaultModel();
110 Resource subject = model.createResource(process
111 .getMetadataPackageUri());
112 String chemFormula = queryUniqueText(cifdom,
113 "/cif/datablock/item[@name='_chemical_formula_moiety']/text()");
114 if (chemFormula == null || "?".equals(chemFormula)) {
115 LOG.debug("Looking for formula in sum field");
116 chemFormula = queryUniqueText(cifdom,
117 "/cif/datablock/item[@name='_chemical_formula_sum']/text()");
118 }
119 LOG.debug("Formula: " + chemFormula);
120 if (chemFormula != null) {
121 SpectraMetadata.chemical_formula.add(subject, chemFormula);
122 }
123
124 String systematicName = queryUniqueText(cifdom,
125 "/cif/datablock/item[@name='_chemical_name_systematic']/text()");
126 if (systematicName != null) {
127 SpectraMetadata.chemical_systematic_name.add(subject,
128 systematicName);
129 }
130 process.setExtractedMetadata(model);
131 }
132 }
133
134
135
136
137
138
139
140
141
142
143 private void improveCML(CMLElement cml, DataProcess p) {
144 Nodes moleculeNodes = cml.query(
145 "//cml:molecule[count(ancestor::cml:molecule)=0]",
146 Constants.CML_XPATH);
147 int count = 0;
148 for (int i = moleculeNodes.size() - 1; i >= 0; i--) {
149 CMLMolecule molecule = (CMLMolecule) moleculeNodes.get(i);
150 MoleculeTool moleculeTool = new MoleculeTool(molecule);
151 moleculeTool.createCartesiansFromFractionals();
152 if (molecule.getId() == null) {
153 String molName = "mol" + count++;
154 molecule.setId(molName);
155 }
156
157 try {
158 new DisorderTool(molecule).resolveDisorder();
159 } catch (CMLRuntimeException e) {
160 if (p.getPerformValidation()) {
161 p.getValidation().getWarnings().add(e.getMessage());
162 LOG.warn(e);
163 } else {
164 throw new DataFileException(e);
165 }
166 }
167 CrystalTool crystalTool = new CrystalTool(molecule);
168 CMLMolecule mergedMolecule = crystalTool
169 .calculateCrystallochemicalUnit(new RealRange(ZERO,
170 SYMMETRY_CONTACT_DISTANCE
171 * SYMMETRY_CONTACT_DISTANCE));
172 Util.print(new Document(mergedMolecule), System.out, true);
173 ValencyTool vt = new ValencyTool(mergedMolecule);
174 vt.adjustBondOrdersAndChargesToValency();
175
176 molecule.detach();
177 cml.appendChild(mergedMolecule);
178 }
179
180 }
181
182 public String convertName(String fileName) {
183 return fileName.replaceAll("\\.cif$", ".cml.xml");
184 }
185
186 private String queryUniqueText(Document cifdom, String query) {
187 Text txt = (Text) queryUniqueNode(cifdom, query);
188 return (txt == null) ? null : txt.getValue();
189 }
190
191 private Node queryUniqueNode(Document cifdom, String query) {
192 Nodes nds = cifdom.query(query);
193 if (nds.size() == 0) {
194 return null;
195 } else if (nds.size() > 1) {
196 throw new RuntimeException(query
197 + " did not return a unique result.");
198 }
199 return nds.get(0);
200 }
201
202 public String getMimeType() {
203 return "chemical/x-cif";
204 }
205
206 }