View Javadoc

1   package uk.ac.cam.spectra.cif;
2   
3   import java.io.File;
4   import java.io.IOException;
5   import java.util.List;
6   
7   import nu.xom.Document;
8   import nu.xom.Node;
9   import nu.xom.Nodes;
10  import nu.xom.Text;
11  
12  import org.apache.log4j.Logger;
13  import org.xmlcml.cml.base.CMLElement;
14  import org.xmlcml.cml.base.CMLRuntimeException;
15  import org.xmlcml.cml.element.CMLMolecule;
16  import org.xmlcml.cml.legacy2cml.cif.CIFConverter;
17  import org.xmlcml.cml.tools.CrystalTool;
18  import org.xmlcml.cml.tools.DisorderTool;
19  import org.xmlcml.cml.tools.MoleculeTool;
20  import org.xmlcml.cml.tools.ValencyTool;
21  import org.xmlcml.euclid.RealRange;
22  
23  import uk.ac.cam.spectra.Constants;
24  import uk.ac.cam.spectra.DataFileException;
25  import uk.ac.cam.spectra.DataFileType;
26  import uk.ac.cam.spectra.DataProcess;
27  import uk.ac.cam.spectra.SpectraMetadata;
28  import uk.ac.cam.spectra.Util;
29  import uk.ac.cam.spectra.Validation;
30  import uk.co.demon.ursus.cif.CIFException;
31  import uk.co.demon.ursus.cif.CIFParser;
32  
33  import com.hp.hpl.jena.rdf.model.Model;
34  import com.hp.hpl.jena.rdf.model.ModelFactory;
35  import com.hp.hpl.jena.rdf.model.Resource;
36  
37  /**
38   * Adapter to fit JUMBO CIF handling facilities into the SPECTRa tools.
39   * 
40   * @author ojd20
41   * 
42   */
43  public class CIF implements DataFileType {
44  
45      /**
46       * 
47       */
48      private static final long serialVersionUID = 7900865193773288717L;
49  
50      private static final double SYMMETRY_CONTACT_DISTANCE = 2.8;
51  
52      private static final int ZERO = 0;
53  
54      private static final Logger LOG = Logger.getLogger(CIF.class);
55  
56      /**
57       * Performs validation, conversion and extraction of a CIF file. The
58       * extraction and conversion are performed on an intermediate CIF DOM
59       * object, so there are no efficiencies in providing a pre-calculated CML.
60       * 
61       * @throws DataFileException
62       *             if there are problems solving the disorder for this file.
63       */
64      public void process(DataProcess process) {
65          File source = process.getSourceFile();
66          if (source == null) {
67              throw new IllegalArgumentException("No source file given");
68          }
69  
70          CIFParser parser = new CIFParser();
71          uk.co.demon.ursus.cif.CIF cif = null;
72          Document cifdom = null;
73          File data = process.getSourceFile();
74  
75          try {
76              cifdom = parser.parse(data);
77              cif = new uk.co.demon.ursus.cif.CIF(cifdom, true);
78          } catch (CIFException e) {
79              Validation v = new Validation();
80              v.getErrors().add(e.getMessage());
81          } catch (IOException e) {
82              throw new RuntimeException(e);
83          }
84  
85          boolean validationSuccess = true;
86          if (process.getValidation() != null) {
87              validationSuccess = !process.getValidation().hasErrors();
88          }
89          if (validationSuccess && process.getPerformConversion()) {
90              CIFConverter converter = new CIFConverter();
91              converter.setControls("NO_GLOBAL", "SKIP_ERRORS", "SKIP_HEADER");
92              converter.setCIF(cif);
93              converter.processCIF();
94              List<CMLElement> cmlList = converter.getCMLElementList();
95              if (cmlList.size() > 1) {
96                  throw new DataFileException(
97                          "Multiple crystals (data blocks) per CIF are not supported. In "
98                                  + data);
99              } else if (cmlList.size() == 0) {
100                 throw new DataFileException("No crystals found in " + data);
101             }
102             CMLElement element = cmlList.get(0);
103 
104             improveCML(element, process);
105             process.setConvertedCml(new Document(element));
106         }
107 
108         if (validationSuccess && process.getPerformExtraction()) {
109             Model model = ModelFactory.createDefaultModel();
110             Resource subject = model.createResource(process
111                     .getMetadataPackageUri());
112             String chemFormula = queryUniqueText(cifdom,
113                     "/cif/datablock/item[@name='_chemical_formula_moiety']/text()");
114             if (chemFormula == null || "?".equals(chemFormula)) {
115                 LOG.debug("Looking for formula in sum field");
116                 chemFormula = queryUniqueText(cifdom,
117                         "/cif/datablock/item[@name='_chemical_formula_sum']/text()");
118             }
119             LOG.debug("Formula: " + chemFormula);
120             if (chemFormula != null) {
121                 SpectraMetadata.chemical_formula.add(subject, chemFormula);
122             }
123 
124             String systematicName = queryUniqueText(cifdom,
125                     "/cif/datablock/item[@name='_chemical_name_systematic']/text()");
126             if (systematicName != null) {
127                 SpectraMetadata.chemical_systematic_name.add(subject,
128                         systematicName);
129             }
130             process.setExtractedMetadata(model);
131         }
132     }
133 
134     /**
135      * 
136      * @param cml
137      * @param p
138      * @throws DataFileException
139      *             if there's disorder and the process is set to validate.
140      * @throws CMLRuntimeException
141      *             if there's disorder and the process isn't set to validate.
142      */
143     private void improveCML(CMLElement cml, DataProcess p) {
144         Nodes moleculeNodes = cml.query(
145                 "//cml:molecule[count(ancestor::cml:molecule)=0]",
146                 Constants.CML_XPATH);
147         int count = 0;
148         for (int i = moleculeNodes.size() - 1; i >= 0; i--) {
149             CMLMolecule molecule = (CMLMolecule) moleculeNodes.get(i);
150             MoleculeTool moleculeTool = new MoleculeTool(molecule);
151             moleculeTool.createCartesiansFromFractionals();
152             if (molecule.getId() == null) {
153                 String molName = "mol" + count++;
154                 molecule.setId(molName);
155             }
156 
157             try {
158                 new DisorderTool(molecule).resolveDisorder();
159             } catch (CMLRuntimeException e) {
160                 if (p.getPerformValidation()) {
161                     p.getValidation().getWarnings().add(e.getMessage());
162                     LOG.warn(e);
163                 } else {
164                     throw new DataFileException(e);
165                 }
166             }
167             CrystalTool crystalTool = new CrystalTool(molecule);
168             CMLMolecule mergedMolecule = crystalTool
169                     .calculateCrystallochemicalUnit(new RealRange(ZERO,
170                             SYMMETRY_CONTACT_DISTANCE
171                                     * SYMMETRY_CONTACT_DISTANCE));
172             Util.print(new Document(mergedMolecule), System.out, true);
173             ValencyTool vt = new ValencyTool(mergedMolecule);
174             vt.adjustBondOrdersAndChargesToValency();
175 
176             molecule.detach();
177             cml.appendChild(mergedMolecule);
178         }
179 
180     }
181 
182     public String convertName(String fileName) {
183         return fileName.replaceAll("\\.cif$", ".cml.xml");
184     }
185 
186     private String queryUniqueText(Document cifdom, String query) {
187         Text txt = (Text) queryUniqueNode(cifdom, query);
188         return (txt == null) ? null : txt.getValue();
189     }
190 
191     private Node queryUniqueNode(Document cifdom, String query) {
192         Nodes nds = cifdom.query(query);
193         if (nds.size() == 0) {
194             return null;
195         } else if (nds.size() > 1) {
196             throw new RuntimeException(query
197                     + " did not return a unique result.");
198         }
199         return nds.get(0);
200     }
201 
202     public String getMimeType() {
203         return "chemical/x-cif";
204     }
205 
206 }