View Javadoc

1   package uk.ac.cam.spectra.nmr;
2   
3   import java.io.BufferedReader;
4   import java.io.File;
5   import java.io.FileReader;
6   import java.io.IOException;
7   import java.text.ParseException;
8   import java.text.SimpleDateFormat;
9   import java.util.Date;
10  import java.util.HashMap;
11  import java.util.List;
12  import java.util.Map;
13  import java.util.regex.Matcher;
14  import java.util.regex.Pattern;
15  
16  import nu.xom.Document;
17  
18  import org.apache.commons.io.FileUtils;
19  import org.apache.log4j.Logger;
20  import org.jcamp.parser.IErrorHandler;
21  import org.jcamp.parser.JCAMPException;
22  import org.jcamp.parser.JCAMPReader;
23  import org.jcamp.spectrum.Spectrum;
24  import org.xmlcml.cml.element.CMLSpectrum;
25  
26  import spok.JcampToCMLSpectrumMapper;
27  import uk.ac.cam.spectra.Constants;
28  import uk.ac.cam.spectra.DataFileType;
29  import uk.ac.cam.spectra.DataProcess;
30  import uk.ac.cam.spectra.LiteralProperty;
31  import uk.ac.cam.spectra.MetadataProperty;
32  import uk.ac.cam.spectra.SpectraMetadata;
33  import uk.ac.cam.spectra.Validation;
34  
35  import com.hp.hpl.jena.rdf.model.Model;
36  import com.hp.hpl.jena.rdf.model.ModelFactory;
37  import com.hp.hpl.jena.rdf.model.Resource;
38  
39  /**
40   * SPECTRa data file type bean for handling JCampDX files. Uses the sourceforge
41   * jcamp-dx library for parsing JCampDX and classes from bioclipse to conversion
42   * to CML. The metadata extraction is homespun and based on regular expressions.
43   * 
44   * @author ojd20
45   */
46  public class JCampDX implements DataFileType {
47  
48      /**
49       * 
50       */
51      private static final long serialVersionUID = 8018075397479897101L;
52  
53      private static final String DATE = "DATE";
54  
55      private static final Logger LOG = Logger.getLogger(JCampDX.class);
56  
57      private Map<String, MetadataProperty> factoryBindings = new HashMap<String, MetadataProperty>(
58              20);
59  
60      private static final Pattern LABEL_RE = Pattern.compile("^##(.*)=(.*)$");
61  
62      private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat(
63              "yy/MM/dd");
64  
65      public void init() {
66          factoryBindings.put("TITLE", new LiteralProperty(
67  				Constants.DC_NS, "description"));
68          factoryBindings.put("ORIGIN", SpectraMetadata.institution);
69          factoryBindings.put("MOLFORM", SpectraMetadata.chemical_formula);
70          factoryBindings.put("OWNER", new LiteralProperty(
71  				Constants.DC_NS, "rights"));
72      }
73  
74      /**
75       * 
76       */
77      public Spectrum parseAndValidate(DataProcess process) {
78          Validation validation = new Validation();
79          IErrorHandler handler = new JCampDXErrorHandler(validation);
80          JCAMPReader reader = JCAMPReader.getInstance();
81          reader.setErrorHandler(handler);
82          try {
83              File input = checkInput(process);
84              Spectrum spectrum = reader.createSpectrum(asString(input));
85              return spectrum;
86          } catch (JCAMPException e) {
87              try {
88                  handler.fatal(e.getMessage());
89              } catch (JCAMPException e1) {
90                  throw new RuntimeException(e1);
91              }
92          } finally {
93              process.setValidation(validation);
94          }
95          return null;
96      }
97  
98      private File checkInput(DataProcess process) {
99          File input = process.getSourceFile();
100         if (input == null) {
101             throw new IllegalStateException("No source file set on " + process);
102         }
103         return input;
104     }
105 
106     /**
107      * Reads the content of file (filename) into a string
108      * 
109      * @param filename
110      *            name and path of the file to load
111      * @return content of file in one string
112      */
113     private String asString(File file) {
114         StringBuilder buffer = new StringBuilder();
115         try {
116             FileReader fileReader = new FileReader(file);
117             BufferedReader input = new BufferedReader(fileReader);
118             int character;
119             while ((character = input.read()) != -1) {
120                 buffer.append((char) character);
121             }
122             input.close();
123         } catch (IOException e) {
124             LOG.error("Could not read jcamp file: " + e.getMessage(), e);
125             throw new RuntimeException("Could not read jcamp file: "
126                     + e.getMessage(), e);
127         }
128         return buffer.toString();
129 
130     }
131 
132     /**
133      * @todo File specific (technical) metadata
134      */
135     @SuppressWarnings("unchecked")
136     public Model extractMetadata(File jcampFile, String packageUri) {
137         Model model = ModelFactory.createDefaultModel();
138         Resource subject = model.createResource(packageUri);
139         try {
140             for (String s : (List<String>) FileUtils.readLines(jcampFile, null)) {
141                 Matcher m = LABEL_RE.matcher(s);
142                 if (!m.matches()) {
143                     continue;
144                 } else {
145                     String key = m.group(1).trim();
146                     String val = m.group(2).trim();
147 
148                     MetadataProperty tf = factoryBindings.get(key);
149                     if (tf != null) {
150                         tf.add(subject, val);
151                     } else if (DATE.equals(key)) {
152                         Date d = null;
153                         try {
154                             d = DATE_FORMAT.parse(val);
155                         } catch (ParseException e) {
156                             LOG.warn(e);
157                         }
158                         if (d != null) {
159                             SpectraMetadata.experiment_date.add(subject, d);
160                         }
161                     }
162                 }
163             }
164         } catch (IOException e) {
165             throw new RuntimeException("Problem when reading jcamp file: "
166                     + e.getMessage(), e);
167         }
168 
169         return model;
170     }
171 
172     public String convertName(String fileName) {
173         return new StringBuilder(fileName).delete(fileName.lastIndexOf("."),
174                 fileName.length()).append(".cml.xml").toString();
175     }
176 
177     public String getMimeType() {
178         return "chemical/x-jcamp-dx";
179     }
180 
181     /**
182      * Simple implementation, steps are currently independent.
183      */
184     public void process(DataProcess process) {
185         Spectrum spectrum = null;
186         if (process.getPerformConversion() || process.getPerformValidation()) {
187             spectrum = parseAndValidate(process);
188         }
189         if (process.getPerformExtraction()) {
190             process.setExtractedMetadata(extractMetadata(process
191                     .getSourceFile(), process.getMetadataPackageUri()));
192         }
193         if (process.getPerformConversion()
194                 && !process.getValidation().hasErrors()) {
195             convert(process, spectrum);
196         }
197     }
198 
199     private void convert(DataProcess process, Spectrum spectrum) {
200         CMLSpectrum cmlSpectrum = new JcampToCMLSpectrumMapper()
201                 .mapJcampToCMLSpectrum(spectrum);
202         process.setConvertedCml(new Document(cmlSpectrum));
203     }
204 
205 }