1 package uk.ac.cam.spectra.nmr;
2
3 import java.io.BufferedReader;
4 import java.io.File;
5 import java.io.FileReader;
6 import java.io.IOException;
7 import java.text.ParseException;
8 import java.text.SimpleDateFormat;
9 import java.util.Date;
10 import java.util.HashMap;
11 import java.util.List;
12 import java.util.Map;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15
16 import nu.xom.Document;
17
18 import org.apache.commons.io.FileUtils;
19 import org.apache.log4j.Logger;
20 import org.jcamp.parser.IErrorHandler;
21 import org.jcamp.parser.JCAMPException;
22 import org.jcamp.parser.JCAMPReader;
23 import org.jcamp.spectrum.Spectrum;
24 import org.xmlcml.cml.element.CMLSpectrum;
25
26 import spok.JcampToCMLSpectrumMapper;
27 import uk.ac.cam.spectra.Constants;
28 import uk.ac.cam.spectra.DataFileType;
29 import uk.ac.cam.spectra.DataProcess;
30 import uk.ac.cam.spectra.LiteralProperty;
31 import uk.ac.cam.spectra.MetadataProperty;
32 import uk.ac.cam.spectra.SpectraMetadata;
33 import uk.ac.cam.spectra.Validation;
34
35 import com.hp.hpl.jena.rdf.model.Model;
36 import com.hp.hpl.jena.rdf.model.ModelFactory;
37 import com.hp.hpl.jena.rdf.model.Resource;
38
39
40
41
42
43
44
45
46 public class JCampDX implements DataFileType {
47
48
49
50
51 private static final long serialVersionUID = 8018075397479897101L;
52
53 private static final String DATE = "DATE";
54
55 private static final Logger LOG = Logger.getLogger(JCampDX.class);
56
57 private Map<String, MetadataProperty> factoryBindings = new HashMap<String, MetadataProperty>(
58 20);
59
60 private static final Pattern LABEL_RE = Pattern.compile("^##(.*)=(.*)$");
61
62 private static final SimpleDateFormat DATE_FORMAT = new SimpleDateFormat(
63 "yy/MM/dd");
64
65 public void init() {
66 factoryBindings.put("TITLE", new LiteralProperty(
67 Constants.DC_NS, "description"));
68 factoryBindings.put("ORIGIN", SpectraMetadata.institution);
69 factoryBindings.put("MOLFORM", SpectraMetadata.chemical_formula);
70 factoryBindings.put("OWNER", new LiteralProperty(
71 Constants.DC_NS, "rights"));
72 }
73
74
75
76
77 public Spectrum parseAndValidate(DataProcess process) {
78 Validation validation = new Validation();
79 IErrorHandler handler = new JCampDXErrorHandler(validation);
80 JCAMPReader reader = JCAMPReader.getInstance();
81 reader.setErrorHandler(handler);
82 try {
83 File input = checkInput(process);
84 Spectrum spectrum = reader.createSpectrum(asString(input));
85 return spectrum;
86 } catch (JCAMPException e) {
87 try {
88 handler.fatal(e.getMessage());
89 } catch (JCAMPException e1) {
90 throw new RuntimeException(e1);
91 }
92 } finally {
93 process.setValidation(validation);
94 }
95 return null;
96 }
97
98 private File checkInput(DataProcess process) {
99 File input = process.getSourceFile();
100 if (input == null) {
101 throw new IllegalStateException("No source file set on " + process);
102 }
103 return input;
104 }
105
106
107
108
109
110
111
112
113 private String asString(File file) {
114 StringBuilder buffer = new StringBuilder();
115 try {
116 FileReader fileReader = new FileReader(file);
117 BufferedReader input = new BufferedReader(fileReader);
118 int character;
119 while ((character = input.read()) != -1) {
120 buffer.append((char) character);
121 }
122 input.close();
123 } catch (IOException e) {
124 LOG.error("Could not read jcamp file: " + e.getMessage(), e);
125 throw new RuntimeException("Could not read jcamp file: "
126 + e.getMessage(), e);
127 }
128 return buffer.toString();
129
130 }
131
132
133
134
135 @SuppressWarnings("unchecked")
136 public Model extractMetadata(File jcampFile, String packageUri) {
137 Model model = ModelFactory.createDefaultModel();
138 Resource subject = model.createResource(packageUri);
139 try {
140 for (String s : (List<String>) FileUtils.readLines(jcampFile, null)) {
141 Matcher m = LABEL_RE.matcher(s);
142 if (!m.matches()) {
143 continue;
144 } else {
145 String key = m.group(1).trim();
146 String val = m.group(2).trim();
147
148 MetadataProperty tf = factoryBindings.get(key);
149 if (tf != null) {
150 tf.add(subject, val);
151 } else if (DATE.equals(key)) {
152 Date d = null;
153 try {
154 d = DATE_FORMAT.parse(val);
155 } catch (ParseException e) {
156 LOG.warn(e);
157 }
158 if (d != null) {
159 SpectraMetadata.experiment_date.add(subject, d);
160 }
161 }
162 }
163 }
164 } catch (IOException e) {
165 throw new RuntimeException("Problem when reading jcamp file: "
166 + e.getMessage(), e);
167 }
168
169 return model;
170 }
171
172 public String convertName(String fileName) {
173 return new StringBuilder(fileName).delete(fileName.lastIndexOf("."),
174 fileName.length()).append(".cml.xml").toString();
175 }
176
177 public String getMimeType() {
178 return "chemical/x-jcamp-dx";
179 }
180
181
182
183
184 public void process(DataProcess process) {
185 Spectrum spectrum = null;
186 if (process.getPerformConversion() || process.getPerformValidation()) {
187 spectrum = parseAndValidate(process);
188 }
189 if (process.getPerformExtraction()) {
190 process.setExtractedMetadata(extractMetadata(process
191 .getSourceFile(), process.getMetadataPackageUri()));
192 }
193 if (process.getPerformConversion()
194 && !process.getValidation().hasErrors()) {
195 convert(process, spectrum);
196 }
197 }
198
199 private void convert(DataProcess process, Spectrum spectrum) {
200 CMLSpectrum cmlSpectrum = new JcampToCMLSpectrumMapper()
201 .mapJcampToCMLSpectrum(spectrum);
202 process.setConvertedCml(new Document(cmlSpectrum));
203 }
204
205 }