1 package spok;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.util.Collection;
6 import java.util.Iterator;
7 import java.util.Properties;
8
9 import nu.xom.Attribute;
10 import nu.xom.Builder;
11 import nu.xom.Document;
12 import nu.xom.Element;
13 import nu.xom.Nodes;
14 import nu.xom.ParsingException;
15 import nu.xom.ValidityException;
16
17 import org.apache.commons.io.IOUtils;
18 import org.apache.log4j.Logger;
19 import org.jcamp.parser.Utils;
20 import org.jcamp.spectrum.IRSpectrum;
21 import org.jcamp.spectrum.MassSpectrum;
22 import org.jcamp.spectrum.NMRSpectrum;
23 import org.jcamp.spectrum.Peak1D;
24 import org.jcamp.spectrum.Spectrum;
25 import org.jcamp.spectrum.Spectrum1D;
26 import org.jcamp.spectrum.notes.Note;
27 import org.jcamp.spectrum.notes.NoteDescriptor;
28 import org.xmlcml.cml.element.CMLArray;
29 import org.xmlcml.cml.element.CMLConditionList;
30 import org.xmlcml.cml.element.CMLMetadata;
31 import org.xmlcml.cml.element.CMLMetadataList;
32 import org.xmlcml.cml.element.CMLPeak;
33 import org.xmlcml.cml.element.CMLPeakList;
34 import org.xmlcml.cml.element.CMLScalar;
35 import org.xmlcml.cml.element.CMLSpectrum;
36 import org.xmlcml.cml.element.CMLSpectrumData;
37 import org.xmlcml.cml.element.CMLSubstance;
38 import org.xmlcml.cml.element.CMLSubstanceList;
39 import org.xmlcml.cml.element.CMLXaxis;
40 import org.xmlcml.cml.element.CMLYaxis;
41
42
43
44
45
46
47
48
49
50
51
52
53 public class JcampToCMLSpectrumMapper {
54
55 private static final Logger LOG = Logger
56 .getLogger(JcampToCMLSpectrumMapper.class);
57
58
59
60
61
62
63
64
65
66 public CMLSpectrum mapJcampToCMLSpectrum(Spectrum spectrum) {
67 CMLSpectrum cmlSpectrum = new CMLSpectrum();
68 Spectrum1D spectrum1d = (Spectrum1D) spectrum;
69
70 if (spectrum instanceof NMRSpectrum) {
71 cmlSpectrum.setType("NMR");
72 } else if (spectrum instanceof MassSpectrum) {
73 cmlSpectrum.setType("massSpectrum");
74 } else if (spectrum instanceof IRSpectrum) {
75 cmlSpectrum.setType("infrared");
76 }
77 cmlSpectrum.setTitle(spectrum.getTitle());
78 if (spectrum1d.hasPeakTable()) {
79 cmlSpectrum.addPeakList(mapPeaks(spectrum1d));
80 }
81 if (spectrum1d.isFullSpectrum()) {
82 cmlSpectrum.addSpectrumData(mapContData(spectrum1d));
83 }
84
85 if (spectrum.getNotes().size() != 0) {
86 mapNotes(spectrum, cmlSpectrum);
87 }
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102 NoteDescriptor notedescriptor = new NoteDescriptor("SPECTRUMID");
103 String id = null;
104 if (spectrum.getNotes(notedescriptor) != null
105 && spectrum.getNotes(notedescriptor).size() > 0) {
106 Note note = (Note) spectrum.getNotes(notedescriptor).get(0);
107 id = note.getValue().toString();
108 }
109 if (id == null) {
110 id = GenerateId.GenerateId();
111 }
112 cmlSpectrum.setId(id);
113 return cmlSpectrum;
114 }
115
116
117
118
119
120
121
122
123 private CMLPeakList mapPeaks(Spectrum1D spectrum1d) {
124 CMLPeakList cmlPeaks = new CMLPeakList();
125 Peak1D[] peaks = (spectrum1d).getPeakTable();
126 for (int i = 0; i < peaks.length; i++) {
127 CMLPeak peak = new CMLPeak();
128 peak.setXValue(peaks[i].getPosition()[0]);
129 peak.setYValue(peaks[i].getHeight());
130 peak.setXUnits("jcampdx:" + spectrum1d.getXAxisLabel());
131 peak.setYUnits("jcampdx:" + spectrum1d.getYAxisLabel());
132 cmlPeaks.addPeak(peak);
133 }
134 return cmlPeaks;
135 }
136
137
138
139
140
141
142
143
144 private CMLSpectrumData mapContData(Spectrum1D spectrum1d) {
145 CMLSpectrumData xyData = new CMLSpectrumData();
146 double[] xData = spectrum1d.getXData().toArray();
147 double[] yData = spectrum1d.getYData().toArray();
148 CMLXaxis xAxis = new CMLXaxis();
149 CMLYaxis yAxis = new CMLYaxis();
150 xAxis.addArray(new CMLArray(xData));
151 yAxis.addArray(new CMLArray(yData));
152 xyData.addXaxis(xAxis);
153 xyData.addYaxis(yAxis);
154
155 String xLabel = spectrum1d.getXData().getLabel();
156 if (xLabel == null || xLabel.length() < 1) {
157 xLabel = spectrum1d.getXData().getUnit().getName();
158 }
159
160 String yLabel = spectrum1d.getYData().getLabel();
161 if (yLabel == null || yLabel.length() < 1) {
162 yLabel = spectrum1d.getYData().getUnit().getName();
163 }
164
165 xAxis.setTitle("jcampdx:" + xLabel);
166 yAxis.setTitle("jcampdx:" + yLabel);
167
168 return xyData;
169 }
170
171
172
173
174
175
176
177
178
179 private void mapNotes(Spectrum spectrum, CMLSpectrum cmlSpectrum) {
180 Builder builder = new Builder();
181 Document metadataMapping = null;
182
183 InputStream mappingIn = null;
184 try {
185 mappingIn = getClass().getClassLoader().getResourceAsStream(
186 "jcampMetadataMapping.xml");
187 metadataMapping = builder.build(mappingIn);
188 } catch (ValidityException e) {
189 e.printStackTrace();
190 } catch (ParsingException e) {
191 e.printStackTrace();
192 } catch (IOException e) {
193 e.printStackTrace();
194 } finally {
195 IOUtils.closeQuietly(mappingIn);
196 }
197 CMLMetadataList metadataList = new CMLMetadataList();
198 CMLConditionList conditionList = new CMLConditionList();
199 CMLSubstanceList substanceList = new CMLSubstanceList();
200
201 Properties notesProps = new Properties();
202 java.io.InputStream is = null;
203 try {
204 is = getClass().getClassLoader().getResourceAsStream(
205 "notes.properties");
206 if (is == null) {
207 LOG
208 .warn("No notes.properties could be loaded, no notes will be mapped");
209 return;
210 }
211 notesProps.load(is);
212 } catch (java.io.IOException e) {
213 e.printStackTrace();
214 } finally {
215 IOUtils.closeQuietly(is);
216 }
217
218 Collection notesCollection = spectrum.getNotes();
219 Iterator notesIterator = notesCollection.iterator();
220 while (notesIterator.hasNext()) {
221 Note note = (Note) notesIterator.next();
222 if (note.getValue() != " ") {
223 String key = (String) note.getDescriptor().getKey();
224 String jcamp = (String) notesProps.get(key + ".jcamp");
225 if (jcamp != null) {
226 key = jcamp;
227 }
228 boolean foundInAMappingFile = false;
229 String oldKey = key;
230
231 Element rootElem = metadataMapping.getRootElement();
232
233 Attribute prefixAttr = rootElem.getAttribute("prefix");
234
235 if (prefixAttr.getValue().compareTo("jcampdx") == 0) {
236 key = Utils.normalizeLabel(key);
237 if (key.startsWith(".")) {
238 key = "dot" + key.substring(1);
239 }
240 }
241
242 Nodes label = rootElem.query("//entry[@id='" + key + "']");
243 Nodes result = rootElem.query("//entry[@id='" + key
244 + "']/parent::*");
245 if (result.size() == 0) {
246 key = "dot" + key;
247 label = rootElem.query("//entry[@id='" + key + "']");
248 result = rootElem.query("//entry[@id='" + key
249 + "']/parent::*");
250 }
251 if (result.size() > 0) {
252 foundInAMappingFile = true;
253 String title;
254 if (label.size() == 1) {
255 title = ((Element) label.get(0))
256 .getAttributeValue("label");
257 } else {
258 title = sanitize(note.getDescriptor().getName());
259 }
260 String listName = ((Element) result.get(0))
261 .getAttributeValue("name");
262 if (listName.equals("conditionList")) {
263 CMLScalar condition = new CMLScalar();
264 condition.setId(key);
265 condition.setTitle(title);
266 condition.setValue(note.getValue().toString());
267 conditionList.appendChild(condition);
268 } else if (listName.equals("substanceList")) {
269 CMLSubstance substance = new CMLSubstance();
270 substance.setTitle(title);
271 substance.setId(key);
272 nu.xom.Text textNode = new nu.xom.Text(note.getValue()
273 .toString());
274 substance.appendChild(textNode);
275 substanceList.appendChild(substance);
276 } else {
277 CMLMetadata metadata = new CMLMetadata();
278 metadata.setName(prefixAttr.getValue() + ":" + title);
279 metadata.setTitle(title);
280 metadata.setId(key);
281 metadata.setContent(note.getValue().toString());
282 metadataList.appendChild(metadata);
283 }
284 continue;
285 } else {
286 key = oldKey;
287 }
288
289 if (!foundInAMappingFile) {
290
291 CMLMetadata metadata = new CMLMetadata();
292 String name = "jcampdx:"
293 + sanitize(note.getDescriptor().getName());
294 metadata.setName(name);
295 metadata.setTitle(sanitize(note.getDescriptor().getName()));
296 metadata.setId(key);
297 metadata.setContent(note.getValue().toString());
298 metadataList.appendChild(metadata);
299 }
300 }
301 }
302 LOG.debug("MetadataList[" + metadataList.getChildCount()
303 + "], ConditionList[" + conditionList.getChildCount()
304 + "], SubstanceList[" + substanceList.getChildCount() + "]");
305 if (metadataList.getChildCount() > 0) {
306 cmlSpectrum.addMetadataList(metadataList);
307 }
308 if (conditionList.getChildCount() > 0) {
309 cmlSpectrum.addConditionList(conditionList);
310 }
311 if (substanceList.getChildCount() > 0) {
312 cmlSpectrum.appendChild(substanceList);
313 }
314 }
315
316
317
318
319
320
321
322 public String sanitize(String key) {
323
324 if (key.length() < 2) {
325 return key;
326 }
327 StringBuffer sanatizedString = new StringBuffer();
328 int firstCharInt = 0;
329 char firstChar;
330 do {
331 firstChar = key.charAt(firstCharInt);
332 if (Character.isLetter(firstChar))
333 sanatizedString.append(firstChar);
334 firstCharInt++;
335 } while (Character.isLetter(firstChar)
336 && !(firstCharInt < key.length()));
337 for (int i = firstCharInt; i < key.length(); i++) {
338 char character = key.charAt(i);
339 if (Character.isDigit(character) || Character.isLetter(character)
340 || character == '_' || character == '.' || character == '-') {
341 sanatizedString.append(character);
342 }
343 }
344 return sanatizedString.toString();
345 }
346 }