View Javadoc

1   package spok;
2   
3   import java.io.IOException;
4   import java.io.InputStream;
5   import java.util.Collection;
6   import java.util.Iterator;
7   import java.util.Properties;
8   
9   import nu.xom.Attribute;
10  import nu.xom.Builder;
11  import nu.xom.Document;
12  import nu.xom.Element;
13  import nu.xom.Nodes;
14  import nu.xom.ParsingException;
15  import nu.xom.ValidityException;
16  
17  import org.apache.commons.io.IOUtils;
18  import org.apache.log4j.Logger;
19  import org.jcamp.parser.Utils;
20  import org.jcamp.spectrum.IRSpectrum;
21  import org.jcamp.spectrum.MassSpectrum;
22  import org.jcamp.spectrum.NMRSpectrum;
23  import org.jcamp.spectrum.Peak1D;
24  import org.jcamp.spectrum.Spectrum;
25  import org.jcamp.spectrum.Spectrum1D;
26  import org.jcamp.spectrum.notes.Note;
27  import org.jcamp.spectrum.notes.NoteDescriptor;
28  import org.xmlcml.cml.element.CMLArray;
29  import org.xmlcml.cml.element.CMLConditionList;
30  import org.xmlcml.cml.element.CMLMetadata;
31  import org.xmlcml.cml.element.CMLMetadataList;
32  import org.xmlcml.cml.element.CMLPeak;
33  import org.xmlcml.cml.element.CMLPeakList;
34  import org.xmlcml.cml.element.CMLScalar;
35  import org.xmlcml.cml.element.CMLSpectrum;
36  import org.xmlcml.cml.element.CMLSpectrumData;
37  import org.xmlcml.cml.element.CMLSubstance;
38  import org.xmlcml.cml.element.CMLSubstanceList;
39  import org.xmlcml.cml.element.CMLXaxis;
40  import org.xmlcml.cml.element.CMLYaxis;
41  
42  /**
43   * Maps a JCamp spectrum to a CMLSpectrum.
44   * 
45   * N.B. This class was lifted from bioclipse 2007-04-24. This is regrettable,
46   * but the spok functionality now has no existence independent of bioclipse,
47   * which drags in far too many dependencies.
48   * 
49   * @author Tobias Helmus
50   * @created 19. Dezember 2005
51   * 
52   */
53  public class JcampToCMLSpectrumMapper {
54  
55      private static final Logger LOG = Logger
56              .getLogger(JcampToCMLSpectrumMapper.class);
57  
58      /**
59       * Main method, responsible for calling the mapping methods and setting
60       * general settings
61       * 
62       * @param spectrum
63       *            the JCamp spectrum
64       * @return the CMLSpectrum element
65       */
66      public CMLSpectrum mapJcampToCMLSpectrum(Spectrum spectrum) {
67          CMLSpectrum cmlSpectrum = new CMLSpectrum();
68          Spectrum1D spectrum1d = (Spectrum1D) spectrum;
69  
70          if (spectrum instanceof NMRSpectrum) {
71              cmlSpectrum.setType("NMR");
72          } else if (spectrum instanceof MassSpectrum) {
73              cmlSpectrum.setType("massSpectrum");
74          } else if (spectrum instanceof IRSpectrum) {
75              cmlSpectrum.setType("infrared");
76          }
77          cmlSpectrum.setTitle(spectrum.getTitle());
78          if (spectrum1d.hasPeakTable()) {
79              cmlSpectrum.addPeakList(mapPeaks(spectrum1d));
80          }
81          if (spectrum1d.isFullSpectrum()) {
82              cmlSpectrum.addSpectrumData(mapContData(spectrum1d));
83          }
84  
85          if (spectrum.getNotes().size() != 0) {
86              mapNotes(spectrum, cmlSpectrum);
87          }
88  
89          /*
90           * else if (spectrum instanceof IRSpectrum) { SpokIRSpectrum irSpectrum =
91           * new SpokIRSpectrum(); irSpectrum.setTitle(spectrum.getTitle()); if
92           * (spectrum1d.hasPeakTable()) {
93           * nmrSpectrum.addPeakList(mapPeaks(spectrum1d)); } if
94           * (spectrum1d.isFullSpectrum()) {
95           * nmrSpectrum.addXYDataList(mapContData(spectrum1d)); }
96           * 
97           * if (spectrum.getNotes().size() != 0) {
98           * nmrSpectrum.setNotesTable(mapNotes(spectrum)); } spokSpectrum =
99           * (SpokSpectrum) irSpectrum; }
100          */
101 
102         NoteDescriptor notedescriptor = new NoteDescriptor("SPECTRUMID");
103         String id = null;
104         if (spectrum.getNotes(notedescriptor) != null
105                 && spectrum.getNotes(notedescriptor).size() > 0) {
106             Note note = (Note) spectrum.getNotes(notedescriptor).get(0);
107             id = note.getValue().toString();
108         }
109         if (id == null) {
110             id = GenerateId.GenerateId();
111         }
112         cmlSpectrum.setId(id);
113         return cmlSpectrum;
114     }
115 
116     /**
117      * Maps the peak list
118      * 
119      * @param spectrum1d
120      *            the JCamp spectrum1d
121      * @return a CMLPeakList element
122      */
123     private CMLPeakList mapPeaks(Spectrum1D spectrum1d) {
124         CMLPeakList cmlPeaks = new CMLPeakList();
125         Peak1D[] peaks = (spectrum1d).getPeakTable();
126         for (int i = 0; i < peaks.length; i++) {
127             CMLPeak peak = new CMLPeak();
128             peak.setXValue(peaks[i].getPosition()[0]);
129             peak.setYValue(peaks[i].getHeight());
130             peak.setXUnits("jcampdx:" + spectrum1d.getXAxisLabel());
131             peak.setYUnits("jcampdx:" + spectrum1d.getYAxisLabel());
132             cmlPeaks.addPeak(peak);
133         }
134         return cmlPeaks;
135     }
136 
137     /**
138      * Maps continuous data
139      * 
140      * @param spectrum1d
141      *            the JCamp spectrum1d
142      * @return a CMLSpectrumData element
143      */
144     private CMLSpectrumData mapContData(Spectrum1D spectrum1d) {
145         CMLSpectrumData xyData = new CMLSpectrumData();
146         double[] xData = spectrum1d.getXData().toArray();
147         double[] yData = spectrum1d.getYData().toArray();
148         CMLXaxis xAxis = new CMLXaxis();
149         CMLYaxis yAxis = new CMLYaxis();
150         xAxis.addArray(new CMLArray(xData));
151         yAxis.addArray(new CMLArray(yData));
152         xyData.addXaxis(xAxis);
153         xyData.addYaxis(yAxis);
154 
155         String xLabel = spectrum1d.getXData().getLabel();
156         if (xLabel == null || xLabel.length() < 1) {
157             xLabel = spectrum1d.getXData().getUnit().getName();
158         }
159 
160         String yLabel = spectrum1d.getYData().getLabel();
161         if (yLabel == null || yLabel.length() < 1) {
162             yLabel = spectrum1d.getYData().getUnit().getName();
163         }
164 
165         xAxis.setTitle("jcampdx:" + xLabel);
166         yAxis.setTitle("jcampdx:" + yLabel);
167 
168         return xyData;
169     }
170 
171     /**
172      * Map the notes/metadata
173      * 
174      * @param spectrum
175      *            the JCamp spectrum
176      * @param cmlSpectrum
177      * @return a CMLMetadataList element
178      */
179     private void mapNotes(Spectrum spectrum, CMLSpectrum cmlSpectrum) {
180         Builder builder = new Builder();
181         Document metadataMapping = null;
182 
183         InputStream mappingIn = null;
184         try {
185             mappingIn = getClass().getClassLoader().getResourceAsStream(
186                     "jcampMetadataMapping.xml");
187             metadataMapping = builder.build(mappingIn);
188         } catch (ValidityException e) {
189             e.printStackTrace();
190         } catch (ParsingException e) {
191             e.printStackTrace();
192         } catch (IOException e) {
193             e.printStackTrace();
194         } finally {
195             IOUtils.closeQuietly(mappingIn);
196         }
197         CMLMetadataList metadataList = new CMLMetadataList();
198         CMLConditionList conditionList = new CMLConditionList();
199         CMLSubstanceList substanceList = new CMLSubstanceList();
200 
201         Properties notesProps = new Properties();
202         java.io.InputStream is = null;
203         try {
204             is = getClass().getClassLoader().getResourceAsStream(
205                     "notes.properties");
206             if (is == null) {
207                 LOG
208                         .warn("No notes.properties could be loaded, no notes will be mapped");
209                 return;
210             }
211             notesProps.load(is);
212         } catch (java.io.IOException e) {
213             e.printStackTrace();
214         } finally {
215             IOUtils.closeQuietly(is);
216         }
217 
218         Collection notesCollection = spectrum.getNotes();
219         Iterator notesIterator = notesCollection.iterator();
220         while (notesIterator.hasNext()) {
221             Note note = (Note) notesIterator.next();
222             if (note.getValue() != " ") {
223                 String key = (String) note.getDescriptor().getKey();
224                 String jcamp = (String) notesProps.get(key + ".jcamp");
225                 if (jcamp != null) {
226                     key = jcamp;
227                 }
228                 boolean foundInAMappingFile = false;
229                 String oldKey = key;
230 
231                 Element rootElem = metadataMapping.getRootElement();
232 
233                 Attribute prefixAttr = rootElem.getAttribute("prefix");
234 
235                 if (prefixAttr.getValue().compareTo("jcampdx") == 0) {
236                     key = Utils.normalizeLabel(key);
237                     if (key.startsWith(".")) {
238                         key = "dot" + key.substring(1);
239                     }
240                 }
241 
242                 Nodes label = rootElem.query("//entry[@id='" + key + "']");
243                 Nodes result = rootElem.query("//entry[@id='" + key
244                         + "']/parent::*");
245                 if (result.size() == 0) {
246                     key = "dot" + key;
247                     label = rootElem.query("//entry[@id='" + key + "']");
248                     result = rootElem.query("//entry[@id='" + key
249                             + "']/parent::*");
250                 }
251                 if (result.size() > 0) {
252                     foundInAMappingFile = true;
253                     String title;
254                     if (label.size() == 1) {
255                         title = ((Element) label.get(0))
256                                 .getAttributeValue("label");
257                     } else {
258                         title = sanitize(note.getDescriptor().getName());
259                     }
260                     String listName = ((Element) result.get(0))
261                             .getAttributeValue("name");
262                     if (listName.equals("conditionList")) {
263                         CMLScalar condition = new CMLScalar();
264                         condition.setId(key);
265                         condition.setTitle(title);
266                         condition.setValue(note.getValue().toString());
267                         conditionList.appendChild(condition);
268                     } else if (listName.equals("substanceList")) {
269                         CMLSubstance substance = new CMLSubstance();
270                         substance.setTitle(title);
271                         substance.setId(key);
272                         nu.xom.Text textNode = new nu.xom.Text(note.getValue()
273                                 .toString());
274                         substance.appendChild(textNode);
275                         substanceList.appendChild(substance);
276                     } else {
277                         CMLMetadata metadata = new CMLMetadata();
278                         metadata.setName(prefixAttr.getValue() + ":" + title);
279                         metadata.setTitle(title);
280                         metadata.setId(key);
281                         metadata.setContent(note.getValue().toString());
282                         metadataList.appendChild(metadata);
283                     }
284                     continue;
285                 } else {
286                     key = oldKey;
287                 }
288 
289                 if (!foundInAMappingFile) {
290                     // key = key.substring(3);
291                     CMLMetadata metadata = new CMLMetadata();
292                     String name = "jcampdx:"
293                             + sanitize(note.getDescriptor().getName());
294                     metadata.setName(name);
295                     metadata.setTitle(sanitize(note.getDescriptor().getName()));
296                     metadata.setId(key);
297                     metadata.setContent(note.getValue().toString());
298                     metadataList.appendChild(metadata);
299                 }
300             }
301         }
302         LOG.debug("MetadataList[" + metadataList.getChildCount()
303                 + "], ConditionList[" + conditionList.getChildCount()
304                 + "], SubstanceList[" + substanceList.getChildCount() + "]");
305         if (metadataList.getChildCount() > 0) {
306             cmlSpectrum.addMetadataList(metadataList);
307         }
308         if (conditionList.getChildCount() > 0) {
309             cmlSpectrum.addConditionList(conditionList);
310         }
311         if (substanceList.getChildCount() > 0) {
312             cmlSpectrum.appendChild(substanceList);
313         }
314     }
315 
316     /**
317      * Ensures the output has the pattern "[A-Za-z][A-Za-z0-9_\.\-]*".
318      * 
319      * @param key
320      * @return
321      */
322     public String sanitize(String key) {
323         // assume key length > 1
324         if (key.length() < 2) {
325             return key;
326         }
327         StringBuffer sanatizedString = new StringBuffer();
328         int firstCharInt = 0;
329         char firstChar;
330         do {
331             firstChar = key.charAt(firstCharInt);
332             if (Character.isLetter(firstChar))
333                 sanatizedString.append(firstChar);
334             firstCharInt++;
335         } while (Character.isLetter(firstChar)
336                 && !(firstCharInt < key.length()));
337         for (int i = firstCharInt; i < key.length(); i++) {
338             char character = key.charAt(i);
339             if (Character.isDigit(character) || Character.isLetter(character)
340                     || character == '_' || character == '.' || character == '-') {
341                 sanatizedString.append(character);
342             } // else: bad char, skip
343         }
344         return sanatizedString.toString();
345     }
346 }