1 package uk.ac.cam.spectra.spectrasub;
2
3 import java.io.File;
4 import java.io.FileOutputStream;
5 import java.io.IOException;
6 import java.util.Arrays;
7 import java.util.HashMap;
8 import java.util.List;
9 import java.util.Map;
10
11 import nu.xom.Attribute;
12 import nu.xom.Builder;
13 import nu.xom.Document;
14 import nu.xom.Element;
15 import nu.xom.Nodes;
16 import nu.xom.ParsingException;
17 import nu.xom.ValidityException;
18
19 import org.apache.commons.io.IOUtils;
20 import org.apache.log4j.Logger;
21
22 import uk.ac.cam.spectra.Constants;
23 import uk.ac.cam.spectra.EmbargoMetadataEncoder;
24 import uk.ac.cam.spectra.MetadataProperty;
25 import uk.ac.cam.spectra.SpectraMetadata;
26 import uk.ac.cam.spectra.Util;
27
28 import com.hp.hpl.jena.rdf.model.Model;
29 import com.hp.hpl.jena.rdf.model.Property;
30 import com.hp.hpl.jena.rdf.model.RDFNode;
31 import com.hp.hpl.jena.rdf.model.Resource;
32 import com.hp.hpl.jena.rdf.model.SimpleSelector;
33 import com.hp.hpl.jena.rdf.model.Statement;
34 import com.hp.hpl.jena.rdf.model.StmtIterator;
35
36
37
38
39
40
41
42
43 public class DSpaceMETSPackager implements Packager {
44 private static abstract class Crosswalk {
45
46 private static final String TYPE_ATTR = "xsi:type";
47
48 public abstract List<Element> crosswalk(Statement stmt);
49
50
51
52
53
54
55
56 public List<Element> list(Element... e) {
57 return Arrays.asList(e);
58 }
59
60 public Attribute xsiType(String value) {
61 return new Attribute(TYPE_ATTR, Constants.XSI_NS,
62 value);
63 }
64 }
65 private static final String METS_MANIFEST_NAME = "mets.xml";
66
67 private static final String METS_MIME_TYPE = "text/xml";
68
69 private static final Logger LOG = Logger
70 .getLogger(DSpaceMETSPackager.class);
71
72 private static final String METS_DIV_EL = "div";
73
74 private static final String EBANK_DC_EL = "ebank_dc";
75
76 private static final String METS_XMLDATA_EL = "xmlData";
77
78 private static final String METS_LABEL_ATTR = "LABEL";
79
80 private static final String METS_MDWRAP_EL = "mdWrap";
81
82 private static final String XLINK_TYPE_ATTR = "type";
83
84 private static final String METS_ID_ATTR = "ID";
85
86 private static final Map<String, Crosswalk> EBANK_CROSSWALKS = new HashMap<String, Crosswalk>();
87
88 private static final Map<String, Crosswalk> BASE_CROSSWALKS = new HashMap<String, Crosswalk>();
89
90 private static void addCrosswalk(Map<String, Crosswalk> map,
91 MetadataProperty property, final String elName, final String ns,
92 final String type) {
93 map.put(property.toURIString(), new Crosswalk() {
94 public List<Element> crosswalk(Statement stmt) {
95 Element e = new Element(elName, ns);
96 if (type != null) {
97 e.addAttribute(xsiType(type));
98 }
99 e.appendChild(stmt.getLiteral().getLexicalForm());
100 return list(e);
101 }
102 });
103 }
104
/*
 * Populates the crosswalk registries. Each entry is keyed by the URI string
 * of a SpectraMetadata property.
 *
 * The experiment_ref crosswalk used to be registered twice with identical
 * arguments; the second put merely replaced the first, so it is registered
 * once here.
 */
static {
    addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.experiment_ref,
            "subject", Constants.DC_NS, "ebankterms:ExperimentRef");

    // The chemical formula is emitted twice: as a typed dc:subject and as
    // the dc:title, both carrying the literal's lexical form.
    EBANK_CROSSWALKS.put(SpectraMetadata.chemical_formula.toURIString(),
            new Crosswalk() {
                public List<Element> crosswalk(Statement stmt) {
                    Element e = new Element("subject", Constants.DC_NS);
                    e.addAttribute(xsiType("ebankterms:ChemicalFormula"));
                    e.appendChild(stmt.getLiteral().getLexicalForm());
                    Element title = new Element("title", Constants.DC_NS);
                    title.appendChild(stmt.getLiteral().getLexicalForm());
                    return list(e, title);
                }
            });

    // NOTE(review): the "spectraterms" prefix is not among the namespace
    // declarations generateMETS puts on the root element - confirm it is
    // declared somewhere before the manifest is consumed.
    addCrosswalk(EBANK_CROSSWALKS,
            SpectraMetadata.chemical_systematic_name, "subject",
            Constants.DC_NS, "spectraterms:SystematicName");
    addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.inchi, "identifier",
            Constants.DC_NS, "ebankterms:inchi");

    // The only crosswalk in BASE_CROSSWALKS: its output is appended to
    // xmlData rather than to ebank_dc.
    addCrosswalk(BASE_CROSSWALKS, SpectraMetadata.experiment_date,
            "experimentDate", Constants.SPECTRA_NS, "xsd:date");

    addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.chemical_compound_class,
            "subject", Constants.DC_NS, "ebankterms:CompoundClass");
    addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.institution,
            "publisher", Constants.DC_NS, null);
    addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.chemist, "creator",
            Constants.DC_NS, null);
}
135
136 private EmbargoMetadataEncoder embargoMetadataEncoder;
137
138 private DataPackageDAO packageDao;
139
140 public DataPackageDAO getPackageDao() {
141 return packageDao;
142 }
143
144 public void setPackageDao(DataPackageDAO packageDao) {
145 this.packageDao = packageDao;
146 }
147
148 public void archive(File packagingDir, File manifestFile) {
149 }
150
151 public String getManifestName(DataPackage pkg) {
152 return METS_MANIFEST_NAME;
153 }
154
155
156
157
158 public void transcodeAndSerialize(DataPackage pkg, File destinationFile)
159 throws IOException {
160 Element mets = generateMETS(pkg);
161
162 FileOutputStream fout = null;
163 try {
164 fout = new FileOutputStream(destinationFile);
165 Util.print(new Document(mets), fout, false);
166 } finally {
167 IOUtils.closeQuietly(fout);
168 }
169 }
170
171 public Element generateMETS(DataPackage pkg) {
172 checkState();
173 int id = 1;
174 Element mets = new Element("mets");
175 mets.setNamespaceURI(Constants.METS_NS);
176 mets.addNamespaceDeclaration("xsi", Constants.XSI_NS);
177 mets.addNamespaceDeclaration("xlink", Constants.XLINK_NS);
178 mets.addNamespaceDeclaration("premis", Constants.PREMIS_NS);
179 mets.addNamespaceDeclaration("ebank", Constants.EBANK_NS);
180 mets.addNamespaceDeclaration("ebankterms", Constants.EBANKTERMS_NS);
181 Attribute a = new Attribute(
182 "xsi:schemaLocation",
183 Constants.XSI_NS,
184 "http://www.loc.gov/METS/ "
185 + "http://www.loc.gov/standards/mets/mets.xsd "
186 + "http://www.loc.gov/standards/premis "
187 + "http://www.loc.gov/standards/premis/PREMIS-v1-0.xsd "
188 + "http://www.rdn.ac.uk/oai/ebank/20050808/ebankterms.xsd");
189 mets.addAttribute(a);
190 Attribute prof = new Attribute("PROFILE", "DSpace METS SIP Profile 1.0");
191 mets.addAttribute(prof);
192
193 Element fileSec = new Element("fileSec", Constants.METS_NS);
194 mets.appendChild(fileSec);
195
196 Element fileGrp = new Element("fileGrp", Constants.METS_NS);
197 fileSec.appendChild(fileGrp);
198 fileGrp.addAttribute(new Attribute("USE", "CONTENT"));
199
200 Element structMap = new Element("structMap", Constants.METS_NS);
201 mets.appendChild(structMap);
202 structMap.addAttribute(new Attribute(METS_ID_ATTR, "MainStructMap"));
203 Attribute type = new Attribute("TYPE", "LOGICAL");
204 structMap.addAttribute(type);
205 Attribute label = new Attribute(METS_LABEL_ATTR, "DSpace");
206 structMap.addAttribute(label);
207
208 Element structDiv = new Element(METS_DIV_EL, Constants.METS_NS);
209 structMap.appendChild(structDiv);
210 structDiv.addAttribute(new Attribute(METS_ID_ATTR,
211 "main-structural-div"));
212 Attribute dmdId = new Attribute("DMDID", "Main");
213 structDiv.addAttribute(dmdId);
214
215
216 for (DataFile df : pkg.getAddedDataFiles()) {
217 id = addDataFile(id, fileGrp, structDiv, df);
218 }
219
220 id = addDataFile(id, fileGrp, structDiv, pkg.getCmlFile());
221
222
223 Element mainDmd = new Element("dmdSec", Constants.METS_NS);
224 mets.appendChild(mainDmd);
225 mainDmd.addAttribute(new Attribute(METS_ID_ATTR, "Main"));
226
227 Element mdWrap = new Element(METS_MDWRAP_EL, Constants.METS_NS);
228 mainDmd.appendChild(mdWrap);
229 mdWrap.addAttribute(new Attribute(METS_LABEL_ATTR, "eBank Metadata"));
230 mdWrap.addAttribute(new Attribute("MIMETYPE", METS_MIME_TYPE));
231 mdWrap.addAttribute(new Attribute("MDTYPE", "DC"));
232
233 Element xmlData = new Element(METS_XMLDATA_EL, Constants.METS_NS);
234 mdWrap.appendChild(xmlData);
235
236 Element ebankDC = new Element(EBANK_DC_EL, Constants.EBANK_NS);
237 xmlData.appendChild(ebankDC);
238 Element xlinkType = new Element(XLINK_TYPE_ATTR, Constants.DC_NS);
239 ebankDC.appendChild(xlinkType);
240 xlinkType.appendChild("NMR Spectrum data holding");
241
242 Resource pRes = pkg.getMetadata().createResource(
243 pkg.getNormalizedURIString());
244 Model packageMetadata = pkg.getMetadata().query(
245 new SimpleSelector(pRes, (Property) null, (RDFNode) null));
246 LOG.debug("Crosswalking " + packageMetadata.size()
247 + " statements for package with new URI: "
248 + pkg.getNormalizedURIString());
249 for (StmtIterator si = packageMetadata.listStatements(); si.hasNext();) {
250 Statement s = si.nextStatement();
251 LOG.debug("Predicate: " + s.getPredicate());
252 Crosswalk c = EBANK_CROSSWALKS.get(s.getPredicate().getURI());
253 if (c != null) {
254 LOG.debug("Using :" + c);
255 for (Element e : c.crosswalk(s)) {
256 ebankDC.appendChild(e);
257 }
258 }
259 Crosswalk c2 = BASE_CROSSWALKS.get(s.getPredicate().getURI());
260 if (c2 != null) {
261 LOG.debug("Using: " + c2);
262 for (Element e : c2.crosswalk(s)) {
263 xmlData.appendChild(e);
264 }
265 }
266 }
267 Element embargo = embargoMetadataEncoder.asXML(pkg.getMetadata(), pkg
268 .getNormalizedURIString());
269 if (embargo != null) {
270 xmlData.appendChild(embargo);
271 }
272
273 return mets;
274 }
275
276 private void checkState() {
277 if (embargoMetadataEncoder == null) {
278 throw new IllegalStateException(
279 "METS packager requires an embargo encoder to be provided before execution");
280 }
281 }
282
283 private int addDataFile(int id, Element fileGrp, Element structDiv,
284 DataFile df) {
285
286 Element fileEl = new Element("file", Constants.METS_NS);
287 fileGrp.appendChild(fileEl);
288 int fileid = id++;
289 fileEl.addAttribute(new Attribute(METS_ID_ATTR, "id"
290 + String.valueOf(fileid)));
291 int fileMdId = id++;
292 fileEl.addAttribute(new Attribute("DMDID", "id"
293 + String.valueOf(fileMdId)));
294 fileEl.addAttribute(new Attribute("CHECKSUMTYPE", "MD5"));
295 fileEl.addAttribute(new Attribute("CHECKSUM", df.getMD5Sum()));
296 fileEl.addAttribute(new Attribute("MIMETYPE", df.getDataFileType()
297 .getMimeType()));
298 Element flocat = new Element("FLocat", Constants.METS_NS);
299 fileEl.appendChild(flocat);
300 flocat.addAttribute(new Attribute("LOCTYPE", "URL"));
301 Attribute xlinkType = new Attribute(XLINK_TYPE_ATTR, "simple");
302 xlinkType.setNamespace("xlink", Constants.XLINK_NS);
303 flocat.addAttribute(xlinkType);
304 Attribute xlinkHref = new Attribute("href", df.getFileName());
305 xlinkHref.setNamespace("xlink", Constants.XLINK_NS);
306 flocat.addAttribute(xlinkHref);
307
308
309 Element filediv = new Element(METS_DIV_EL, Constants.METS_NS);
310 structDiv.appendChild(filediv);
311
312 int structId = id++;
313 filediv.addAttribute(new Attribute(METS_ID_ATTR, "id"
314 + String.valueOf(structId)));
315 Element fptr = new Element("fptr", Constants.METS_NS);
316 filediv.appendChild(fptr);
317
318 Attribute fileId = new Attribute("FILEID", "id"
319 + String.valueOf(fileid));
320 fptr.addAttribute(fileId);
321
322
323 return id;
324 }
325
326 public void archive(DataPackage pkg, File manifestFile) {
327
328
329 }
330
331 public EmbargoMetadataEncoder getEmbargoMetadataEncoder() {
332 return embargoMetadataEncoder;
333 }
334
335 public void setEmbargoMetadataEncoder(
336 EmbargoMetadataEncoder embargoMetadataEncoder) {
337 this.embargoMetadataEncoder = embargoMetadataEncoder;
338 }
339
340 public FileAndType getFileAndType(String packageId, String filename)
341 throws NotFoundException {
342 DataPackage p = packageDao.load(packageId);
343 String manifestName = getManifestName(p);
344 FileAndType fat = new FileAndType();
345 if (manifestName.equals(filename)) {
346 fat.setMimeType(getManifestMimeType());
347 } else {
348 Document doc = null;
349 try {
350 doc = new Builder().build(packageDao.getPackagedFile(packageId,
351 getManifestName(p)));
352 } catch (ValidityException e) {
353 LOG.error("Problem building XOM from METS manifest: "
354 + e.getMessage(), e);
355 throw new RuntimeException(
356 "Problem building XOM from METS manifest: "
357 + e.getMessage(), e);
358 } catch (ParsingException e) {
359 LOG.error("Problem building XOM from METS manifest: "
360 + e.getMessage(), e);
361 throw new RuntimeException(
362 "Problem building XOM from METS manifest: "
363 + e.getMessage(), e);
364 } catch (IOException e) {
365 LOG.error("Problem building XOM from METS manifest: "
366 + e.getMessage(), e);
367 throw new RuntimeException(
368 "Problem building XOM from METS manifest: "
369 + e.getMessage(), e);
370 }
371 Nodes nds = doc.query("/mets:mets/mets:fileSec/mets:fileGrp/"
372 + "mets:file[mets:FLocat/@xlink:href='" + filename
373 + "']/@MIMETYPE", Constants.XPATH_CTX);
374 if (nds.size() == 0) {
375 fat.setMimeType("text/plain");
376 } else {
377 Attribute a = (Attribute) nds.get(0);
378 fat.setMimeType(a.getValue());
379 }
380 }
381 fat.setFile(packageDao.getPackagedFile(packageId, filename));
382 return fat;
383 }
384
385 public String getManifestMimeType() {
386 return METS_MIME_TYPE;
387 }
388
389 }