View Javadoc

1   package uk.ac.cam.spectra.spectrasub;
2   
3   import java.io.File;
4   import java.io.FileOutputStream;
5   import java.io.IOException;
6   import java.util.Arrays;
7   import java.util.HashMap;
8   import java.util.List;
9   import java.util.Map;
10  
11  import nu.xom.Attribute;
12  import nu.xom.Builder;
13  import nu.xom.Document;
14  import nu.xom.Element;
15  import nu.xom.Nodes;
16  import nu.xom.ParsingException;
17  import nu.xom.ValidityException;
18  
19  import org.apache.commons.io.IOUtils;
20  import org.apache.log4j.Logger;
21  
22  import uk.ac.cam.spectra.Constants;
23  import uk.ac.cam.spectra.EmbargoMetadataEncoder;
24  import uk.ac.cam.spectra.MetadataProperty;
25  import uk.ac.cam.spectra.SpectraMetadata;
26  import uk.ac.cam.spectra.Util;
27  
28  import com.hp.hpl.jena.rdf.model.Model;
29  import com.hp.hpl.jena.rdf.model.Property;
30  import com.hp.hpl.jena.rdf.model.RDFNode;
31  import com.hp.hpl.jena.rdf.model.Resource;
32  import com.hp.hpl.jena.rdf.model.SimpleSelector;
33  import com.hp.hpl.jena.rdf.model.Statement;
34  import com.hp.hpl.jena.rdf.model.StmtIterator;
35  
36  /**
37   * A packager that builds a DSpace SIP profile METS package for the contents and
38   * uses zip to archive the files.
39   * 
40   * @author jimdowning
41   * 
42   */
43  public class DSpaceMETSPackager implements Packager {
44  	private static abstract class Crosswalk {
45  
46  		private static final String TYPE_ATTR = "xsi:type";
47  
48  		public abstract List<Element> crosswalk(Statement stmt);
49  
50  		/**
51  		 * Convenience method for building a single element list.
52  		 * 
53  		 * @param e
54  		 * @return
55  		 */
56  		public List<Element> list(Element... e) {
57  			return Arrays.asList(e);
58  		}
59  
60  		public Attribute xsiType(String value) {
61  			return new Attribute(TYPE_ATTR, Constants.XSI_NS,
62  					value);
63  		}
64  	}
65  	private static final String METS_MANIFEST_NAME = "mets.xml";
66  
67  	private static final String METS_MIME_TYPE = "text/xml";
68  
69  	private static final Logger LOG = Logger
70  			.getLogger(DSpaceMETSPackager.class);
71  
72  	private static final String METS_DIV_EL = "div";
73  
74  	private static final String EBANK_DC_EL = "ebank_dc";
75  
76  	private static final String METS_XMLDATA_EL = "xmlData";
77  
78  	private static final String METS_LABEL_ATTR = "LABEL";
79  
80  	private static final String METS_MDWRAP_EL = "mdWrap";
81  
82  	private static final String XLINK_TYPE_ATTR = "type";
83  
84  	private static final String METS_ID_ATTR = "ID";
85  
86  	private static final Map<String, Crosswalk> EBANK_CROSSWALKS = new HashMap<String, Crosswalk>();
87  
88  	private static final Map<String, Crosswalk> BASE_CROSSWALKS = new HashMap<String, Crosswalk>();
89  
90  	private static void addCrosswalk(Map<String, Crosswalk> map,
91  			MetadataProperty property, final String elName, final String ns,
92  			final String type) {
93  		map.put(property.toURIString(), new Crosswalk() {
94  			public List<Element> crosswalk(Statement stmt) {
95  				Element e = new Element(elName, ns);
96  				if (type != null) {
97  					e.addAttribute(xsiType(type));
98  				}
99  				e.appendChild(stmt.getLiteral().getLexicalForm());
100 				return list(e);
101 			}
102 		});
103 	}
104 
105 	static {
106 		addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.experiment_ref,
107 				"subject", Constants.DC_NS, "ebankterms:ExperimentRef");
108 		addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.experiment_ref,
109 				"subject", Constants.DC_NS, "ebankterms:ExperimentRef");
110 		EBANK_CROSSWALKS.put(SpectraMetadata.chemical_formula.toURIString(),
111 				new Crosswalk() {
112 					public List<Element> crosswalk(Statement stmt) {
113 						Element e = new Element("subject", Constants.DC_NS);
114 						e.addAttribute(xsiType("ebankterms:ChemicalFormula"));
115 						e.appendChild(stmt.getLiteral().getLexicalForm());
116 						Element title = new Element("title", Constants.DC_NS);
117 						title.appendChild(stmt.getLiteral().getLexicalForm());
118 						return list(e, title);
119 					}
120 				});
121 		addCrosswalk(EBANK_CROSSWALKS,
122 				SpectraMetadata.chemical_systematic_name, "subject",
123 				Constants.DC_NS, "spectraterms:SystematicName");
124 		addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.inchi, "identifier",
125 				Constants.DC_NS, "ebankterms:inchi");
126 		addCrosswalk(BASE_CROSSWALKS, SpectraMetadata.experiment_date,
127 				"experimentDate", Constants.SPECTRA_NS, "xsd:date");
128 		addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.chemical_compound_class,
129 				"subject", Constants.DC_NS, "ebankterms:CompoundClass");
130 		addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.institution,
131 				"publisher", Constants.DC_NS, null);
132 		addCrosswalk(EBANK_CROSSWALKS, SpectraMetadata.chemist, "creator",
133 				Constants.DC_NS, null);
134 	}
135 
136 	private EmbargoMetadataEncoder embargoMetadataEncoder;
137 
138 	private DataPackageDAO packageDao;
139 
140 	public DataPackageDAO getPackageDao() {
141 		return packageDao;
142 	}
143 
144 	public void setPackageDao(DataPackageDAO packageDao) {
145 		this.packageDao = packageDao;
146 	}
147 
148 	public void archive(File packagingDir, File manifestFile) {
149 	}
150 
151 	public String getManifestName(DataPackage pkg) {
152 		return METS_MANIFEST_NAME;
153 	}
154 
155 	/**
156 	 * Convert the package's RDF metadata into METS.
157 	 */
158 	public void transcodeAndSerialize(DataPackage pkg, File destinationFile)
159 			throws IOException {
160 		Element mets = generateMETS(pkg);
161 
162 		FileOutputStream fout = null;
163 		try {
164 			fout = new FileOutputStream(destinationFile);
165 			Util.print(new Document(mets), fout, false);
166 		} finally {
167 			IOUtils.closeQuietly(fout);
168 		}
169 	}
170 
171 	public Element generateMETS(DataPackage pkg) {
172 		checkState();
173 		int id = 1;
174 		Element mets = new Element("mets");
175 		mets.setNamespaceURI(Constants.METS_NS);
176 		mets.addNamespaceDeclaration("xsi", Constants.XSI_NS);
177 		mets.addNamespaceDeclaration("xlink", Constants.XLINK_NS);
178 		mets.addNamespaceDeclaration("premis", Constants.PREMIS_NS);
179 		mets.addNamespaceDeclaration("ebank", Constants.EBANK_NS);
180 		mets.addNamespaceDeclaration("ebankterms", Constants.EBANKTERMS_NS);
181 		Attribute a = new Attribute(
182 				"xsi:schemaLocation",
183 				Constants.XSI_NS,
184 				"http://www.loc.gov/METS/ "
185 						+ "http://www.loc.gov/standards/mets/mets.xsd "
186 						+ "http://www.loc.gov/standards/premis "
187 						+ "http://www.loc.gov/standards/premis/PREMIS-v1-0.xsd "
188 						+ "http://www.rdn.ac.uk/oai/ebank/20050808/ebankterms.xsd");
189 		mets.addAttribute(a);
190 		Attribute prof = new Attribute("PROFILE", "DSpace METS SIP Profile 1.0");
191 		mets.addAttribute(prof);
192 
193 		Element fileSec = new Element("fileSec", Constants.METS_NS);
194 		mets.appendChild(fileSec);
195 
196 		Element fileGrp = new Element("fileGrp", Constants.METS_NS);
197 		fileSec.appendChild(fileGrp);
198 		fileGrp.addAttribute(new Attribute("USE", "CONTENT"));
199 
200 		Element structMap = new Element("structMap", Constants.METS_NS);
201 		mets.appendChild(structMap);
202 		structMap.addAttribute(new Attribute(METS_ID_ATTR, "MainStructMap"));
203 		Attribute type = new Attribute("TYPE", "LOGICAL");
204 		structMap.addAttribute(type);
205 		Attribute label = new Attribute(METS_LABEL_ATTR, "DSpace");
206 		structMap.addAttribute(label);
207 
208 		Element structDiv = new Element(METS_DIV_EL, Constants.METS_NS);
209 		structMap.appendChild(structDiv);
210 		structDiv.addAttribute(new Attribute(METS_ID_ATTR,
211 				"main-structural-div"));
212 		Attribute dmdId = new Attribute("DMDID", "Main");
213 		structDiv.addAttribute(dmdId);
214 
215 		// Add file information and metadata
216 		for (DataFile df : pkg.getAddedDataFiles()) {
217 			id = addDataFile(id, fileGrp, structDiv, df);
218 		}
219 
220 		id = addDataFile(id, fileGrp, structDiv, pkg.getCmlFile());
221 
222 		// Generate the package metadata block
223 		Element mainDmd = new Element("dmdSec", Constants.METS_NS);
224 		mets.appendChild(mainDmd);
225 		mainDmd.addAttribute(new Attribute(METS_ID_ATTR, "Main"));
226 
227 		Element mdWrap = new Element(METS_MDWRAP_EL, Constants.METS_NS);
228 		mainDmd.appendChild(mdWrap);
229 		mdWrap.addAttribute(new Attribute(METS_LABEL_ATTR, "eBank Metadata"));
230 		mdWrap.addAttribute(new Attribute("MIMETYPE", METS_MIME_TYPE));
231 		mdWrap.addAttribute(new Attribute("MDTYPE", "DC"));
232 
233 		Element xmlData = new Element(METS_XMLDATA_EL, Constants.METS_NS);
234 		mdWrap.appendChild(xmlData);
235 
236 		Element ebankDC = new Element(EBANK_DC_EL, Constants.EBANK_NS);
237 		xmlData.appendChild(ebankDC);
238 		Element xlinkType = new Element(XLINK_TYPE_ATTR, Constants.DC_NS);
239 		ebankDC.appendChild(xlinkType);
240 		xlinkType.appendChild("NMR Spectrum data holding");
241 
242 		Resource pRes = pkg.getMetadata().createResource(
243 				pkg.getNormalizedURIString());
244 		Model packageMetadata = pkg.getMetadata().query(
245 				new SimpleSelector(pRes, (Property) null, (RDFNode) null));
246 		LOG.debug("Crosswalking " + packageMetadata.size()
247 				+ " statements for package with new URI: "
248 				+ pkg.getNormalizedURIString());
249 		for (StmtIterator si = packageMetadata.listStatements(); si.hasNext();) {
250 			Statement s = si.nextStatement();
251 			LOG.debug("Predicate: " + s.getPredicate());
252 			Crosswalk c = EBANK_CROSSWALKS.get(s.getPredicate().getURI());
253 			if (c != null) {
254 				LOG.debug("Using :" + c);
255 				for (Element e : c.crosswalk(s)) {
256 					ebankDC.appendChild(e);
257 				}
258 			}
259 			Crosswalk c2 = BASE_CROSSWALKS.get(s.getPredicate().getURI());
260 			if (c2 != null) {
261 				LOG.debug("Using: " + c2);
262 				for (Element e : c2.crosswalk(s)) {
263 					xmlData.appendChild(e);
264 				}
265 			}
266 		}
267 		Element embargo = embargoMetadataEncoder.asXML(pkg.getMetadata(), pkg
268 				.getNormalizedURIString());
269 		if (embargo != null) {
270 			xmlData.appendChild(embargo);
271 		}
272 
273 		return mets;
274 	}
275 
276 	private void checkState() {
277 		if (embargoMetadataEncoder == null) {
278 			throw new IllegalStateException(
279 					"METS packager requires an embargo encoder to be provided before execution");
280 		}
281 	}
282 
283 	private int addDataFile(int id, Element fileGrp, Element structDiv,
284 			DataFile df) {
285 		// Produce the file element for this file.
286 		Element fileEl = new Element("file", Constants.METS_NS);
287 		fileGrp.appendChild(fileEl);
288 		int fileid = id++;
289 		fileEl.addAttribute(new Attribute(METS_ID_ATTR, "id"
290 				+ String.valueOf(fileid)));
291 		int fileMdId = id++;
292 		fileEl.addAttribute(new Attribute("DMDID", "id"
293 				+ String.valueOf(fileMdId)));
294 		fileEl.addAttribute(new Attribute("CHECKSUMTYPE", "MD5"));
295 		fileEl.addAttribute(new Attribute("CHECKSUM", df.getMD5Sum()));
296 		fileEl.addAttribute(new Attribute("MIMETYPE", df.getDataFileType()
297 				.getMimeType()));
298 		Element flocat = new Element("FLocat", Constants.METS_NS);
299 		fileEl.appendChild(flocat);
300 		flocat.addAttribute(new Attribute("LOCTYPE", "URL"));
301 		Attribute xlinkType = new Attribute(XLINK_TYPE_ATTR, "simple");
302 		xlinkType.setNamespace("xlink", Constants.XLINK_NS);
303 		flocat.addAttribute(xlinkType);
304 		Attribute xlinkHref = new Attribute("href", df.getFileName());
305 		xlinkHref.setNamespace("xlink", Constants.XLINK_NS);
306 		flocat.addAttribute(xlinkHref);
307 
308 		// Add a div into the main structural div for the file.
309 		Element filediv = new Element(METS_DIV_EL, Constants.METS_NS);
310 		structDiv.appendChild(filediv);
311 		// set id
312 		int structId = id++;
313 		filediv.addAttribute(new Attribute(METS_ID_ATTR, "id"
314 				+ String.valueOf(structId)));
315 		Element fptr = new Element("fptr", Constants.METS_NS);
316 		filediv.appendChild(fptr);
317 		// set file id
318 		Attribute fileId = new Attribute("FILEID", "id"
319 				+ String.valueOf(fileid));
320 		fptr.addAttribute(fileId);
321 
322 		// Produce file specific metadata.
323 		return id;
324 	}
325 
326 	public void archive(DataPackage pkg, File manifestFile) {
327 		// TODO Auto-generated method stub
328 
329 	}
330 
331 	public EmbargoMetadataEncoder getEmbargoMetadataEncoder() {
332 		return embargoMetadataEncoder;
333 	}
334 
335 	public void setEmbargoMetadataEncoder(
336 			EmbargoMetadataEncoder embargoMetadataEncoder) {
337 		this.embargoMetadataEncoder = embargoMetadataEncoder;
338 	}
339 
340 	public FileAndType getFileAndType(String packageId, String filename)
341 			throws NotFoundException {
342 		DataPackage p = packageDao.load(packageId);
343 		String manifestName = getManifestName(p);
344 		FileAndType fat = new FileAndType();
345 		if (manifestName.equals(filename)) {
346 			fat.setMimeType(getManifestMimeType());
347 		} else {
348 			Document doc = null;
349 			try {
350 				doc = new Builder().build(packageDao.getPackagedFile(packageId,
351 						getManifestName(p)));
352 			} catch (ValidityException e) {
353 				LOG.error("Problem building XOM from METS manifest: "
354 						+ e.getMessage(), e);
355 				throw new RuntimeException(
356 						"Problem building XOM from METS manifest: "
357 								+ e.getMessage(), e);
358 			} catch (ParsingException e) {
359 				LOG.error("Problem building XOM from METS manifest: "
360 						+ e.getMessage(), e);
361 				throw new RuntimeException(
362 						"Problem building XOM from METS manifest: "
363 								+ e.getMessage(), e);
364 			} catch (IOException e) {
365 				LOG.error("Problem building XOM from METS manifest: "
366 						+ e.getMessage(), e);
367 				throw new RuntimeException(
368 						"Problem building XOM from METS manifest: "
369 								+ e.getMessage(), e);
370 			}
371 			Nodes nds = doc.query("/mets:mets/mets:fileSec/mets:fileGrp/"
372 					+ "mets:file[mets:FLocat/@xlink:href='" + filename
373 					+ "']/@MIMETYPE", Constants.XPATH_CTX);
374 			if (nds.size() == 0) {
375 				fat.setMimeType("text/plain");
376 			} else {
377 				Attribute a = (Attribute) nds.get(0);
378 				fat.setMimeType(a.getValue());
379 			}
380 		}
381 		fat.setFile(packageDao.getPackagedFile(packageId, filename));
382 		return fat;
383 	}
384 
385 	public String getManifestMimeType() {
386 		return METS_MIME_TYPE;
387 	}
388 
389 }