1 package uk.ac.cam.spectra.spectrasub;
2
3 import java.io.File;
4 import java.io.FileOutputStream;
5 import java.io.IOException;
6 import java.io.Serializable;
7 import java.net.URI;
8 import java.util.ArrayList;
9 import java.util.Date;
10 import java.util.HashSet;
11 import java.util.List;
12 import java.util.Set;
13
14 import nu.xom.Document;
15 import nu.xom.ParsingException;
16 import nu.xom.ValidityException;
17
18 import org.apache.commons.io.IOUtils;
19 import org.apache.log4j.Logger;
20 import org.xmlcml.cml.base.CMLBuilder;
21 import org.xmlcml.cml.element.CMLCml;
22
23 import uk.ac.cam.spectra.Combine;
24 import uk.ac.cam.spectra.Constants;
25 import uk.ac.cam.spectra.DateProperty;
26 import uk.ac.cam.spectra.IntegerProperty;
27 import uk.ac.cam.spectra.MetadataProperty;
28 import uk.ac.cam.spectra.Util;
29
30 import com.hp.hpl.jena.rdf.model.Literal;
31 import com.hp.hpl.jena.rdf.model.Model;
32 import com.hp.hpl.jena.rdf.model.ModelFactory;
33 import com.hp.hpl.jena.rdf.model.Property;
34 import com.hp.hpl.jena.rdf.model.RDFNode;
35 import com.hp.hpl.jena.rdf.model.ResIterator;
36 import com.hp.hpl.jena.rdf.model.Resource;
37 import com.hp.hpl.jena.rdf.model.SimpleSelector;
38 import com.hp.hpl.jena.rdf.model.Statement;
39 import com.hp.hpl.jena.rdf.model.StmtIterator;
40
41
42
43
44
45
46
47 public class DataPackage implements Serializable {
48
49
50
51
52 private static final long serialVersionUID = -7484947842430789642L;
53
54 private static final Logger LOG = Logger.getLogger(DataPackage.class);
55
56 public DataPackage() {
57 ;
58 }
59
60 String id;
61
62 private transient Model metadata = ModelFactory.createDefaultModel();
63
64 private List<DataFile> addedDataFiles = new ArrayList<DataFile>();
65
66 private List<DataFile> remainingDataFiles = new ArrayList<DataFile>();
67
68 private DataFile nextUpload;
69
70 private DataFile cmlFile;
71
72 private URI uri;
73
74 public List<DataFile> getAddedDataFiles() {
75 return addedDataFiles;
76 }
77
78 public void setAddedDataFiles(List<DataFile> completedUploads) {
79 this.addedDataFiles = completedUploads;
80 }
81
82 public List<DataFile> getRemainingDataFiles() {
83 return remainingDataFiles;
84 }
85
86 public void setRemainingDataFiles(final List<DataFile> remainingUploads) {
87 if (remainingUploads == null) {
88 throw new IllegalArgumentException("Argument cannot be null");
89 }
90 this.remainingDataFiles.clear();
91 LOG.debug("Adding all of "+ remainingUploads);
92 this.remainingDataFiles.addAll(remainingUploads);
93 LOG.debug("remaining data files: "+ remainingDataFiles);
94 }
95
96 public String getId() {
97 return id;
98 }
99
100 public void setId(String id) {
101 this.id = id;
102 }
103
104 public void add(DataFile df) {
105 remainingDataFiles.add(df);
106 }
107
108 public synchronized DataFile getNextUpload() {
109 if (remainingDataFiles.isEmpty()) {
110 return null;
111 } else {
112 if (nextUpload == null) {
113 nextUpload = remainingDataFiles.get(0);
114 }
115 return nextUpload;
116 }
117 }
118
119 public synchronized void doneNext() {
120 if (remainingDataFiles.isEmpty()) {
121 throw new IllegalStateException("No uploads left to be done...");
122 }
123 addedDataFiles.add(nextUpload);
124 remainingDataFiles.remove(0);
125 nextUpload = null;
126 }
127
128 public boolean equals(Object o) {
129 if (o == null) {
130 return false;
131 }
132 if (!(o instanceof DataPackage)) {
133 return false;
134 }
135 DataPackage pp = (DataPackage) o;
136 if (getId() == null) {
137 return pp.getId() == null;
138 } else {
139 return getId().equals(pp.getId());
140 }
141 }
142
143 public int hashCode() {
144 return (id == null) ? 0 : id.hashCode();
145 }
146
147 public void addMetadata(Model md, Combine c) {
148 LOG.debug(c + " " + md.size() + " statements to current metadata ("
149 + metadata.size() + "). Combination mode: " + c);
150 for (ResIterator ri = md.listSubjects(); ri.hasNext();) {
151 Resource rNew = ri.nextResource();
152 Resource rCurrent = metadata.createResource(rNew.getURI());
153
154 Set<Property> props = new HashSet<Property>();
155 for (StmtIterator si = rNew.listProperties(); si.hasNext();) {
156 Statement sNew = si.nextStatement();
157 props.add(sNew.getPredicate());
158 }
159
160 propLoop: for (Property p : props) {
161 Property pCurrent = metadata.createProperty(p.getURI());
162
163 Set<RDFNode> oldVals = new HashSet<RDFNode>();
164 List<Statement> oldStatements = new ArrayList<Statement>();
165 for (StmtIterator si = rCurrent.listProperties(pCurrent); si
166 .hasNext();) {
167 Statement s = si.nextStatement();
168 oldVals.add(s.getObject());
169 oldStatements.add(s);
170 }
171
172
173 Set<RDFNode> newVals = new HashSet<RDFNode>();
174 for (StmtIterator si = rNew.listProperties(p); si.hasNext();) {
175 Statement s = si.nextStatement();
176 newVals.add(s.getObject());
177 }
178
179
180 if (!oldVals.isEmpty() && c == Combine.underwrite) {
181 continue propLoop;
182 }
183 metadata.remove(oldStatements);
184
185
186 if (c == Combine.combine) {
187 newVals.addAll(oldVals);
188 }
189 for (RDFNode n : newVals) {
190 rCurrent.addProperty(pCurrent, n);
191 }
192 }
193 }
194 LOG.debug("Final metadata size: " + metadata.size());
195 }
196
197 public Model getMetadata() {
198 return metadata;
199 }
200
201 public String toString() {
202 return new StringBuilder().append("DataPackage done: ").append(
203 addedDataFiles).append(" remaining: ").append(
204 remainingDataFiles).toString();
205 }
206
207 protected void setMetadata(Model model) {
208 metadata = model;
209 }
210
211 public URI getUri() {
212 return uri;
213 }
214
215 public void setUri(URI newUri) {
216 if (newUri == null) {
217 throw new IllegalArgumentException("new URI may not be null");
218 }
219 if (uri != null) {
220 if(normalize(newUri).equals(getNormalizedURIString())) {
221 return;
222 }
223 Resource newR = metadata.createResource(normalize(newUri));
224 Resource oldR = metadata.createResource(getNormalizedURIString());
225 LOG.info("Replacing properties of "+ oldR +" with equivalent properties of "+ newR);
226 for (StmtIterator si = metadata.listStatements(oldR,
227 (Property) null, (RDFNode) null); si.hasNext();) {
228 Statement s = si.nextStatement();
229 newR.addProperty(s.getPredicate(), s.getObject());
230 }
231 oldR.removeProperties();
232 }
233 uri = newUri;
234 }
235
236 public String getValue(MetadataProperty prop) {
237 List<String> results = getValues(prop);
238 if (results.size() > 1) {
239 throw new RuntimeException("More than one value for " + prop);
240 } else {
241 return results.size() > 0 ? results.get(0) : null;
242 }
243 }
244
245 public List<String> getValues(MetadataProperty prop) {
246 List<String> results = new ArrayList<String>();
247 Resource subj = metadata.createResource(getNormalizedURIString());
248 Property pred = metadata.createProperty(prop.toURI().toString());
249
250 LOG.debug("Looking for metadata using : " + subj + " , " + pred);
251 for (StmtIterator si = metadata.listStatements(new SimpleSelector(subj,
252 pred, (RDFNode) null)); si.hasNext();) {
253 Statement s = si.nextStatement();
254 RDFNode nd = s.getObject();
255 if (prop.valid(nd, metadata)) {
256 if (!nd.isLiteral()) {
257 LOG.warn(nd + " is not a literal object - skipping");
258 } else {
259 Literal l = (Literal) nd;
260
261 results.add(l.getLexicalForm());
262 }
263 }
264 }
265 return results;
266 }
267
268
269
270
271
272
273
274 public String getNormalizedURIString() {
275 return normalize(uri);
276 }
277
278 private String normalize(URI u) {
279 if (!"file".equals(u.getScheme())) {
280 return u.toString();
281 } else {
282 return u.toString().replaceFirst("^file:", "file://");
283 }
284 }
285
286 public Integer getIntegerValue(IntegerProperty prop) {
287 List<Integer> results = getIntegerValues(prop);
288 if (results.size() > 1) {
289 throw new RuntimeException("More than one integer value for "
290 + prop);
291 } else {
292 return results.size() > 0 ? results.get(0) : null;
293 }
294 }
295
296 public List<Integer> getIntegerValues(IntegerProperty prop) {
297 List<Integer> results = new ArrayList<Integer>();
298 Resource subj = metadata.createResource(getNormalizedURIString());
299 Property pred = metadata.createProperty(prop.toURI().toString());
300 for (StmtIterator si = metadata.listStatements(new SimpleSelector(subj,
301 pred, (RDFNode) null)); si.hasNext();) {
302 Statement s = si.nextStatement();
303 RDFNode nd = s.getObject();
304 if (prop.valid(nd, metadata)) {
305 results.add(prop.decodeInteger(nd));
306 }
307 }
308 return results;
309 }
310
311 public boolean getBooleanValue(MetadataProperty prop) {
312 List<Boolean> results = getBooleanValues(prop);
313 if (results.size() > 1) {
314 throw new RuntimeException("More than one boolean value for "
315 + prop);
316 } else {
317 return results.size() > 0 ? results.get(0) : null;
318 }
319 }
320
321 public List<Boolean> getBooleanValues(MetadataProperty prop) {
322 if (!Constants.XSI_BOOLEAN.equals(prop.getXsiType())) {
323 throw new RuntimeException(prop
324 + " is not a boolean property (type: " + prop.getXsiType());
325 }
326 List<Boolean> results = new ArrayList<Boolean>();
327 Resource subj = metadata.createResource(getNormalizedURIString());
328 Property pred = metadata.createProperty(prop.toURI().toString());
329 for (StmtIterator si = metadata.listStatements(new SimpleSelector(subj,
330 pred, (RDFNode) null)); si.hasNext();) {
331 Statement s = si.nextStatement();
332 RDFNode nd = s.getObject();
333 if (prop.valid(nd, metadata)) {
334 if (!nd.isLiteral()) {
335 LOG.warn(nd + " is not a literal object - skipping");
336 } else {
337 Literal l = (Literal) nd;
338 if (Constants.XSI_BOOLEAN.equals(l.getDatatypeURI())) {
339 results.add(l.getBoolean());
340 }
341 }
342 }
343 }
344 return results;
345 }
346
347 public Date getDateValue(DateProperty prop) {
348 List<Date> results = getDateValues(prop);
349 if (results.size() > 1) {
350 throw new RuntimeException("More than one Date value for " + prop);
351 } else {
352 return results.size() > 0 ? results.get(0) : null;
353 }
354 }
355
356 public List<Date> getDateValues(DateProperty prop) {
357 if (!Constants.XSI_DATE.equals(prop.getXsiType())) {
358 throw new RuntimeException(prop + " is not a Date property (type: "
359 + prop.getXsiType());
360 }
361 List<Date> results = new ArrayList<Date>();
362 Resource subj = metadata.createResource(getNormalizedURIString());
363 Property pred = prop.asProperty(metadata);
364 for (StmtIterator si = metadata.listStatements(new SimpleSelector(subj,
365 pred, (RDFNode) null)); si.hasNext();) {
366 Statement s = si.nextStatement();
367 RDFNode nd = s.getObject();
368 if (prop.valid(nd, metadata)) {
369 if (!nd.isLiteral()) {
370 LOG.warn(nd + " is not a literal object - skipping");
371 } else {
372 Literal l = (Literal) nd;
373 if (Constants.XSI_DATE.equals(l.getDatatypeURI())) {
374 results.add(prop.decodeDate(l));
375 } else {
376 LOG.warn("Found a date (" + l.getLexicalForm()
377 + "), but datatype was " + l.getDatatypeURI()
378 + " and " + Constants.XSI_DATE + " was needed");
379 }
380 }
381 }
382 }
383 return results;
384
385 }
386
387 public DataFile getCmlFile() {
388 return cmlFile;
389 }
390
391 public void setCmlFile(DataFile cmlFile) {
392 this.cmlFile = cmlFile;
393 }
394
395 public void append(Document newCml) {
396 Document oldCml = null;
397 CMLCml root = null;
398 File f = cmlFile.getFile();
399 if (f == null) {
400 throw new RuntimeException("File has not been set for " + cmlFile
401 + " cannot continue");
402 }
403 if (f.length() != 0L) {
404 LOG.debug("CML file is not empty, parsing as CML");
405 try {
406 oldCml = new CMLBuilder().build(f);
407 root = (CMLCml) oldCml.getRootElement();
408 } catch (ValidityException e) {
409 LOG.error("Problem while parsing CML: " + e.getMessage(), e);
410 throw new RuntimeException("Problem while parsing CML: "
411 + e.getMessage(), e);
412 } catch (ParsingException e) {
413 LOG.error("Problem while parsing CML: " + e.getMessage(), e);
414 throw new RuntimeException("Problem while parsing CML: "
415 + e.getMessage(), e);
416 } catch (IOException e) {
417 LOG.error("Problem while parsing CML: " + e.getMessage(), e);
418 throw new RuntimeException("Problem while parsing CML: "
419 + e.getMessage(), e);
420 }
421 } else {
422 LOG.debug("CML file is empty. Creating new Document");
423 root = new CMLCml();
424 oldCml = new Document(root);
425 }
426
427 root.appendChild(newCml.getRootElement());
428 FileOutputStream out = null;
429 try {
430 out = new FileOutputStream(f);
431 Util.print(oldCml, out, false);
432 } catch (IOException e) {
433 LOG.error("Problem saving cml to filesystem " + e.getMessage(), e);
434 throw new RuntimeException("Problem saving cml to filesystem "
435 + e.getMessage(), e);
436 } finally {
437 IOUtils.closeQuietly(out);
438 }
439 }
440
441 }