/**
* This work was created by participants in the DataONE project, and is
* jointly copyrighted by participating institutions in DataONE. For
* more information on DataONE, see our web site at http://dataone.org.
*
* Copyright 2021
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* $Id$
*/
package org.dataone.cn.index;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.dataone.cn.indexer.parser.JsonLdSubprocessor;
import org.dataone.cn.indexer.resourcemap.RdfXmlProcessorTest;
import org.dataone.service.types.v1.NodeReference;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.springframework.core.io.Resource;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils;
/**
* Test the json-ld subprocessor
* @author tao
*
*/
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
public class JsonLdSubprocessorTest extends DataONESolrJettyTestBase {
/* Log it */
private static Log log = LogFactory.getLog(JsonLdSubprocessorTest.class);
/* The schema.org object */
private Resource schemaOrgDoc;
private String schemaOrgDocPid = "bco-dmo.jsonld";
private Resource schemaOrgDoc2;
private String schemaOrgDoc2Pid = "doi_A10.5061_dryad.m8s2r36.jsonld";
private Resource schemaOrgDocSOSO;
private String schemaOrgDocSOSOPid = "ESIP-SOSO-v1.2.0-example-full.jsonld";
private Resource schemaOrgTestWithoutVocab;
private String schemaOrgTestWithoutVocabPid = "context-http-without-vocab.jsonld";
private Resource schemaOrgTestDocHttpVocab;
private String schemaOrgTestDocHttpVocabPid = "context-http-vocab.jsonld";
private Resource schemaOrgTestDocHttpsVocab;
private String schemaOrgTestDocHttpsVocabPid = "context-https-vocab.jsonld";
private Resource schemaOrgTestDocHttp;
private String schemaOrgTestDocHttpPid = "context-http.jsonld";
private Resource schemaOrgTestDocHttps;
private String schemaOrgTestDocHttpsPid = "context-https.jsonld";
private Resource schemaOrgTestDocDryad1;
private String schemaOrgTestDocDryad1Pid = "doi.org_10.5061_dryad.5qb78.jsonld";
private Resource schemaOrgTestDocDryad2;
private String schemaOrgTestDocDryad2Pid = "doi.org_10.5061_dryad.41sk145.jsonld";
private Resource schemaOrgTesHakaiDeep;
private String schemaOrgTesHakaiDeepPid = "hakai-deep-schema.jsonld";
/* An instance of the RDF/XML Subprocessor */
private JsonLdSubprocessor jsonLdSubprocessor;
/* Store a map of expected Solr fields and their values for testing */
private HashMap expectedFields = new HashMap();
private static final int SLEEPTIME = 8000;
private static final int SLEEP = 2000;
private static final int TIMES = 10;
/**
* For each test, set up the Solr service and test data
*
* @throws Exception
*/
@Before
public void setUp() throws Exception {
// Start up the embedded Jetty server and Solr service
super.setUp();
schemaOrgDoc = (Resource) context.getBean("schemaOrgTestDoc");
schemaOrgDoc2 = (Resource) context.getBean("schemaOrgTestDoc2");
schemaOrgDocSOSO = (Resource) context.getBean("schemaOrgTestDocSOSO");
schemaOrgTestWithoutVocab = (Resource) context.getBean("schemaOrgTestWithoutVocab");
schemaOrgTestDocHttpVocab = (Resource) context.getBean("schemaOrgTestHttpVocab");
schemaOrgTestDocHttpsVocab = (Resource) context.getBean("schemaOrgTestHttpsVocab");
schemaOrgTestDocHttp = (Resource) context.getBean("schemaOrgTestHttp");
schemaOrgTestDocHttps = (Resource) context.getBean("schemaOrgTestHttps");
schemaOrgTestDocDryad1 = (Resource) context.getBean("schemaOrgTestDryad1");
schemaOrgTestDocDryad2 = (Resource) context.getBean("schemaOrgTestDryad2");
schemaOrgTesHakaiDeep = (Resource) context.getBean("schemaOrgTesHakaiDeep");
// instantiate the subprocessor
jsonLdSubprocessor = (JsonLdSubprocessor) context.getBean("jsonLdSubprocessor");
}
/**
* For each test, clean up, bring down the Solr service
*/
@After
public void tearDown() throws Exception {
super.tearDown();
}
/**
* Test the end to end index processing a schema.org 'Dataset' document
*
* @throws Exception
*/
//@Ignore
@Test
public void testInsertSchemaOrg() throws Exception {
//index the object
String id = schemaOrgDocPid;
indexObjectToSolr(id, schemaOrgDoc);
Thread.sleep(SLEEPTIME);
// now process the tasks
//processor.processIndexTaskQueue();
for (int i=0; i resources = new ArrayList<>();
resources.add(schemaOrgTestDocHttp);
resources.add(schemaOrgTestDocHttps);
resources.add(schemaOrgTestDocHttpVocab);
resources.add(schemaOrgTestDocHttpsVocab);
// Insert the schema.org file into the task queue
ArrayList ids = new ArrayList<>();
ids.add(schemaOrgTestDocHttpPid);
ids.add(schemaOrgTestDocHttpsPid);
ids.add(schemaOrgTestDocHttpVocabPid);
ids.add(schemaOrgTestDocHttpsVocabPid);
int i = -1;
String thisId;
for (Resource res : resources) {
i++;
thisId = ids.get(i);
log.info("processing doc with id: " + thisId);
indexObjectToSolr(thisId, res);
Thread.sleep(SLEEPTIME);
// now process the tasks
//processor.processIndexTaskQueue();
for (int j=0; j resources = new ArrayList<>();
resources.add(schemaOrgTestDocDryad1);
resources.add(schemaOrgTestDocDryad2);
// Insert the schema.org file into the task queue
ArrayList ids = new ArrayList<>();
ids.add(schemaOrgTestDocDryad1Pid);
ids.add(schemaOrgTestDocDryad2Pid);
String thisId;
int iDoc = 0;
thisId = ids.get(iDoc);
indexObjectToSolr(thisId, resources.get(iDoc));
Thread.sleep(SLEEPTIME);
// now process the tasks
//processor.processIndexTaskQueue();
for (int i=0; i
* Some Solr fields (e.g. text) are derived by concatenating multiple source fields together into a single value. Because of the
* RDF serialization and retrieval by SPARQL, there is no guarentee that the resulting string will be the same as any previous
* result. Therefore, the only way to check that the value could be the same is to compare the resulting string length, which sould
* always be the same, regardless of the order of component strings that comprise it. This isn't a perfect test, as it doesn't
* definitively prove the string is correct, just that it could be correct.
*
*
* @throws Exception
*/
protected boolean compareFieldLength(String id, String fieldName, int expectedLength) throws SolrServerException, IOException {
boolean equal = true;
ModifiableSolrParams solrParams = new ModifiableSolrParams();
solrParams.set("q", "id:" + ClientUtils.escapeQueryChars(id));
solrParams.set("fl", "*");
QueryResponse qr = getSolrClient().query(solrParams);
SolrDocument result = qr.getResults().get(0);
String testResult = (String) result.getFirstValue(fieldName);
int fieldLength = testResult.length();
System.out.println("++++++++++++++++ the string length of solr result for the string field " + fieldName + " is " + fieldLength);
System.out.println("++++++++++++++++ the expected string length for the field " + fieldName + " is " + expectedLength);
return (fieldLength == expectedLength);
}
@Test
public void testIsHttps() throws Exception {
File file = schemaOrgTestWithoutVocab.getFile();
Object object = JsonUtils.fromInputStream(new FileInputStream(file), "UTF-8");
List list = JsonLdProcessor.expand(object);
assertTrue(!(jsonLdSubprocessor.isHttps(list)));
file = schemaOrgDoc.getFile();
object = JsonUtils.fromInputStream(new FileInputStream(file), "UTF-8");
list = JsonLdProcessor.expand(object);
assertTrue(jsonLdSubprocessor.isHttps(list));
}
@Test
public void testHakaiDeep() throws Exception {
String id = schemaOrgTesHakaiDeepPid;
indexObjectToSolr(id, schemaOrgTesHakaiDeep);
Thread.sleep(2*SLEEPTIME);
// now process the tasks
//processor.processIndexTaskQueue();
for (int i=0; i