/**
 * This work was created by participants in the DataONE project, and is
 * jointly copyrighted by participating institutions in DataONE. For
 * more information on DataONE, see our web site at http://dataone.org.
 *
 *   Copyright 2021
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * $Id$
 */
package org.dataone.cn.index;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.*;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.dataone.cn.indexer.parser.JsonLdSubprocessor;
import org.dataone.cn.indexer.resourcemap.RdfXmlProcessorTest;
import org.dataone.service.types.v1.NodeReference;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.springframework.core.io.Resource;

import static org.junit.Assert.assertTrue;

import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.github.jsonldjava.core.JsonLdProcessor;
import com.github.jsonldjava.utils.JsonUtils;

/**
 * Test the json-ld subprocessor
 * @author tao
 *
 */
@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
public class JsonLdSubprocessorTest extends DataONESolrJettyTestBase {

    /* Log it */
    private static Log log = LogFactory.getLog(JsonLdSubprocessorTest.class);

    /* The schema.org objects */
    private Resource schemaOrgDoc;
    private String schemaOrgDocPid = "bco-dmo.jsonld";
    private Resource schemaOrgDoc2;
    private String schemaOrgDoc2Pid = "doi_A10.5061_dryad.m8s2r36.jsonld";
    private Resource schemaOrgDocSOSO;
    private String schemaOrgDocSOSOPid = "ESIP-SOSO-v1.2.0-example-full.jsonld";
    private Resource schemaOrgTestWithoutVocab;
    private String schemaOrgTestWithoutVocabPid = "context-http-without-vocab.jsonld";
    private Resource schemaOrgTestDocHttpVocab;
    private String schemaOrgTestDocHttpVocabPid = "context-http-vocab.jsonld";
    private Resource schemaOrgTestDocHttpsVocab;
    private String schemaOrgTestDocHttpsVocabPid = "context-https-vocab.jsonld";
    private Resource schemaOrgTestDocHttp;
    private String schemaOrgTestDocHttpPid = "context-http.jsonld";
    private Resource schemaOrgTestDocHttps;
    private String schemaOrgTestDocHttpsPid = "context-https.jsonld";
    private Resource schemaOrgTestDocDryad1;
    private String schemaOrgTestDocDryad1Pid = "doi.org_10.5061_dryad.5qb78.jsonld";
    private Resource schemaOrgTestDocDryad2;
    private String schemaOrgTestDocDryad2Pid = "doi.org_10.5061_dryad.41sk145.jsonld";
    private Resource schemaOrgTesHakaiDeep;
    private String schemaOrgTesHakaiDeepPid = "hakai-deep-schema.jsonld";

    /* An instance of the JSON-LD subprocessor */
    private JsonLdSubprocessor jsonLdSubprocessor;

    /* Store a map of expected Solr fields and their values for testing */
    private HashMap expectedFields = new HashMap();

    private static final int SLEEPTIME = 8000;
    private static final int SLEEP = 2000;
    private static final int TIMES = 10;

    /**
     * For each test, set up the Solr service and test data
     *
     * @throws Exception
     */
    @Before
    public void setUp() throws Exception {
        // Start up the embedded Jetty server and Solr service
        super.setUp();
        schemaOrgDoc = (Resource) context.getBean("schemaOrgTestDoc");
        schemaOrgDoc2 = (Resource) context.getBean("schemaOrgTestDoc2");
        schemaOrgDocSOSO = (Resource) context.getBean("schemaOrgTestDocSOSO");
        schemaOrgTestWithoutVocab = (Resource) context.getBean("schemaOrgTestWithoutVocab");
        schemaOrgTestDocHttpVocab = (Resource) context.getBean("schemaOrgTestHttpVocab");
        schemaOrgTestDocHttpsVocab = (Resource) context.getBean("schemaOrgTestHttpsVocab");
        schemaOrgTestDocHttp = (Resource) context.getBean("schemaOrgTestHttp");
        schemaOrgTestDocHttps = (Resource) context.getBean("schemaOrgTestHttps");
        schemaOrgTestDocDryad1 = (Resource) context.getBean("schemaOrgTestDryad1");
        schemaOrgTestDocDryad2 = (Resource) context.getBean("schemaOrgTestDryad2");
        schemaOrgTesHakaiDeep = (Resource) context.getBean("schemaOrgTesHakaiDeep");
        // instantiate the subprocessor
        jsonLdSubprocessor = (JsonLdSubprocessor) context.getBean("jsonLdSubprocessor");
    }

    /**
     * For each test, clean up and bring down the Solr service
     */
    @After
    public void tearDown() throws Exception {
        super.tearDown();
    }

    /**
     * Test the end-to-end index processing of a schema.org 'Dataset' document
     *
     * @throws Exception
     */
    //@Ignore
    @Test
    public void testInsertSchemaOrg() throws Exception {
        // index the object
        String id = schemaOrgDocPid;
        indexObjectToSolr(id, schemaOrgDoc);
        Thread.sleep(SLEEPTIME);
        // now process the tasks
        //processor.processIndexTaskQueue();
        for (int i = 0; i < TIMES; i++) {
            // ...
        }
    }

    /**
     * Test the end-to-end index processing of schema.org documents whose JSON-LD
     * context uses http or https URLs, with and without a vocab mapping.
     * (The original signature of this method did not survive; the name below is assumed.)
     *
     * @throws Exception
     */
    @Test
    public void testInsertSchemaOrgContexts() throws Exception {
        List<Resource> resources = new ArrayList<>();
        resources.add(schemaOrgTestDocHttp);
        resources.add(schemaOrgTestDocHttps);
        resources.add(schemaOrgTestDocHttpVocab);
        resources.add(schemaOrgTestDocHttpsVocab);
        // Insert the schema.org files into the task queue
        ArrayList<String> ids = new ArrayList<>();
        ids.add(schemaOrgTestDocHttpPid);
        ids.add(schemaOrgTestDocHttpsPid);
        ids.add(schemaOrgTestDocHttpVocabPid);
        ids.add(schemaOrgTestDocHttpsVocabPid);
        int i = -1;
        String thisId;
        for (Resource res : resources) {
            i++;
            thisId = ids.get(i);
            log.info("processing doc with id: " + thisId);
            indexObjectToSolr(thisId, res);
            Thread.sleep(SLEEPTIME);
            // now process the tasks
            //processor.processIndexTaskQueue();
            for (int j = 0; j < TIMES; j++) {
                // ...
            }
        }
    }

    /**
     * Test the end-to-end index processing of the Dryad schema.org documents.
     * (The original signature of this method did not survive; the name below is assumed.)
     *
     * @throws Exception
     */
    @Test
    public void testInsertSchemaOrgDryad() throws Exception {
        List<Resource> resources = new ArrayList<>();
        resources.add(schemaOrgTestDocDryad1);
        resources.add(schemaOrgTestDocDryad2);
        // Insert the schema.org file into the task queue
        ArrayList<String> ids = new ArrayList<>();
        ids.add(schemaOrgTestDocDryad1Pid);
        ids.add(schemaOrgTestDocDryad2Pid);
        String thisId;
        int iDoc = 0;
        thisId = ids.get(iDoc);
        indexObjectToSolr(thisId, resources.get(iDoc));
        Thread.sleep(SLEEPTIME);
        // now process the tasks
        //processor.processIndexTaskQueue();
        for (int i = 0; i < TIMES; i++) {
            // ...
        }
    }
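    /*
     * The retry loops in the tests above poll Solr until the submitted document
     * becomes visible in the index; their bodies did not survive in this copy.
     * The helper below is a minimal sketch of that pattern, assuming only the
     * getSolrClient() accessor (also used by compareFieldLength() below) and the
     * SLEEP and TIMES constants defined above. The name waitForSolrDoc is
     * hypothetical, not part of the original class.
     */
    private boolean waitForSolrDoc(String id) throws Exception {
        for (int i = 0; i < TIMES; i++) {
            // query the embedded Solr server for the identifier
            ModifiableSolrParams solrParams = new ModifiableSolrParams();
            solrParams.set("q", "id:" + ClientUtils.escapeQueryChars(id));
            QueryResponse response = getSolrClient().query(solrParams);
            if (response.getResults().getNumFound() > 0) {
                return true; // the document is now searchable
            }
            Thread.sleep(SLEEP); // back off before polling again
        }
        return false; // the document never appeared within TIMES attempts
    }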
    /**
     * Compare the length of the given Solr field value with an expected length.
     * <p>
     * Some Solr fields (e.g. text) are derived by concatenating multiple source fields together
     * into a single value. Because of the RDF serialization and retrieval by SPARQL, there is no
     * guarantee that the resulting string will be the same as any previous result. Therefore, the
     * only way to check that the value could be the same is to compare the resulting string
     * lengths, which should always be the same regardless of the order of the component strings
     * that comprise it. This isn't a perfect test, as it doesn't definitively prove the string is
     * correct, just that it could be correct.
     * </p>
     *
     * @param id the identifier of the indexed document
     * @param fieldName the name of the Solr field to check
     * @param expectedLength the expected length of the field value
     * @return true if the actual and expected lengths match
     * @throws SolrServerException
     * @throws IOException
     */
    protected boolean compareFieldLength(String id, String fieldName, int expectedLength)
            throws SolrServerException, IOException {
        ModifiableSolrParams solrParams = new ModifiableSolrParams();
        solrParams.set("q", "id:" + ClientUtils.escapeQueryChars(id));
        solrParams.set("fl", "*");
        QueryResponse qr = getSolrClient().query(solrParams);
        SolrDocument result = qr.getResults().get(0);
        String testResult = (String) result.getFirstValue(fieldName);
        int fieldLength = testResult.length();
        System.out.println("++++++++++++++++ the string length of the solr result for the field "
                + fieldName + " is " + fieldLength);
        System.out.println("++++++++++++++++ the expected string length for the field "
                + fieldName + " is " + expectedLength);
        return (fieldLength == expectedLength);
    }

    @Test
    public void testIsHttps() throws Exception {
        File file = schemaOrgTestWithoutVocab.getFile();
        Object object = JsonUtils.fromInputStream(new FileInputStream(file), "UTF-8");
        List list = JsonLdProcessor.expand(object);
        assertTrue(!(jsonLdSubprocessor.isHttps(list)));
        file = schemaOrgDoc.getFile();
        object = JsonUtils.fromInputStream(new FileInputStream(file), "UTF-8");
        list = JsonLdProcessor.expand(object);
        assertTrue(jsonLdSubprocessor.isHttps(list));
    }

    @Test
    public void testHakaiDeep() throws Exception {
        String id = schemaOrgTesHakaiDeepPid;
        indexObjectToSolr(id, schemaOrgTesHakaiDeep);
        Thread.sleep(2 * SLEEPTIME);
        // now process the tasks
        //processor.processIndexTaskQueue();
        for (int i = 0; i < TIMES; i++) {
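    /*
     * Usage sketch for compareFieldLength(), with hypothetical values: a test that
     * has indexed a document can verify a concatenated field such as "text" by
     * length rather than by exact content, e.g.
     *
     *     assertTrue(compareFieldLength("bco-dmo.jsonld", "text", 1065));
     *
     * where 1065 stands in for the expected character count of the assembled field.
     */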