/** * '$RCSfile$' * Purpose: A Class that implements session utility methods * Copyright: 2008 Regents of the University of California and the * National Center for Ecological Analysis and Synthesis * Authors: Michael Daigle * * '$Author: daigle $' * '$Date: 2008-08-22 16:23:38 -0700 (Fri, 22 Aug 2008) $' * '$Revision: 4297 $' * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package edu.ucsb.nceas.metacat.service; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; import java.io.StringReader; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Hashtable; import java.util.Vector; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; import org.xml.sax.SAXException; import edu.ucsb.nceas.metacat.DocumentImpl; import edu.ucsb.nceas.metacat.MetaCatServlet; import edu.ucsb.nceas.metacat.client.MetacatException; import edu.ucsb.nceas.metacat.database.DBConnection; import edu.ucsb.nceas.metacat.database.DBConnectionPool; import edu.ucsb.nceas.metacat.properties.PropertyService; import edu.ucsb.nceas.metacat.shared.BaseService; import edu.ucsb.nceas.metacat.shared.ServiceException; import edu.ucsb.nceas.utilities.FileUtil; import edu.ucsb.nceas.utilities.PropertyNotFoundException; import edu.ucsb.nceas.utilities.StringUtil; public class XMLSchemaService extends BaseService { public static final String NAMESPACEKEYWORD = "xmlns"; public static final String SCHEMA_DIR = "/schema/"; private static XMLSchemaService xmlSchemaService = null; private static Logger logMetacat = Logger.getLogger(XMLSchemaService.class); private static boolean useFullSchemaValidation = false; // private static String documentNamespace = null; // all schema objects that represent schemas registered in the db that // actually have files on disk. It doesn't include the schemas without namespace private static Vector registeredSchemaList = new Vector(); // all non-amespace schema objects that represent schemas registered in the db that // actually have files on disk. It doesn't include the schemas with namespaces private static Vector registeredNoNamespaceSchemaList = new Vector(); // a convenience list that holds the names of registered namespaces. private static Vector nameSpaceList = new Vector(); // a convenience string that holds all name spaces and locations in a space // delimited format. Those items don't have a format id. This is the old way we handle the schema location private static String nameSpaceAndLocationStringWithoutFormatId = ""; //this hash table is design for schema variants. Two schemas have the same name space, //but they have different content (location). So we different format id to //distinguish them. The key of the hash table is the format id, the values is all the namespace schema location //delimited string for this format id. private static Hashtable formatId_NamespaceLocationHash = new Hashtable(); /** * private constructor since this is a singleton */ private XMLSchemaService() { _serviceName = "XMLSchemaService"; try { doRefresh(); } catch (ServiceException e) { logMetacat.debug(e.getMessage()); } } /** * Get the single instance of XMLService. * * @return the single instance of XMLService */ public static XMLSchemaService getInstance() { if (xmlSchemaService == null) { xmlSchemaService = new XMLSchemaService(); } return xmlSchemaService; } public boolean refreshable() { return true; } /** * refresh the persistant values in this service. */ public synchronized void doRefresh() throws ServiceException { logMetacat.debug("XMLService.doRefresh - refreshing the schema service."); try { populateRegisteredSchemaList(); populateRegisteredNoNamespaceSchemaList(); setUseFullSchemaValidation(); createRegisteredNameSpaceList(); createRegisteredNameSpaceAndLocationString(); } catch (PropertyNotFoundException pnfe) { logMetacat.error("XMLService.doRefresh - Could not find property: xml.useFullSchemaValidation. " + "Setting to false."); } } public void stop() throws ServiceException { return; } /** * Gets the registered schema list. This list holds schemas that exist in * the xml_catalog table that also have associated files in the schema * directory. * * @return a list of XMLSchema objects holding registered schema information */ public Vector getRegisteredSchemaList() { return registeredSchemaList; } /** * Gets the registered schema list without the name space. This list * holds schemas without the name space that exist in the xml_catalog table * that also have associated files in the schema directory. * @return a list of XMLNoNamespaceSchema objects */ public Vector getRegisteredNoNamespaceSchemaList() { return registeredNoNamespaceSchemaList; } /** * Gets the name space and location string. This is a convenience method. * The string will have space delimited namespaces and locations that are * held in the registered schema list. This is the old way Metacat worked. * Usually, we will call the method getNameSapceAndLocation(String formatId) first. * If the method return null, we will call this method. * * @return a string that holds space delimited registered namespaces and * locations. */ public String getNameSpaceAndLocationStringWithoutFormatId() { return nameSpaceAndLocationStringWithoutFormatId; } /** * Get the all schema-location pairs registered for the formatId. * The null will be returned, if we can find it. * @param formatId * @return */ public String getNameSpaceAndLocation(String formatId) { if(formatId == null) { return null; } else { return formatId_NamespaceLocationHash.get(formatId); } } /** * Gets a list of name spaces. This is a convenience method. The list will * have all namespaces that are held in the registered schema list. * * @return a list that holds registered namespaces. */ public Vector getNameSpaceList() { return nameSpaceList; } /** * Report whether xml parsing is set to use full schema parsing. If full * schema parsing is true, new schemas will be validated before being * entered into the database and file system. * * @return true if the xml.useFullSchemaValidation property is set to true, * false otherwise. */ public boolean useFullSchemaValidation() { return useFullSchemaValidation; } /** * sets the UseFullSchemaValidation variable. The only way this should be * set is in the constructor or the refresh methods. */ private void setUseFullSchemaValidation() throws PropertyNotFoundException { String strUseFullSchemaValidation = PropertyService.getProperty("xml.useFullSchemaValidation"); useFullSchemaValidation = Boolean.valueOf(strUseFullSchemaValidation); } /** * Populate the list of registered schemas. This reads all schemas in the * xml_catalog table and then makes sure the schema actually exists and is * readable on disk. */ public void populateRegisteredSchemaList() { DBConnection conn = null; int serialNumber = -1; PreparedStatement pstmt = null; ResultSet resultSet = null; registeredSchemaList = new Vector(); // get the system id from the xml_catalog table for all schemas. String sql = "SELECT public_id, system_id, format_id FROM xml_catalog where " + "entry_type ='" + XMLSchema.getType() + "'"; try { // check out DBConnection conn = DBConnectionPool .getDBConnection("XMLService.populateRegisteredSchemaList"); serialNumber = conn.getCheckOutSerialNumber(); pstmt = conn.prepareStatement(sql); logMetacat.debug("XMLService.populateRegisteredSchemaList - Selecting schemas: " + pstmt.toString()); pstmt.execute(); resultSet = pstmt.getResultSet(); // make sure the schema actually exists on the file system. If so, // add it to the registered schema list. while (resultSet.next()) { String fileNamespace = resultSet.getString(1); String fileLocation = resultSet.getString(2); String formatId = resultSet.getString(3); logMetacat.debug("XMLService.populateRegisteredSchemaList - Registering schema: " + fileNamespace + " " + fileLocation+ " and format id "+formatId); XMLSchema xmlSchema = new XMLSchema(fileNamespace, fileLocation, formatId); if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) { continue;//skip the external schemas. /*//System.out.println("processing an http schemal location"); logMetacat.debug("XMLService.populateRegisteredSchemaList - Processing http schema location: " + fileLocation); xmlSchema.setExternalFileUri(fileLocation); //cache the file try { URL u = new URL(fileLocation); //System.out.println("downloading " + fileLocation); logMetacat.debug("XMLService.populateRegisteredSchemaList - Downloading http based schema..."); HttpURLConnection connection = (HttpURLConnection) u.openConnection(); connection.setDoOutput(true); connection.setRequestMethod("GET"); connection.setReadTimeout(5000); //System.out.println("the ============== the read timeout is ================"+connection.getReadTimeout()); //System.out.println("the ============== the connection timeout is ================"+connection.getConnectTimeout()); connection.connect(); String schema = IOUtils.toString(connection.getInputStream()); String deployDir = PropertyService.getProperty("application.deployDir"); String contextName = PropertyService.getProperty("application.context"); String filename = fileLocation.substring(fileLocation.lastIndexOf("/"), fileLocation.length()); File schemaFile = new File(deployDir + "/" + contextName + "/" + "schema/" + filename); //System.out.println("writing schema to " + schemaFile.getAbsolutePath()); FileWriter fw = new FileWriter(schemaFile); fw.write(schema); fw.flush(); fw.close(); logMetacat.debug("XMLService.populateRegisteredSchemaList - Schema downloaded to " + schemaFile.getAbsolutePath()); fileLocation = "/schema/" + filename; //System.out.println("fileLocation set to " + fileLocation); logMetacat.debug("XMLService.populateRegisteredSchemaList - fileLocation set to " + fileLocation); xmlSchema.setFileName(fileLocation); } catch(MalformedURLException me) { logMetacat.warn("Could not cache a registered schema at " + fileLocation + " because a connection could not be made to the given url: " + me.getMessage()); } catch (IOException ioe) { logMetacat.warn("Could not cache a registered schema at " + fileLocation + " because an IOException occured: " + ioe.getMessage()); } catch(PropertyNotFoundException pnfe) { logMetacat.warn("Could not cache a registered schema at " + fileLocation + " because the property 'application.tempDir' could not be found."); } xmlSchema.setFileName(fileLocation);*/ } else { xmlSchema.setFileName(fileLocation); } if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) { registeredSchemaList.add(xmlSchema); } else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) { //the schema resides on a different server, to validate, we need to go get it //registeredSchemaList.add(xmlSchema); logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered schema list."); } else { logMetacat.warn("XMLService.populateRegisteredSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered " + " in the database but does not exist on the file system. So we don't add it to the registered schema list."); } } } catch (SQLException e) { logMetacat.error("XMLService.populateRegisteredSchemaList - SQL Error: " + e.getMessage()); } finally { try { pstmt.close(); }// try catch (SQLException sqlE) { logMetacat.error("XMLSchemaService.populateRegisteredSchemaList - Error in XMLService.populateRegisteredSchemaList(): " + sqlE.getMessage()); } DBConnectionPool.returnDBConnection(conn, serialNumber); } } /* * Populate the list of registered no-namespace schemas. This reads all no-namespace schemas in the * xml_catalog table and then makes sure the schema actually exists and is * readable on disk. */ private void populateRegisteredNoNamespaceSchemaList() { DBConnection conn = null; int serialNumber = -1; PreparedStatement pstmt = null; ResultSet resultSet = null; registeredNoNamespaceSchemaList = new Vector(); // get the system id from the xml_catalog table for all schemas. String sql = "SELECT no_namespace_schema_location, system_id, format_id FROM xml_catalog where " + "entry_type ='" + XMLNoNamespaceSchema.getType()+ "'"; try { // check out DBConnection conn = DBConnectionPool .getDBConnection("XMLService.populateRegisteredNoNamespaceSchemaList"); serialNumber = conn.getCheckOutSerialNumber(); pstmt = conn.prepareStatement(sql); logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - Selecting schemas: " + pstmt.toString()); pstmt.execute(); resultSet = pstmt.getResultSet(); // make sure the schema actually exists on the file system. If so, // add it to the registered schema list. while (resultSet.next()) { String noNamespaceSchemaLocationURI = resultSet.getString(1); String fileLocation = resultSet.getString(2); String formatId = resultSet.getString(3); logMetacat.debug("XMLService.populateRegisteredNoNamespaceSchemaList - try to register schema: " + noNamespaceSchemaLocationURI + "(no namespace-schema-location-uri) " + fileLocation+ " and format id "+formatId); XMLNoNamespaceSchema xmlSchema = new XMLNoNamespaceSchema(noNamespaceSchemaLocationURI, fileLocation, formatId); if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) { continue;//skip the external schemas. } else { xmlSchema.setFileName(fileLocation); } if (FileUtil.getFileStatus(xmlSchema.getLocalFileDir()) >= FileUtil.EXISTS_READABLE) { registeredNoNamespaceSchemaList.add(xmlSchema); } else if(fileLocation.startsWith("http://") || fileLocation.startsWith("https://")) { //the schema resides on a different server, to validate, we need to go get it //registeredSchemaList.add(xmlSchema); logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + fileLocation + " resides on a different server. So we don't add it to the registered no-namespace schema list."); } else { logMetacat.warn("XMLService.populateRegisteredNoNamespaceSchemaList - Schema file: " + xmlSchema.getLocalFileDir() + " is registered " + " in the database but does not exist on the file system. So we don't add it to the registered no-namespace schema list."); } } } catch (SQLException e) { e.printStackTrace(); logMetacat.error("XMLService.populateRegisteredNoNamespaceSchemaList - SQL Error: " + e.getMessage()); } finally { try { pstmt.close(); }// try catch (SQLException sqlE) { logMetacat.error("XMLSchemaService.populateRegisteredNoNamespaceSchemaList - Error in close the pstmt: " + sqlE.getMessage()); } DBConnectionPool.returnDBConnection(conn, serialNumber); } } /** * create a space delimited string of all namespaces and locations * in the registered schema list. */ private static void createRegisteredNameSpaceAndLocationString() { boolean firstRowWithoutFormatid = true; boolean firstRowWithFormatid = true; nameSpaceAndLocationStringWithoutFormatId = ""; formatId_NamespaceLocationHash = new Hashtable(); for (XMLSchema xmlSchema : registeredSchemaList) { String formatId = xmlSchema.getFormatId(); if( formatId == null ||formatId.trim().equals("")) { //this is to handle the old way - no schema variants if (!firstRowWithoutFormatid) { nameSpaceAndLocationStringWithoutFormatId += " "; } nameSpaceAndLocationStringWithoutFormatId += xmlSchema.getFileNamespace() + " " + xmlSchema.getLocalFileUri(); firstRowWithoutFormatid = false; } else { //it has a format id on the xml_catalog table. It is a variant. if(!formatId_NamespaceLocationHash.containsKey(xmlSchema.getFormatId())) { //the hash table hasn't stored the value. So put it on the hash. formatId_NamespaceLocationHash.put(formatId, xmlSchema.getFileNamespace() + " " + xmlSchema.getLocalFileUri()); } else { //the hash table already has it. We will attache the new pair to the exist value String value = formatId_NamespaceLocationHash.get(formatId); value += " "+ xmlSchema.getFileNamespace() + " " + xmlSchema.getLocalFileUri(); formatId_NamespaceLocationHash.put(formatId, value); } } } } /** * create a lsit of all namespaces in the registered schema list. */ private static void createRegisteredNameSpaceList() { nameSpaceList = new Vector(); for (XMLSchema xmlSchema : registeredSchemaList) { nameSpaceList.add(xmlSchema.getFileNamespace()); } } /** * Checks to see that all schemas are registered. If a single one in the * list is not, this will return false. * * @param schemaList * a list of schemas as they appear in xml. * @return true if all schemas are registered. */ public static boolean areAllSchemasRegistered(Vector schemaList) { for (XMLSchema xmlSchema : schemaList) { if ( ! isSchemaRegistered(xmlSchema)) { return false; } } return true; } /** * Returns true if the schema is registered. * * @param schema * a single schema as it appears in xml * @return true if the schema is registered, false otherwise. */ public static boolean isSchemaRegistered(XMLSchema xmlSchema) { for (XMLSchema registeredXmlSchema : registeredSchemaList) { if (registeredXmlSchema.getLocalFileUri().equals( xmlSchema.getLocalFileUri()) && registeredXmlSchema.getFileNamespace().equals( xmlSchema.getFileNamespace())) { return true; } } return false; } /** * Test if the given namespace registered in Metacat * @param namespace the namespace will be tested * @return true if the namespace is registered; otherwise false. */ public static boolean isNamespaceRegistered(String namespace) { boolean registered = false; if(namespace != null && !namespace.trim().equals("")) { if(nameSpaceList != null && !nameSpaceList.isEmpty()) { for (String registeredNamespace : nameSpaceList) { logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Loop the registered namespaces in Metacat: "+ registeredNamespace+" to compare the given namespace "+namespace); if (registeredNamespace != null && registeredNamespace.equals(namespace)) { registered = true; break; } } } else { logMetacat.error("XMLSchemaService.isNamespaceRegistered - The registered namespace list is null or empty! So we will reject any document which needs validataion"); } } else { logMetacat.debug("XMLSchemaService.isNamespaceRegistered - The given namespace is null or blank. So it is not registered."); } logMetacat.debug("XMLSchemaService.isNamespaceRegistered - Is the namespace "+namespace+" registered in Metacat? "+registered); return registered; } /** * Get the namespace-schemaLocation pairs string based on given formatId and namespace. * The algorithm is: * 1. Look up all pairs of namespace--schemalocation for the given formatId in the xml_catalog table. If we find it, return all of the pairs. * 2. If we can't find anything on the step 1, look up the record for the given namespace. If we find it, return all of pairs namespace-location without formatid. * 3. Return null if we can't find anything. * @param formatId the given format id * @param namespace the given namespace * @return the string of the namespace-schemaLocation pairs (separated by white spaces). The null will be returned, if we can't find one. */ public String findNamespaceAndSchemaLocalLocation(String formatId, String namespace) throws MetacatException{ String location = null; location = getNameSpaceAndLocation(formatId); logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based the format id "+formatId+" is "+location); if(location == null) { //can't find it for given formId. Now we look up namespace logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the location based on the format id "+formatId+" is null and we will lookup the given namespace "+namespace); if(isNamespaceRegistered(namespace)) { location = getNameSpaceAndLocationStringWithoutFormatId(); logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is registered in Metacat"); } else { logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - the given namespace "+namespace+" is NOT registered in Metacat"); } } if(location == null) { logMetacat.error("XMLSchemaService.findNamespaceAndSchemaLocation - We can't find the schema location for the namespace "+namespace+" and format id "+formatId+". This means they are not registered in Metacat."); throw new MetacatException("The namespace "+namespace+" and the format id "+formatId+ " are not registered in the Metacat. So the object using the namespace was rejected since Metacat can't validate the xml instance. Please contact the Metacat operator to register them."); } logMetacat.debug("XMLSchemaService.findNamespaceAndSchemaLocation - The final location string for the namespace "+namespace+" and format id "+formatId+" is "+location); return location; } /** * Get the local (official) location for a no-namespace schema based on the given format id or no-name-space schema location uri. * The format id has the higher priority * 1. Compare the given format id with all registered no-namespace schema. If a match is found, return it. * 2. If the step 1 return null, compare the given noNamespaceSchemaLocationuri. * @param formatId * @param noNamespaceSchemaLocation * @return */ public String findNoNamespaceSchemaLocalLocation(String formatId, String noNamespaceSchemaLocation) throws MetacatException { String location = null; logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id for determining the schema local location is "+formatId); logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocationURI for determining the schema local location is "+noNamespaceSchemaLocation); if(registeredNoNamespaceSchemaList != null && !registeredNoNamespaceSchemaList.isEmpty()) { if((formatId != null && !formatId.trim().equals(""))) { logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given format id "+formatId+ "is not null and let's compare format id first."); for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) { if(schema != null) { String registeredFormatId = schema.getFormatId(); logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has the format id "+registeredFormatId); if(registeredFormatId != null && !registeredFormatId.trim().equals("")) { logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered format id "+registeredFormatId+ "is not null as well. Compare it"); if(formatId.equals(registeredFormatId)) { logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered format id is the same: "+formatId+". Match sucessfully!"); location = schema.getLocalFileUri(); break; } } } } } if(location == null) { logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - we can't find any regisered no-namespace schema has the foramtid "+formatId+ " (if it is null, this means there is no given format id.) Let's compare the noNamespaceSchemaLocaionURL which the given value is "+noNamespaceSchemaLocation); if(noNamespaceSchemaLocation != null && !noNamespaceSchemaLocation.trim().equals("")) { logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation+ "is not null and let's compare it."); for(XMLNoNamespaceSchema schema : registeredNoNamespaceSchemaList) { if(schema != null) { String registeredSchemaLocationURI = schema.getNoNamespaceSchemaLocation(); logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered no-namespace schema has noNamespaceSchemaLocation uri "+registeredSchemaLocationURI); if(registeredSchemaLocationURI != null && !registeredSchemaLocationURI.trim().equals("")) { logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the registered registeredSchemaLocation URI "+registeredSchemaLocationURI+ "is not null as well. Compare it"); if(noNamespaceSchemaLocation.equals(registeredSchemaLocationURI)) { logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the given and registered noNamespaceSchemaLocation is the same: "+noNamespaceSchemaLocation+". Match sucessfully!"); location = schema.getLocalFileUri(); break; } } } } } } } else { logMetacat.warn("XMLSchemaService.findNoNamespaceSchemaLocalLocation - there is no registered no-namespace schema in the Metacat"); } if(location == null) { logMetacat.error("XMLSchemaService.findNoNamespaceSchemaLocalLocation - We can't find Metacat local schema location for the noNamespaceLocation "+noNamespaceSchemaLocation+ " and format id "+formatId+". This means they are not registered in Metacat."); throw new MetacatException("The noNamespaceSchemaLocation "+noNamespaceSchemaLocation+" or the format id "+formatId+ " is not registered in the Metacat. So the object using them was rejected since Metacat can't validate the xml instance. Please contact the Metacat operator to register them."); } logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocalLocation - the schema location is "+location+" (if it is null, this means it is not registered) for the format id "+formatId+ " or noNamespaceSchemaLocation URI "+noNamespaceSchemaLocation); return location; } /** * See if schemas have been specified in the xml:schemalocation attribute. * If so, return a vector of the system ids. * * @param xml * the document we want to look in for schema location * @return a vector of XMLSchema objects, or an empty vector if none are * found */ public static Vector findSchemasInXML(StringReader xml) throws IOException { Logger logMetacat = Logger.getLogger(MetaCatServlet.class); Vector schemaList = new Vector(); // no xml. return empty vector if (xml == null) { logMetacat.debug("XMLSchemaService.findSchemasInXML - Returning empty schemaList."); return schemaList; } // Get the "second line" from the xml String targetLine = getSchemaLine(xml); // see if there is a match for xsi.schemaLocation. If so, extract the // schemas. if (targetLine != null) { String regex = "(\\p{Graph}*):schemaLocation=\"([^\"]*)\""; Pattern pattern = Pattern.compile(regex, Pattern.CASE_INSENSITIVE | Pattern.DOTALL); Matcher matcher = pattern.matcher(targetLine); int i = 0; while (i < targetLine.length()) { if (!matcher.find(i)) { break; } String uri = matcher.group(2); uri = StringUtil.replaceTabsNewLines(uri); uri = StringUtil.replaceDuplicateSpaces(uri); // each namespace could have several schema locations. parsedUri // will hold a list of uri and files. Vector parsedUri = StringUtil.toVector(uri, ' '); for (int j = 0; j < parsedUri.size(); j = j + 2) { if (j + 1 >= parsedUri.size()) { throw new IOException( "Odd number of elements found when parsing schema location: " + targetLine + ". There should be an even number of uri/files in location."); } String formatId = null; XMLSchema xmlSchema = new XMLSchema(parsedUri.get(j), parsedUri .get(j + 1), formatId); schemaList.add(xmlSchema); } i = matcher.end(); } } logMetacat.debug("XMLSchemaService.findSchemasInXML - Schemas for xml are " + schemaList.toString()); return schemaList; } /** * Returns the namespace for an xml document. * @param xml * the document to search * @return a string holding the namespace. Null will be returned if there is no namespace. * @throws SAXException * @throws PropertyNotFoundException */ public static String findDocumentNamespace(StringReader xml) throws IOException, PropertyNotFoundException, SAXException { String namespace = null; /*String eml2_0_0NameSpace = DocumentImpl.EML2_0_0NAMESPACE; String eml2_0_1NameSpace = DocumentImpl.EML2_0_1NAMESPACE; String eml2_1_0NameSpace = DocumentImpl.EML2_1_0NAMESPACE; String eml2_1_1NameSpace = DocumentImpl.EML2_1_1NAMESPACE;*/ if (xml == null) { logMetacat.debug("XMLSchemaService.findDocumentNamespace - XML doc is null. There is no namespace."); return namespace; } XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml); namespaceParser.parse(); namespace = namespaceParser.getNamespace(); /*String targetLine = getSchemaLine(xml); // the prefix is at the beginning of the doc String prefix = null; String regex1 = "^\\s*(\\p{Graph}+):\\p{Graph}*\\s+"; Pattern pattern = Pattern.compile(regex1, Pattern.CASE_INSENSITIVE); Matcher matcher = pattern.matcher(targetLine); if (matcher.find()) { prefix = matcher.group(1).trim(); } // if a prefix was found, we are looking for xmlns:="namespace" // if no prefix was found, we will look for the default namespace. String regex2; if (prefix != null) { logMetacat.debug("XMLSchemaService.findDocumentNamespace - found the prefix for the document "+prefix); regex2 = "xmlns:" + prefix + "=['\"]([^\"])*['\"]"; } else { //regex2 = "xmlns:.*=['\"](.*)['\"]"; logMetacat.debug("XMLSchemaService.findDocumentNamespace - can't found the prefix for the document, so we look for the default namespace"); regex2 = "xmlns=['\"](.*)['\"]"; } Pattern pattern2 = Pattern.compile(regex2, Pattern.CASE_INSENSITIVE); Matcher matcher2 = pattern2.matcher(targetLine); if (matcher2.find()) { logMetacat.debug("XMLSchemaService.findDocumentNamespace - it has either a prefix or a default namespace"); System.out.println("the match group 0"+" is "+matcher2.group()); namespace = matcher2.group(1); System.out.println("the match group "+" is "+namespace); if (namespace.indexOf(eml2_0_0NameSpace) != -1) { namespace = eml2_0_0NameSpace; } else if (namespace.indexOf(eml2_0_1NameSpace) != -1) { namespace = eml2_0_1NameSpace; } else if (namespace.indexOf(eml2_1_0NameSpace) != -1) { namespace = eml2_1_0NameSpace; } else if (namespace.indexOf(eml2_1_1NameSpace) != -1) { namespace = eml2_1_1NameSpace; } }*/ logMetacat.debug("XMLSchemaService.findDocumentNamespace - the namespace (null means no namespace) in the document is "+namespace); return namespace; } /** * Get the attribute value of the noNamespaceSchemaLcation of the given xml * @param xml the xml obect needs to be searched * @return the attribute value of the noNamespaceSchemaLcation. The null will return if it can't be found. * @throws SAXException * @throws PropertyNotFoundException * @throws IOException */ public static String findNoNamespaceSchemaLocationAttr(StringReader xml) throws PropertyNotFoundException, SAXException, IOException { String noNamespaceSchemaLocation = null; XMLNamespaceParser namespaceParser = new XMLNamespaceParser(xml); namespaceParser.parse(); noNamespaceSchemaLocation = namespaceParser.getNoNamespaceSchemaLocation(); logMetacat.debug("XMLSchemaService.findNoNamespaceSchemaLocation - the noNamespaceSchemaLocation (null means no namespace) in the document is "+noNamespaceSchemaLocation); return noNamespaceSchemaLocation; } /** * Return the line from xml that holds the metadata like namespace and * schema location * * @param xml * the document to parse * @return the "second" line of the document */ private static String getSchemaLine(StringReader xml) throws IOException { Logger logMetacat = Logger.getLogger(MetaCatServlet.class); // find the line String secondLine = null; int count = 0; final int TARGETNUM = 1; StringBuffer buffer = new StringBuffer(); boolean comment = false; boolean processingInstruction = false; char thirdPreviousCharacter = '?'; char secondPreviousCharacter = '?'; char previousCharacter = '?'; char currentCharacter = '?'; int tmp = xml.read(); while (tmp != -1) { currentCharacter = (char)tmp; //in a comment if (currentCharacter == '-' && previousCharacter == '-' && secondPreviousCharacter == '!' && thirdPreviousCharacter == '<') { comment = true; } //out of comment if (comment && currentCharacter == '>' && previousCharacter == '-' && secondPreviousCharacter == '-') { comment = false; } //in a processingInstruction if (currentCharacter == '?' && previousCharacter == '<') { processingInstruction = true; } //out of processingInstruction if (processingInstruction && currentCharacter == '>' && previousCharacter == '?') { processingInstruction = false; } //this is not comment or a processingInstruction if (currentCharacter != '!' && previousCharacter == '<' && !comment && !processingInstruction) { count++; } // get target line if (count == TARGETNUM && currentCharacter != '>') { buffer.append(currentCharacter); } if (count == TARGETNUM && currentCharacter == '>') { break; } thirdPreviousCharacter = secondPreviousCharacter; secondPreviousCharacter = previousCharacter; previousCharacter = currentCharacter; tmp = xml.read(); } secondLine = buffer.toString(); logMetacat.debug("XMLSchemaService.getSchemaLine - the second line string is: " + secondLine); xml.reset(); return secondLine; } /** * Get a schema file name from the schema uri. * * @param uri * the uri from which to extract the file name * @return a string holding the file name */ public static String getSchemaFileNameFromUri(String uri) { // get filename from systemId String filename = uri; if (filename != null && !(filename.trim()).equals("")) { int slash = Math.max(filename.lastIndexOf('/'), filename.lastIndexOf('\\')); if (slash > -1) { filename = filename.substring(slash + 1); } } return filename; } /** * Get a base url from the schema url. If url=http://www.example.com/example.xsd, * http://www.example.com/ will be returned. * * @param uri * the uri from which to extract the base url * @return a string holding the base url. null will be return if it is not url. */ public static String getBaseUrlFromSchemaURL(String url) { String baseURL = null; if (url != null && (url.indexOf("http://") != -1 || url.indexOf("https://") !=-1)) { int slash = url.lastIndexOf('/'); if (slash > -1) { baseURL = url.substring(0,slash+1); } } return baseURL; } }