/** * '$RCSfile: EMLParser.java,v $' * Copyright: 1997-2002 Regents of the University of California, * University of New Mexico, and * Arizona State University * Sponsors: National Center for Ecological Analysis and Synthesis and * Partnership for Interdisciplinary Studies of Coastal Oceans, * University of California Santa Barbara * Long-Term Ecological Research Network Office, * University of New Mexico * Center for Environmental Studies, Arizona State University * Other funding: National Science Foundation (see README for details) * The David and Lucile Packard Foundation * For Details: http://knb.ecoinformatics.org/ * * '$Author: walbridge $' * '$Date: 2008-11-05 21:08:45 $' * '$Revision: 1.16 $' * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ package org.ecoinformatics.eml; import java.io.*; import java.net.URL; import java.util.*; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.ext.DeclHandler; import org.xml.sax.ext.LexicalHandler; import org.xml.sax.helpers.DefaultHandler; import org.xml.sax.helpers.AttributesImpl; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLReaderFactory; import org.xml.sax.InputSource; import org.apache.xpath.XPathAPI; import org.apache.xerces.parsers.DOMParser; import org.w3c.dom.Attr; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.NodeList; import org.w3c.dom.Document; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.DocumentType; import org.apache.xerces.dom.DocumentTypeImpl; import org.apache.xpath.objects.XObject; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.*; import javax.xml.transform.stream.*; import javax.xml.transform.dom.*; import edu.ucsb.nceas.configxml.*; /** * This is a SAX parser to validate EML packages. 
This parser will validate * an EML package with references based on the following rules: * */
public class EMLParser {
  /*
   * NOTE(review): this listing has lost its line breaks and, in several places, part of its
   * text. The loop headers in parseKeys() and parseConfig() stop after the loop variable and
   * run straight into unrelated statements, and no definitions of parseKeyrefs(),
   * parseKeys(String) or parseKeyrefs(String) are visible although the constructors call
   * them. The code below is kept exactly as found; restore the file from version control
   * before relying on the affected methods.
   */
  private String parserName;                     /* not referenced anywhere in the visible code */
  private ConfigXML config;                      /* key and keyref configuration, loaded by each constructor */
  private Key[] keys;                            /* allocated in parseConfig(), one slot per key node in config */
  private Keyref[] keyrefs;                      /* allocated in parseConfig(), one slot per keyref node in config */
  private Hashtable idHash = new Hashtable();    /* not referenced in the visible code; presumably the ids found - TODO confirm */
  private Hashtable idrefHash = new Hashtable(); /* not referenced in the visible code; presumably the references found - TODO confirm */
  private File xml;                              /* the eml file handed to the File based constructors */

  /**
   * Parses an eml file using the config.xml resource found through the class loader.
   * Stores the file, loads the config, then calls parseConfig(), parseKeys() and
   * parseKeyrefs() in that order.
   *
   * @param xml the eml file to parse
   * @throws EMLParserException if the config cannot be loaded. The exception is not listed
   *         in a throws clause here, unlike the other constructors, so it is presumably
   *         unchecked - TODO confirm.
   *
   * NOTE(review): Class.getResource returns null when the resource is missing; the resulting
   * NullPointerException from configFile.openStream() is raised inside the try block and is
   * therefore also reported through the generic catch below.
   */
  public EMLParser(File xml) { //this(xml, new File("lib/config.xml")); this.xml = xml; URL configFile = getClass().getResource("/config.xml"); try { config = new ConfigXML(configFile.openStream()); } catch(Exception e) { throw new EMLParserException("Config file not found: " + e.getMessage()); } parseConfig(); parseKeys(); parseKeyrefs(); } /** * parses an eml file with an alternate config file * @param xml the eml file to parse * @param configFile the alternate config file to use; it is opened through its absolute path * @throws EMLParserException if the config file cannot be loaded; only the message of the underlying exception is kept */ public EMLParser(File xml, File configFile) throws EMLParserException { this.xml = xml; try { config = new ConfigXML(configFile.getAbsolutePath()); } catch(Exception e) { throw new EMLParserException("Config file not found: " + e.getMessage()); } parseConfig(); parseKeys(); parseKeyrefs(); } /** * parses eml held in a string, using the config.xml resource found through the class loader * @param xmlString the xml to parse; rejected when null or empty * @throws EMLParserException if xmlString is null or empty, or if the config cannot be loaded * @throws IOException declared by the signature; the visible body does not show where it originates * NOTE(review): this constructor never assigns the xml field; it hands xmlString to parseKeys(String) and parseKeyrefs(String), whose definitions are not visible in this listing */ public EMLParser(String xmlString) throws EMLParserException, IOException { if (xmlString == null || xmlString.equals("")) { throw new EMLParserException("The string need to be parse is null"); } URL configFile = getClass().getResource("/config.xml"); try { config = new ConfigXML(configFile.openStream()); } catch(Exception e) { throw new EMLParserException("Config file not found: " + e.getMessage()); } // catch the String reader parseConfig(); parseKeys(xmlString); parseKeyrefs(xmlString); } /** * make sure all ids are unique and hash the keys * NOTE(review): the body of this method is cut off right after the loop variable; the statements that follow use names (s, node, builder, and an xml that is appended to) that are not declared in the visible text and appear to belong to a different method that builds a document from a string */ private void parseKeys() { for(int i=0; i /* NOTE(review): text is missing at this point */ "); s.push(node); } while(!s.empty()) { String node = (String)s.pop(); xml.append(""); } return builder.parse(new InputSource(new 
StringReader(xml.toString()))); } catch(Exception e) { throw new EMLParserException("Error building document fragment: " + e.getMessage()); } } /* end of a method whose beginning is not visible in this listing; any failure in it is rethrown as an EMLParserException carrying only the message of the cause */

  /* Placeholder: the body contains no statements. */
  private void resolveKeys() { }

  /**
   * Gets the content of a path in an xml file (from an input stream).
   * Wraps the stream in an InputSource and delegates to
   * getPathContent(InputSource, String).
   *
   * @param is the stream holding the xml document
   * @param xpath the XPath expression to evaluate against the document
   * @return the nodes selected by xpath
   * @throws Exception whatever the InputSource overload throws
   */
  public static NodeList getPathContent(InputStream is, String xpath) throws Exception {
    InputSource in = new InputSource(is);
    return getPathContent(in, xpath);
  }

  /**
   * Gets the content of a path in an xml document (from a Reader).
   * Parses the reader with a DocumentBuilder that is not namespace aware and evaluates
   * xpath against the resulting document with XPathAPI.selectNodeList.
   *
   * @param read the reader holding the xml document
   * @param xpath the XPath expression to evaluate against the document
   * @return the nodes selected by xpath
   * @throws Exception any parser, transformer or XPath failure, passed on unchanged
   *
   * NOTE(review): the serializer Transformer created in the body is configured but never
   * used before the method returns. The body repeats the InputSource overload instead of
   * delegating to it; the delegating call is left commented out at the end of the method.
   */
  public static NodeList getPathContent(StringReader read, String xpath) throws Exception {
    InputSource in = new InputSource(read);
    DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
    dfactory.setNamespaceAware(false);
    Document doc = dfactory.newDocumentBuilder().parse(in); // Set up an identity transformer to use as serializer. Transformer serializer = TransformerFactory.newInstance().newTransformer(); serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); // Use the simple XPath API to select a nodeIterator. NodeList nl = XPathAPI.selectNodeList(doc, xpath); return nl; //return getPathContent(in, xpath); } /** * shared implementation behind the public getPathContent overload for input streams: parses the source with a DocumentBuilder that is not namespace aware and returns the nodes selected by xpath * NOTE(review): the serializer Transformer is configured here as well but never used */ private static NodeList getPathContent(InputSource in, String xpath) throws Exception { DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); dfactory.setNamespaceAware(false); Document doc = dfactory.newDocumentBuilder().parse(in); // Set up an identity transformer to use as serializer. Transformer serializer = TransformerFactory.newInstance().newTransformer(); serializer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes"); // Use the simple XPath API to select a nodeIterator. 
NodeList nl = XPathAPI.selectNodeList(doc, xpath); return nl; }

  /**
   * Reads the key and keyref definitions out of the config file.
   * Allocates the keys and keyrefs arrays with one slot per key node and per keyref node
   * returned by config.getPathContent.
   *
   * NOTE(review): the loop that fills the arrays is cut off after the loop variable, and
   * the rest of this method is not visible. The statements that follow the gap handle
   * command line arguments and print usage text, so they appear to belong to the command
   * line entry point of the class, whose signature is not visible either.
   */
  private void parseConfig() { try { //parse the keys and keyrefs out of the config file NodeList keyNL = config.getPathContent("//key"); keys = new Key[keyNL.getLength()]; NodeList keyrefNL = config.getPathContent("//keyref"); keyrefs = new Keyref[keyrefNL.getLength()]; //get the keys for(int i=0; i /* NOTE(review): text is missing at this point; what follows is the tail of a condition that skips the banner when the first argument is -q */ 0 && !args[0].equals("-q")) { System.out.println("EML Parser version 1.0"); System.out.println("Note that this parser DOES NOT VALIDATE your eml file "); System.out.println("agains the schema. It only validates the ids and "); System.out.println("references. To validate your eml file against the "); System.out.println("schema, use SAXValidate or another xml parser."); System.out.println("Usage: java org.ecoinformatics.eml.EMLParser [-q] [] "); System.out.println("-----------------------------------------------------------------------"); } /* NOTE(review): for more than three arguments only a message is printed; there is no exit or return, so execution continues with both file names empty */ if(args.length > 3) { System.out.println("Invalid number of arguments."); } /* An empty configfile selects the built in config further down. NOTE(review): in that case the eml file is validated twice, first through the File constructor and then again after being read into a String, and the FileReader used for the second pass is never closed in the visible code. */ String configfile = ""; String emlfile = ""; /* with three arguments args[0] is not inspected: args[1] is taken as the config file and args[2] as the eml file */ if(args.length == 3) { configfile = args[1]; emlfile = args[2]; System.out.println("emlfile: " + emlfile + " configfile: " + configfile); } /* with two arguments the first is either -q or the config file */ else if(args.length == 2) { if(args[0].equals("-q")) { emlfile = args[1]; } else { configfile = args[0]; emlfile = args[1]; } } else if(args.length == 1) { emlfile = args[0]; } /* no arguments: print the usage text and exit with status 0. NOTE(review): the usage text names lib/config.xml as the default, while the constructors load the config.xml class path resource */ else if(args.length == 0) { System.out.println("Usage: java org.ecoinformatics.eml.EMLParser [-q] [] "); System.out.println(" -q = quiet mode, little or no output"); System.out.println(" = use an alternate config file. 
The default is lib/config.xml"); System.out.println(" = the EML file to parse"); System.exit(0); } try { if(configfile.equals("")) { EMLParser parser = new EMLParser(new File(emlfile)); FileReader xmldoc = new FileReader(emlfile); char [] ch = new char [4096]; StringWriter writer = new StringWriter(); int readNum = xmldoc.read(ch); while (readNum != -1) { writer.write(ch, 0, readNum); readNum = xmldoc.read(ch); } String str = writer.toString(); EMLParser readerParser = new EMLParser(str); } else { EMLParser parser = new EMLParser(new File(emlfile), new File(configfile)); } System.out.println(emlfile + " has valid ids and references."); } catch(Exception e) { System.out.println("Error: " + e.getMessage()); } } /** * class to represent a key */ private class Key { protected String selector; //xpath expression for the selector protected String field; //xpath expression for the field in the selector protected String name; //name of the key Key(String name, String selector, String field) { this.name = name; this.selector = selector; this.field = field; } public String toString() { String s = "name: " + name + " selector: " + selector + " field: " + field; return s; } } /** * class to represent a keyref */ private class Keyref { protected String name; //name of the keyref protected String refer; //the key that we are refering to protected String selector; //the selector for the keyref protected String field; //the field in the selector Keyref(String name, String refer, String selector, String field) { this.name = name; this.refer = refer; this.selector = selector; this.field = field; } public String toString() { String s = "name: " + name + " refer: " + refer + " selector: " + selector + " field: " + field; return s; } } }