Share this page 

Validate XML using a DTD
Tag(s): XML


Consider this XML file howto.xml :
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE howto SYSTEM "howto.dtd">
<howto>
  <topic>
      <title>Java</title>
      <url>http://www.rgagnon.com/topics/java-xml.html</url>
  </topic>
    <topic>
      <title>PowerBuilder</title>
      <url>http://www.rgagnon.com/topics/pb-powerscript.htm</url>
  </topic>
  <topic>
        <title>Javascript</title>
        <url>http://www.rgagnon.com/topics/js-language.html</url>
  </topic>
  <topic>
        <title>VBScript</title>
        <url>http://www.rgagnon.com/topics/wsh-vbs.html</url>
  </topic>
</howto>
A reference to the external DTD file howto.dtd is present.

The external howto.dtd :

<!ELEMENT howto (topic*)>
<!ELEMENT topic (title,url)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT url (#PCDATA)>
NOTE : The DTD can be inside the XML document.
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE howto [
<!ELEMENT howto (topic*)>
<!ELEMENT topic (title,url)>
<!ELEMENT title (#PCDATA)>
<!ELEMENT url (#PCDATA)>
]>
<howto>
  <topic>
      <title>Java</title>
      <url>http://www.rgagnon.com/topics/java-xml.html</url>
  </topic>
...
</howto>

The code to validate an XML file using the declared DTD :

import java.io.File;
import java.io.IOException;
// DOM
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
// SAX
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.XMLReader;

import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.InputSource;

/**
 * Static helpers that validate an XML document against the DTD declared in
 * its own DOCTYPE (an external DTD file or an internal subset).
 *
 * <p>SECURITY NOTE: DTD validation requires the parser to process the
 * DOCTYPE, so the usual XXE hardening (disallowing DOCTYPE declarations)
 * cannot be applied here. Only validate documents from a trusted source.
 */
public final class XMLUtils {

  private XMLUtils() {}

  /**
   * Validates a document against its declared DTD using the JAXP DOM parser.
   *
   * @param xml location of the document: a filesystem path or a URI/URL
   * @return {@code true} if the document is well-formed and valid,
   *         {@code false} if the parser reported an error or fatal error
   * @throws ParserConfigurationException if no suitable parser can be created
   * @throws IOException if the document (or its DTD) cannot be read
   */
  public static boolean validateWithDTDUsingDOM(String xml)
    throws ParserConfigurationException, IOException
  {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    factory.setNamespaceAware(true);

    DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setErrorHandler(new ReportingErrorHandler());

    try {
      builder.parse(new InputSource(toSystemId(xml)));
      return true;
    }
    catch (SAXException se) {
      // Already reported by ReportingErrorHandler; "invalid" is the result.
      return false;
    }
  }

  /**
   * Validates a document against its declared DTD using the JAXP SAX parser.
   *
   * @param xml location of the document: a filesystem path or a URI/URL
   * @return {@code true} if the document is well-formed and valid,
   *         {@code false} if the parser reported an error or fatal error
   * @throws ParserConfigurationException if no suitable parser can be created
   * @throws IOException if the document (or its DTD) cannot be read
   */
  public static boolean validateWithDTDUsingSAX(String xml)
    throws ParserConfigurationException, IOException
  {
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setValidating(true);
    factory.setNamespaceAware(true);

    try {
      // newSAXParser()/getXMLReader() may themselves throw SAXException;
      // they stay inside the try so that case still yields false.
      SAXParser parser = factory.newSAXParser();
      XMLReader reader = parser.getXMLReader();
      reader.setErrorHandler(new ReportingErrorHandler());
      reader.parse(new InputSource(toSystemId(xml)));
      return true;
    }
    catch (SAXException se) {
      // Already reported by ReportingErrorHandler; "invalid" is the result.
      return false;
    }
  }

  /**
   * Converts the caller-supplied location into a system identifier.
   *
   * An InputSource system identifier is a URI, so a raw path containing
   * URI-significant characters (for example '#') would be misread. Paths that
   * name an existing file are therefore converted to a proper file: URI;
   * anything else (null, URLs, non-existent paths) is passed through unchanged
   * so the parser reports the problem as before.
   */
  private static String toSystemId(String xml) {
    if (xml == null) {
      return null;
    }
    File file = new File(xml);
    return file.isFile() ? file.toURI().toASCIIString() : xml;
  }

  /**
   * Prints every parser diagnostic to standard output. Warnings are only
   * reported; errors (validity violations) and fatal errors
   * (well-formedness violations) are rethrown to abort the parse.
   */
  private static final class ReportingErrorHandler implements ErrorHandler {

    @Override
    public void warning(SAXParseException e) throws SAXException {
      // Report only; a warning does not make the document invalid.
      System.out.println("WARNING : " + e.getMessage());
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
      System.out.println("ERROR : " + e.getMessage());
      throw e;
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
      System.out.println("FATAL : " + e.getMessage());
      throw e;
    }
  }

  /**
   * Validates each document named on the command line with both parsers and
   * prints the two results per document. With no arguments, the sample
   * location c:/temp/howto.xml is used.
   */
  public static void main (String[] args) throws Exception {
    String[] paths = args.length > 0 ? args : new String[] { "c:/temp/howto.xml" };
    for (String path : paths) {
      System.out.println(XMLUtils.validateWithDTDUsingDOM(path));
      System.out.println(XMLUtils.validateWithDTDUsingSAX(path));
    }
    /*
      output for a valid document :
               true
               true
    */
  }
}

NOTES :
  • DOM reads the entire structure into memory, so its memory consumption is higher than SAX's; when only validation is needed, SAX is usually the lighter (and typically faster) choice.
  • DTD is the old way to validate an XML structure. The preferred way is to use an XML schema (XSD) which provides a more complete validation process.
  • To validate using an XML Schema (XSD), see this HowTo