An Introduction to DOM Parser (with JAXP)

by K. Yue
Created: June 2002
copyright 2002-4

0. Resources

1. Introduction

2. The JAXP DOM API

Example:

RootElementName.java: print out the name of the root element of the XML document.

import javax.xml.parsers.*;

import org.xml.sax.SAXException; 
import org.xml.sax.SAXParseException; 

import java.io.*;
import org.w3c.dom.*;

/**
 * Prints the tag name of the root element of the XML file named on the
 * command line.
 */
public class RootElementName {
    /** Document produced by the most recent parse in main. */
    static Document document;

    /**
     * Parses the file named by the single command-line argument and writes
     * "Root element: " followed by the root tag name to standard output.
     * Exits with status 1 when the argument count is not exactly one; any
     * failure during parsing is reported as a stack trace.
     */
    public static void main(String argv[])
    {
        if (argv.length != 1) {
            System.err.println("Usage: java RootElementName xmlfilename");
            System.exit(1);
        }

        DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder parser = parserFactory.newDocumentBuilder();
            File xmlFile = new File(argv[0]);
            document = parser.parse(xmlFile);

            Element rootElement = document.getDocumentElement();
            String rootName = rootElement.getTagName();

            // The explicit "\n" plus println leaves one blank line after the name.
            System.out.println("Root element: " + rootName + "\n");
        } catch (Throwable problem) {
            problem.printStackTrace();
        }
    } // main
}

Try it on:

<?xml version="1.0" encoding="UTF-8"?>
<root a="1" b="2" c="3">
   <!-- Some comments -->
   <?somePi ?>
   <p x="1">
      Hello
      <q />
      <r />
      <s x="1" y="2"><u x="1">ok</u></s>
      Good bye
   </p>
   <p x="1" y="2" z="3" w="4" />
</root>

2.1 The DOM parser

Example:

Using properties of DocumentBuilderFactory and DocumentBuilder. (This is not a complete program.)

public static void main(String argv[])
{
   if (argv.length < 1) {
      System.err.println("Usage: Java yourprogram xmlfilename");
      System.exit(1);
   }

   try {
      DocumentBuilderFactory factory =
         DocumentBuilderFactory.newInstance();

      // Set builder properties.
      factory.setCoalescing(true);
      factory.setIgnoringElementContentWhitespace(true);

      DocumentBuilder builder = factory.newDocumentBuilder();

      // Set Error Handler
      builder.setErrorHandler(new yourprogram());

      document = builder.parse(new File(argv[0]));
      System.out.println("XML contents ==> \n\n");
      // process the document.
      process(document.getDocumentElement(), 0);
    } catch (Throwable e) {
      e.printStackTrace();
   }
} // main

Example:

To create a DOM parser that is validating:

// Sketch only: shows the factory setting used for a validating DOM parser.
// Code that is not relevant to that setting is elided with "// ...".
public class MyProg extends DefaultHandler {
   static Document document;

   public static void main(String argv[]) {
   // ... (elided)
   try {
      DocumentBuilderFactory factory =
      DocumentBuilderFactory.newInstance();
      // ... (elided)
      // Set builder property: turn validation on at the factory,
      // before the builder is obtained from it below.
      factory.setValidating(true);

      DocumentBuilder builder = factory.newDocumentBuilder();
      // Set Error Handler: a MyProg instance (a DefaultHandler subclass)
      // is registered with the builder.
      builder.setErrorHandler(new MyProg());
      // ... (elided; the try block and the class are not closed in this sketch)

2.2 Document and DocumentFragment

DocumentFragment

Example:

Running

DocumentRoot.java: display the children of the document root.

import javax.xml.parsers.*;

import org.xml.sax.*; 
import org.xml.sax.helpers.*; 

import java.io.*;
import org.w3c.dom.*;

/**
 * Lists the direct children of the Document node of an XML file: first how
 * many there are, then one line per child giving its index, its numeric
 * node type and its string form.
 */
public class DocumentRoot extends DefaultHandler {
   /** Document produced by the parse in main. */
   static Document document;

   /**
    * Parses the file named by the single command-line argument and prints
    * the children of the resulting Document node. Exits with status 1 when
    * the argument count is not exactly one; any failure is reported as a
    * stack trace.
    */
   public static void main(String argv[])
   {
      if (argv.length != 1) {
         System.err.println("Usage: Java DocumentRoot xmlfilename");
         System.exit(1);
      }

      try {
         DocumentBuilder parser =
            DocumentBuilderFactory.newInstance().newDocumentBuilder();

         // This class extends DefaultHandler, so an instance serves as the error handler.
         parser.setErrorHandler(new DocumentRoot());
         document = parser.parse(new File(argv[0]));

         NodeList topLevel = document.getChildNodes();
         int total = topLevel.getLength();
         System.out.println("Number of children of the document root ==> " + total);

         int index = 0;
         while (index < total) {
            Node child = topLevel.item(index);
            System.out.println("Child " + index + " type " + child.getNodeType() + " => " + child);
            index++;
         }
      } catch (Throwable problem) {
         problem.printStackTrace();
      }
   } // main
}

on

<?xml version="1.0"?>
<!DOCTYPE familytree SYSTEM "ft.dtd">
<!-- This xml has comments and pi before root node. -->
<?br ?>

<familytree>
<!-- other contents -->
</familytree>

gives:

Number of children of the document root ==> 4
Child 0 type 10 => [familytree: null]
Child 1 type 8 => [#comment: This xml has comments and pi before root node. ]
Child 2 type 7 => [br: ]
Child 3 type 1 => [familytree: null]

2.3 The Node Interface

Example:

short nodeType = node.getNodeType();
if (nodeType == Node.ELEMENT_NODE) {
   // ...

Example

Use Document and DocumentFragment to create a DOM document.

import javax.xml.parsers.*;

import org.xml.sax.*; 
import org.xml.sax.helpers.*; 

import java.io.*;
import org.w3c.dom.*;

/**
 * Builds a small DOM tree in memory with the help of a DocumentFragment and
 * prints it as an indented outline.
 *
 * The tree is an "examples" root element holding five "exam" elements, each
 * with one text child "quotation #n".
 */
public class DocumentFragmentTest extends DefaultHandler {
   /** The document built by main. */
   static Document document;

   /** Number of spaces added to the indentation per nesting level. */
   private static final int INDENT = 3;

   /**
    * Creates the document and prints its outline to standard output.
    * Command-line arguments are not used. Any failure is reported as a
    * stack trace.
    */
   public static void main(String argv[])
   {
      try {
         // Nothing is parsed here, so parser-only factory options
         // (validation, coalescing, whitespace handling) are not set.
         DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
         DocumentBuilder builder = factory.newDocumentBuilder();

         document = builder.newDocument();
         DocumentFragment df = document.createDocumentFragment();
         Element root = document.createElement("examples");
         for (int i = 0; i < 5; i++) {
            Element node = document.createElement("exam");
            Text text = document.createTextNode("quotation #" + (i + 1));
            node.appendChild(text);
            df.appendChild(node);
         }
         // Appending the fragment transfers its children to the root;
         // the fragment node itself does not become part of the tree.
         root.appendChild(df);
         document.appendChild(root);

         printNode(document, 0);
      } catch (Throwable e) {
         e.printStackTrace();
      }
   } // main

   /**
    * Prints one node, indented by its level, and recurses into the children
    * of document and element nodes. Node types without a case below are
    * silently skipped.
    *
    * @param node  the node to print
    * @param level nesting depth of the node; the document node is at 0
    */
   private static void printNode(Node node, int level)
   {
      switch (node.getNodeType()) {
         case Node.ELEMENT_NODE:
            printElement((Element) node, level);
            break;

         case Node.TEXT_NODE:
            printCharacterData("text", (CharacterData) node, level);
            break;

         case Node.CDATA_SECTION_NODE:
            printCharacterData("cdata", (CharacterData) node, level);
            break;

         case Node.COMMENT_NODE:
            printCharacterData("comment", (CharacterData) node, level);
            break;

         case Node.PROCESSING_INSTRUCTION_NODE: {
            ProcessingInstruction pi = (ProcessingInstruction) node;
            printSpaces(level * INDENT);
            System.out.println("Processing Instruction: " + pi.getTarget() + " => " + pi.getData());
            break;
         }

         case Node.DOCUMENT_NODE:
            printSpaces(level * INDENT);
            System.out.println("Document Root.");
            printChildren(node, level + 1);
            break;

         case Node.DOCUMENT_TYPE_NODE: {
            DocumentType docType = (DocumentType) node;
            printSpaces(level * INDENT);
            System.out.println("DocType: " + docType.getName() +
               ", public id: " + docType.getPublicId() +
               ", system id: " + docType.getSystemId());
            break;
         }

         case Node.ENTITY_NODE:
            printSpaces(level * INDENT);
            System.out.println("Entity: " + ((Entity) node).getNotationName());
            break;

         case Node.ENTITY_REFERENCE_NODE:
            printSpaces(level * INDENT);
            System.out.println("Entity Reference: " + node.getNodeName());
            break;

         default:
            // Remaining node types are not printed.
            break;
      }
   }   //   printNode

   /** Prints an element's tag name, then its attributes and children one level deeper. */
   private static void printElement(Element element, int level)
   {
      printSpaces(level * INDENT);
      System.out.println("Element: " + element.getTagName());

      NamedNodeMap attributes = element.getAttributes();
      int numAttributes = attributes.getLength();
      for (int i = 0; i < numAttributes; i++) {
         Attr attr = (Attr) attributes.item(i);
         printSpaces((level + 1) * INDENT);
         System.out.println("Attribute: " + attr.getName() + ": " + attr.getValue());
      }

      printChildren(element, level + 1);
   }

   /** Prints a text, CDATA or comment node as label(length): "data". */
   private static void printCharacterData(String label, CharacterData node, int level)
   {
      String data = node.getData();
      printSpaces(level * INDENT);
      System.out.println(label + "(" + data.length() + "): \"" + data + "\"");
   }

   /** Prints every child of the given node at the given level. */
   private static void printChildren(Node parent, int level)
   {
      NodeList childNodes = parent.getChildNodes();
      int numChildNodes = childNodes.getLength();
      for (int i = 0; i < numChildNodes; i++) {
         printNode(childNodes.item(i), level);
      }
   }

   /** Writes count spaces to standard output without a line break. */
   private static void printSpaces(int count) {
      for (int i = 0; i < count; i++) {
         System.out.print(" ");
      }
   }
}

Example:

NumberOfRootChildren.java:

The following version, which is meant to count the number of children (text and element) of the root node, may not work correctly. Why?

import javax.xml.parsers.*;

import org.xml.sax.*; 
import org.xml.sax.helpers.*; 

import java.io.*;
import org.w3c.dom.*;

//  
/**
 * Parses the XML file named on the command line and prints the length of
 * the root element's child-node list.
 */
public class NumberOfRootChildren extends DefaultHandler {
    /** Document produced by the parse in main. */
    static Document document;

    /**
     * Parses the file named by the single command-line argument and prints
     * the number of child nodes of its root element. Exits with status 1
     * when the argument count is not exactly one; any failure is reported
     * as a stack trace.
     */
    public static void main(String argv[])
    {
        if (argv.length != 1) {
            System.err.println("Usage: Java NumberOfRootChildren xmlfilename");
            System.exit(1);
        }

        try {
            DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();

            // Factory settings used by this example.
            parserFactory.setValidating(false);
            parserFactory.setCoalescing(true);
            parserFactory.setIgnoringElementContentWhitespace(true);

            DocumentBuilder parser = parserFactory.newDocumentBuilder();

            // This class extends DefaultHandler, so an instance serves as the error handler.
            parser.setErrorHandler(new NumberOfRootChildren());

            File xmlFile = new File(argv[0]);
            document = parser.parse(xmlFile);

            NodeList rootChildren = document.getDocumentElement().getChildNodes();
            int childCount = rootChildren.getLength();
            System.out.println("Number of children of the root element => " + childCount);
        } catch (Throwable problem) {
            problem.printStackTrace();
        }
    } // main
}

Can you spot the problems and correct them?

2.4 The Element interface

2.5 Other interfaces

Example:

NumberOfElements.java: counts the number of elements in an XML document. Can you write a version that counts the number of attributes?

import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.parsers.*;

import java.io.*;
import org.apache.xml.serialize.*;

/**

   Show the number of elements in an XML document
*/
public class NumberOfElements {
   /** Document produced by the parse in main. */
   static Document document;

   /**
    * Parses the file named by the single command-line argument and prints
    * how many elements it contains, the root element included. Exits with
    * status 1 when the argument count is not exactly one; on any failure
    * an apology and a stack trace are written to standard error.
    */
   public static void main(String argv[])
   {
      if (argv.length != 1) {
         System.err.println("Usage: Java NumberOfElements inputxmlfilename");
         System.exit(1);
      }

      DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
      try {
         DocumentBuilder parser = parserFactory.newDocumentBuilder();
         document = parser.parse(new File(argv[0]));

         Element rootElement = document.getDocumentElement();
         int total = numElements(rootElement);
         System.out.println("Number of elements in " + argv[0] + " = " + total);
      } catch (Throwable problem) {
         System.err.println("Sorry, cannot parse the input XML file " + argv[0] + ".");
         problem.printStackTrace();
      }
   } // main

   /**
    * Counts the given element plus every element nested beneath it.
    *
    * @param element root of the subtree to count; may be null
    * @return number of elements in the subtree, or 0 for null
    */
   private static int numElements(org.w3c.dom.Element element) {
      if (element == null) {
         return 0;
      }

      int count = 1;   // the element itself
      Node child = element.getFirstChild();
      while (child != null) {
         // Only element children contribute; text, comments etc. are skipped.
         if (child.getNodeType() == Node.ELEMENT_NODE) {
            count += numElements((Element) child);
         }
         child = child.getNextSibling();
      }
      return count;
   }   //   numElements
}   //   NumberOfElements

Example:

Insert a B element before every A element, then print out the total number of elements in the resulting XML tree.

import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.parsers.*;

import java.io.*;
import org.apache.xml.serialize.*;

/**

   Show the number of elements in an XML document
*/
public class AddBBeforeA {
   /** Document produced by the parse in main. */
   static Document document;

   /**
    * Parses the file named by the single command-line argument, prints the
    * element count, inserts a new empty B element before every A element
    * below the root, and prints the element count again. Exits with status
    * 1 when the argument count is not exactly one; on any failure an
    * apology and a stack trace are written to standard error.
    */
   public static void main(String argv[])
   {
      if (argv.length != 1) {
         System.err.println("Usage: Java AddBBeforeA inputxmlfilename");
         System.exit(1);
      }

      DocumentBuilderFactory factory =
         DocumentBuilderFactory.newInstance();
      try {
         //  Parse input XML file.
         DocumentBuilder builder = factory.newDocumentBuilder();
         document = builder.parse(new File(argv[0]));

         Element root = document.getDocumentElement();
         System.out.println("Number of elements in " + argv[0] +
               " initially = " +   numElements(root));
         addBBeforeA(root);
         // This count is taken AFTER the insertion. The label text is kept
         // verbatim so the output still matches the sample output shown
         // with this example.
         System.out.println("Number of elements in " + argv[0] +
               " Before adding <B> Before <A> = " +   numElements(root));
      } catch (Throwable e) {
         //   Print error messages.
         System.err.println("Sorry, cannot parse the input XML file " + argv[0] + ".");
         e.printStackTrace();
      }
   } // main

   /**
    * Counts the given element plus every element nested beneath it.
    *
    * @param element root of the subtree to count; may be null
    * @return number of elements in the subtree, or 0 for null
    */
   private static int numElements(org.w3c.dom.Element element) {
      if (element == null) return 0;

      int result = 1;      //   self
      NodeList childNodes = element.getChildNodes();
      for (int i=0; i < childNodes.getLength(); i++) {
         if (childNodes.item(i).getNodeType() == Node.ELEMENT_NODE) {
            result += numElements((Element) childNodes.item(i));
         }   //   if
      }   //   for
      return result;
   }   //   numElements

   /**
    * Inserts a new, empty B element immediately before every A element
    * found anywhere below the given element. The given element itself is
    * never preceded by a B, even when it is named A.
    *
    * @param element root of the subtree to modify; null is ignored
    */
   private static void addBBeforeA (org.w3c.dom.Element element) {
      if (element == null) return;
      Document owner = element.getOwnerDocument();

      // Walk the sibling chain instead of indexing the live child list:
      // a node inserted before the current child does not disturb the
      // link from that child to its next sibling.
      for (Node child = element.getFirstChild(); child != null; child = child.getNextSibling()) {
         if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
         }
         Element childElement = (Element) child;
         if ("A".equals(childElement.getNodeName())) {
            // A node can sit in only one place in the tree, so each A
            // needs its own freshly created B; re-inserting a shared B
            // would merely move it.
            element.insertBefore(owner.createElement("B"), childElement);
         }
         // Descend so that nested A elements are handled as well.
         addBBeforeA(childElement);
      }
   }   //   addBBeforeA

}   //   AddBBeforeA

For input file:

<?xml version="1.0" ?>
<root>
   <C />
   <A />
   <C />
   <D />
</root>

The output is:

Number of elements in AddBBeforeAtest1.xml initially = 5
Number of elements in AddBBeforeAtest1.xml Before adding <B> Before <A> = 6

Can you spot any potential problems?

3. Other DOM Topics

3.1 Namespace Support



Dr. Kwok-Bun Yue
Professor, Computer Science and Computer Information Systems
Chair, Division of Computing and Mathematics
University of Houston-Clear Lake
2700 Bay Area Boulevard
Houston, TX 77058
Yue's Home  Yue's home page     Yue's email  yue@uhcl.edu     phone  281-283-3864