An Introduction to DOM Parser (with JAXP)
by K. Yue
Created: June 2002
copyright 2002-4
0. Resources
1. Introduction
2. The JAXP DOM API
Example:
RootElementName.java: print out the name of the root element of the XML document.
import javax.xml.parsers.*;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import java.io.*;
import org.w3c.dom.*;
/**
 * Prints the tag name of the root element of an XML document.
 *
 * <p>Usage: {@code java RootElementName xmlfilename}
 */
public class RootElementName {
    /** The document produced by the most recent parse. */
    static Document document;

    /**
     * Parses the XML file named by the single command-line argument and
     * prints the tag name of its root element to standard output.
     *
     * <p>Exits with status 1 when the argument count is not exactly one.
     * Any failure while building the parser or parsing the file is reported
     * as a stack trace on standard error.
     *
     * @param argv command-line arguments; {@code argv[0]} is the XML file name
     */
    public static void main(String argv[]) {
        if (argv.length != 1) {
            System.err.println("Usage: java RootElementName xmlfilename");
            System.exit(1);
        }

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            document = builder.parse(new File(argv[0]));
            // getDocumentElement() returns the root element of the parsed tree.
            System.out.println("Root element: "
                    + document.getDocumentElement().getTagName() + "\n");
        } catch (Throwable e) {
            e.printStackTrace();
        }
    } // main
}
Try it on:
<?xml version="1.0" encoding="UTF-8"?>
<root a="1" b="2" c="3">
<!-- Some comments -->
<?somePi ?>
<p x="1">
Hello
<q />
<r />
<s x="1" y="2"><u x="1">ok</u></s>
Good bye
</p>
<p x="1" y="2" z="3" w="4" />
</root>
2.1 The DOM parser
Example:
Using properties of DocumentBuilderFactory and DocumentBuilder. (This is not a complete program.)
public static void main(String argv[])
{
if (argv.length < 1) {
System.err.println("Usage: Java yourprogram
xmlfilename");
System.exit(1);
}
try {
DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
// Set builder properties.
factory.setCoalescing(true);
factory.setIgnoringElementContentWhitespace(true);
DocumentBuilder builder = factory.newDocumentBuilder();
// Set Error Handler
builder.setErrorHandler(new yourprogram());
document = builder.parse(new File(argv[0]));
System.out.println("XML contents ==>
\n\n");
// process the document.
process(document.getDocumentElement(), 0);
} catch (Throwable e) {
e.printStackTrace();
}
} // main
Example:
To create a DOM parser that is validating:
public class MyProg extends DefaultHandler {
static Document document;
public static void main(String argv[]) {
// ...
try {
DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
// ...
// Set builder property.
factory.setValidating(true);
DocumentBuilder builder = factory.newDocumentBuilder();
// Set Error Handler
builder.setErrorHandler(new MyProg());
// ...
2.2 Document and DocumentFragment
DocumentFragment
Example:
Running
DocumentRoot.java: display the children of the document root.
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;
import org.w3c.dom.*;
/**
 * Lists the children of the document node (not the root element) of an
 * XML document: one line per child with its node type code and its string
 * form.
 *
 * <p>Usage: {@code java DocumentRoot xmlfilename}
 */
public class DocumentRoot extends DefaultHandler {
    /** The document produced by the most recent parse. */
    static Document document;

    /**
     * Parses the XML file named by the single command-line argument and
     * prints how many children the document node has, followed by one line
     * per child.
     *
     * <p>Exits with status 1 when the argument count is not exactly one.
     * Failures are reported as a stack trace on standard error.
     *
     * @param argv command-line arguments; {@code argv[0]} is the XML file name
     */
    public static void main(String argv[]) {
        if (argv.length != 1) {
            System.err.println("Usage: Java DocumentRoot xmlfilename");
            System.exit(1);
        }

        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = factory.newDocumentBuilder();

            // Set Error Handler
            builder.setErrorHandler(new DocumentRoot());

            document = builder.parse(new File(argv[0]));

            // Children of the Document node itself: the doctype, comments,
            // processing instructions and the root element.
            NodeList nodes = document.getChildNodes();
            System.out.println("Number of children of the document root ==> "
                    + nodes.getLength());
            for (int i = 0; i < nodes.getLength(); i++) {
                System.out.println("Child " + i + " type "
                        + nodes.item(i).getNodeType() + " => " + nodes.item(i));
            }
        } catch (Throwable e) {
            e.printStackTrace();
        }
    } // main
}
on
<?xml version="1.0"?>
<!DOCTYPE familytree SYSTEM "ft.dtd">
<!-- This xml has comments and pi before root node. -->
<?br ?>
<familytree>
<!-- other contents -->
</familytree>
gives:
Number of children of the document root ==> 4
Child 0 type 10 => [familytree: null]
Child 1 type 8 => [#comment: This xml has comments and pi before root node.
]
Child 2 type 7 => [br: ]
Child 3 type 1 => [familytree: null]
2.3 The Node Interface
Example:
short nodeType = node.getNodeType();
if (nodeType == Node.ELEMENT_NODE) {
// ...
Example
Use Document and DocumentFragment to create a DOM document.
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;
import org.w3c.dom.*;
/**
 * Builds a small DOM document in memory with the help of a
 * {@link DocumentFragment} and prints its node tree.
 *
 * <p>The document built is an {@code examples} root element holding five
 * {@code exam} elements, each with one text child.
 */
public class DocumentFragmentTest extends DefaultHandler {
    /** The document created by {@link #main}. */
    static Document document;

    /**
     * Creates the document, fills it through a document fragment and prints
     * the resulting tree to standard output. Failures are reported as a
     * stack trace on standard error.
     *
     * @param argv command-line arguments; not used
     */
    public static void main(String argv[]) {
        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

            // Set builder property.
            factory.setValidating(true);
            factory.setCoalescing(true);
            factory.setIgnoringElementContentWhitespace(true);
            DocumentBuilder builder = factory.newDocumentBuilder();

            // Nothing is parsed here: start from an empty document.
            document = builder.newDocument();
            DocumentFragment df = document.createDocumentFragment();
            Element root = document.createElement("examples");

            for (int i = 0; i < 5; i++) {
                Element node = document.createElement("exam");
                Text text = document.createTextNode("quotation #" + (i + 1));
                node.appendChild(text);
                df.appendChild(node);
            }

            // Appending a fragment moves its children under the target;
            // the fragment node itself does not become part of the tree.
            root.appendChild(df);
            document.appendChild(root);

            // print contents
            printNode(document, 0);
        } catch (Throwable e) {
            e.printStackTrace();
        }
    } // main

    /**
     * Prints one node, and for element and document nodes recursively its
     * children, indented three spaces per level.
     *
     * <p>Node types not listed in the branches below are silently skipped.
     *
     * @param node  the node to print
     * @param level nesting depth of {@code node}; 0 for the document node
     */
    private static void printNode(Node node, int level) {
        short nodeType = node.getNodeType();

        if (nodeType == Node.ELEMENT_NODE) {
            int i;
            printSpaces(level * 3);
            System.out.print("ELement: " + ((Element) node).getTagName() + "\n");

            // Attributes are listed one level deeper than their element.
            NamedNodeMap attributes = node.getAttributes();
            int numAttributes = attributes.getLength();
            for (i = 0; i < numAttributes; i++) {
                printSpaces((level + 1) * 3);
                Attr attr = (Attr) attributes.item(i);
                System.out.println("Attribute: " + attr.getName() + ": " + attr.getValue());
            }

            NodeList childNodes = node.getChildNodes();
            int numChildNodes = childNodes.getLength();
            for (i = 0; i < numChildNodes; i++) {
                printNode(childNodes.item(i), level + 1);
            }
        } else if (nodeType == Node.TEXT_NODE
                || nodeType == Node.CDATA_SECTION_NODE
                || nodeType == Node.COMMENT_NODE) {
            // All three are CharacterData: print kind, length and content.
            printSpaces(level * 3);
            String data = ((CharacterData) node).getData();
            System.out.print((nodeType == Node.TEXT_NODE)
                    ? "text"
                    : ((nodeType == Node.CDATA_SECTION_NODE) ? "cdata" : "comment"));
            System.out.println("(" + data.length() + "): \"" + data + "\"");
        } else if (nodeType == Node.PROCESSING_INSTRUCTION_NODE) {
            printSpaces(level * 3);
            System.out.print("Processing Instruction: "
                    + ((ProcessingInstruction) node).getTarget() + " => "
                    + ((ProcessingInstruction) node).getData() + "\n");
        } else if (nodeType == Node.DOCUMENT_NODE) {
            // Document Root
            System.out.println("Document Root.");
            NodeList childNodes = node.getChildNodes();
            int numChildNodes = childNodes.getLength();
            for (int i = 0; i < numChildNodes; i++) {
                printNode(childNodes.item(i), level + 1);
            }
        } else if (nodeType == Node.DOCUMENT_TYPE_NODE) {
            // Document type declaration
            printSpaces(level * 3);
            System.out.println("DocType: " + ((DocumentType) node).getName()
                    + ", public id: " + ((DocumentType) node).getPublicId()
                    + ", system id: " + ((DocumentType) node).getSystemId());
        } else if (nodeType == Node.ENTITY_NODE) {
            // Entity: only its notation name is printed.
            printSpaces(level * 3);
            System.out.print("Entity: " + ((Entity) node).getNotationName() + "\n");
        } else if (nodeType == Node.ENTITY_REFERENCE_NODE) {
            // Entity reference: printed with the same "Entity: " label.
            printSpaces(level * 3);
            System.out.print("Entity: " + ((EntityReference) node).getNodeName() + "\n");
        }
    } // printNode

    /**
     * Prints {@code count} single spaces to standard output.
     *
     * @param count number of spaces to print; nothing is printed when it is
     *              zero or negative
     */
    private static void printSpaces(int count) {
        for (int i = 0; i < count; i++) {
            System.out.print(" ");
        }
    }
}
Example:
NumberOfRootChildren.java:
The following version, which counts the number of children (text and element) of the root element, may not work correctly. Why?
import javax.xml.parsers.*;
import org.xml.sax.*;
import org.xml.sax.helpers.*;
import java.io.*;
import org.w3c.dom.*;
//
/**
 * Prints the number of child nodes of the root element of an XML document.
 *
 * <p>Usage: {@code java NumberOfRootChildren xmlfilename}
 *
 * <p>This listing belongs to an exercise: the parser configuration below is
 * kept exactly as given, so the printed count may not be what you expect.
 */
public class NumberOfRootChildren extends DefaultHandler {
    /** The document produced by the most recent parse. */
    static Document document;

    /**
     * Parses the XML file named by the single command-line argument and
     * prints the length of the root element's child node list.
     *
     * <p>Exits with status 1 when the argument count is not exactly one.
     * Failures are reported as a stack trace on standard error.
     *
     * @param argv command-line arguments; {@code argv[0]} is the XML file name
     */
    public static void main(String argv[]) {
        if (argv.length != 1) {
            System.err.println("Usage: Java NumberOfRootChildren xmlfilename");
            System.exit(1);
        }

        try {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

            // Set builder property.
            factory.setValidating(false);
            factory.setCoalescing(true);
            factory.setIgnoringElementContentWhitespace(true);
            DocumentBuilder builder = factory.newDocumentBuilder();

            // Set Error Handler
            builder.setErrorHandler(new NumberOfRootChildren());

            document = builder.parse(new File(argv[0]));
            Element root = document.getDocumentElement();
            System.out.println("Number of children of the root element => "
                    + root.getChildNodes().getLength());
        } catch (Throwable e) {
            e.printStackTrace();
        }
    } // main
}
Can you spot the problems and correct them?
2.4 The Element interface
2.5 Other interfaces
Example:
NumberOfElements.java: counts the number of elements in an XML document. Can you write a version to count the number of attributes?
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.parsers.*;
import java.io.*;
import org.apache.xml.serialize.*;
/**
 * Shows the number of elements in an XML document.
 *
 * <p>Usage: {@code java NumberOfElements inputxmlfilename}
 */
public class NumberOfElements {
    /** The document produced by the most recent parse. */
    static Document document;

    /**
     * Parses the XML file named by the single command-line argument and
     * prints the total number of elements it contains, the root included.
     *
     * <p>Exits with status 1 when the argument count is not exactly one.
     * On failure, prints an apology naming the file followed by a stack
     * trace on standard error.
     *
     * @param argv command-line arguments; {@code argv[0]} is the XML file name
     */
    public static void main(String argv[]) {
        if (argv.length != 1) {
            System.err.println("Usage: Java NumberOfElements inputxmlfilename");
            System.exit(1);
        }

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            // Parse input XML file.
            DocumentBuilder builder = factory.newDocumentBuilder();
            document = builder.parse(new File(argv[0]));
            System.out.println("Number of elements in " + argv[0]
                    + " = " + numElements(document.getDocumentElement()));
        } catch (Throwable e) {
            // Print error messages.
            System.err.println("Sorry, cannot parse the input XML file " + argv[0] + ".");
            e.printStackTrace();
        }
    } // main

    /**
     * Counts the elements in the subtree rooted at {@code element}.
     *
     * @param element root of the subtree; may be {@code null}
     * @return 0 for {@code null}; otherwise 1 for the element itself plus
     *         the count of every descendant element
     */
    private static int numElements(org.w3c.dom.Element element) {
        if (element == null) return 0;

        int result = 1; // self
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            // Text, comment and other non-element children are not counted.
            if (childNodes.item(i).getNodeType() == Node.ELEMENT_NODE) {
                result += numElements((Element) childNodes.item(i));
            } // if
        } // for
        return result;
    } // numElements
} // NumberOfElements
Example:
Insert a B element before every A element then print out the total number of elements in the result XML tree.
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.parsers.*;
import java.io.*;
import org.apache.xml.serialize.*;
/**
 * Inserts a B element before A elements among the children of the root
 * element, printing the number of elements in the document before and
 * after the insertion.
 *
 * <p>Usage: {@code java AddBBeforeA inputxmlfilename}
 *
 * <p>This listing belongs to an exercise: the insertion logic in
 * {@code addBBeforeA} is kept exactly as given so that its potential
 * problems can still be studied.
 */
public class AddBBeforeA {
    /** The document produced by the most recent parse. */
    static Document document;

    /**
     * Parses the XML file named by the single command-line argument, prints
     * its element count, runs the insertion on the root element and prints
     * the element count again.
     *
     * <p>Exits with status 1 when the argument count is not exactly one.
     * On failure, prints an apology naming the file followed by a stack
     * trace on standard error.
     *
     * @param argv command-line arguments; {@code argv[0]} is the XML file name
     */
    public static void main(String argv[]) {
        if (argv.length != 1) {
            System.err.println("Usage: Java AddBBeforeA inputxmlfilename");
            System.exit(1);
        }

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            // Parse input XML file.
            DocumentBuilder builder = factory.newDocumentBuilder();
            document = builder.parse(new File(argv[0]));
            Element root = document.getDocumentElement();
            System.out.println("Number of elements in " + argv[0]
                    + " initially = " + numElements(root));

            addBBeforeA(root);

            // This second count is taken after the insertion has run.
            System.out.println("Number of elements in " + argv[0]
                    + " Before adding <B> Before <A> = " + numElements(root));
        } catch (Throwable e) {
            // Print error messages.
            System.err.println("Sorry, cannot parse the input XML file " + argv[0] + ".");
            e.printStackTrace();
        }
    } // main

    /**
     * Counts the elements in the subtree rooted at {@code element}.
     *
     * @param element root of the subtree; may be {@code null}
     * @return 0 for {@code null}; otherwise 1 for the element itself plus
     *         the count of every descendant element
     */
    private static int numElements(org.w3c.dom.Element element) {
        if (element == null) return 0;

        int result = 1; // self
        NodeList childNodes = element.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            if (childNodes.item(i).getNodeType() == Node.ELEMENT_NODE) {
                result += numElements((Element) childNodes.item(i));
            } // if
        } // for
        return result;
    } // numElements

    /**
     * Walks the direct children of {@code element} and inserts the B
     * element in front of each child element named A that the loop visits.
     *
     * <p>One B element is created per call, and the loop bound is the child
     * count taken before any insertion. Does nothing for {@code null}.
     *
     * @param element the parent whose direct children are scanned
     */
    private static void addBBeforeA(org.w3c.dom.Element element) {
        if (element == null) return;

        NodeList childNodes = element.getChildNodes();
        Element bNode = document.createElement("B");
        int numChildren = childNodes.getLength();
        for (int i = 0; i < numChildren; i++) {
            if (childNodes.item(i).getNodeType() == Node.ELEMENT_NODE
                    && "A".equals(((Element) childNodes.item(i)).getNodeName())) {
                element.insertBefore(bNode, childNodes.item(i));
            } // if
        } // for
    } // addBBeforeA
} // AddBBeforeA
For input file:
<?xml version="1.0" ?>
<root>
<C />
<A />
<C />
<D />
</root>
The output is:
Number of elements in AddBBeforeAtest1.xml initially = 5
Number of elements in AddBBeforeAtest1.xml Before adding <B> Before <A>
= 6
Can you spot any potential problems?
3. Other DOM Topics
3.1 Namespace Support
Dr. Kwok-Bun Yue
Professor, Computer Science and Computer Information Systems
Chair, Division of Computing and Mathematics
University of Houston-Clear Lake
2700 Bay Area Boulevard
Houston, TX 77058
Yue's home page
yue@uhcl.edu
281-283-3864