tonglin0325的个人主页

使用java原生API,DOM4J,JDOM和SAX解析XML文件

解析 XML 有两种方式:SAX 和 DOM,它们各有利弊。

        DOM 是把 XML 文档全部装载到内存中,然后当成一棵树进行处理。其好处是当成树处理起来比较方便,但弊端是如果 XML 文件比较大,内存消耗也会比较大;

        SAX 是顺序扫描 XML 文档,边读取边解析,而且可以在处理 XML 文档过程中的任意时刻中止处理过程,比如找到我们的目标节点后,剩下的 XML 文档内容就可以不读了,直接结束。其弊端是操作起来相对不方便,而且对 XML 文档进行修改、新增、删除等操作也比较不方便。

        SAX 是事件驱动型 XML 解析的一个标准接口。它的工作原理是读到文档的开始与结束、标签元素的开始与结束、内容实体等地方时,触发相应的函数,我们就可以在相应的函数中进行我们所要进行的处理。

 

1.使用Java API进行DOM解析

 

 

只在根节点下面建立一个子节点

1
2
3
4
5
6
7
8
9
10
11
12
13
<?xml version="1.0" encoding="UTF-8"?>
<addresslist>
<linkman>
<name>张三</name>
<email>www.baidu.com</email>
</linkman>

<linkman>
<name>李四</name>
<email>www.sina.com</email>
</linkman>
</addresslist>

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Minimal DOM example: loads {@code dom_name.xml} into memory and prints the
 * text of the first {@code <name>} element.
 */
public class DOM_demo {

    /**
     * Parses the sample XML file and prints the first contact's name.
     *
     * @param args ignored
     * @throws Exception if the parser cannot be configured, or the file cannot
     *                   be read or is not well-formed XML
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();

        // Parse failures are allowed to propagate: catching them and carrying on
        // would leave the document null and fail below with a NullPointerException.
        Document doc = builder.parse("/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name.xml");

        NodeList nl = doc.getElementsByTagName("name");
        if (nl.getLength() == 0) {
            System.err.println("No <name> element found");
            return;
        }

        // NodeList indexes are zero-based, so the first node is item(0).
        // getTextContent() also copes with an empty element, where getFirstChild() is null.
        System.out.println("姓名:" + nl.item(0).getTextContent());
    }

}

 

一些DOM操作,循环输出节点信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * DOM example that walks every {@code <linkman>} element of
 * {@code dom_name.xml} and prints its name and e-mail address.
 */
public class DOM_demo {

    /**
     * Parses the sample XML file and prints one name/e-mail pair per contact.
     *
     * @param args ignored
     * @throws Exception if the parser cannot be configured, or the file cannot
     *                   be read or is not well-formed XML
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();

        // Parse failures are allowed to propagate: catching them and carrying on
        // would leave the document null and fail below with a NullPointerException.
        Document doc = builder.parse("/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name.xml");

        NodeList lm = doc.getElementsByTagName("linkman");
        for (int i = 0; i < lm.getLength(); i++) {
            Element contact = (Element) lm.item(i);
            System.out.println("姓名:" + childText(contact, "name"));
            System.out.println("邮箱:" + childText(contact, "email"));
        }
    }

    /**
     * Returns the text of the first descendant of {@code parent} with the given
     * tag name, or an empty string when no such element exists.
     */
    private static String childText(Element parent, String tag) {
        NodeList matches = parent.getElementsByTagName(tag);
        return matches.getLength() == 0 ? "" : matches.item(0).getTextContent();
    }

}

 

生成XML文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;

/**
 * DOM example that builds a small address-list document in memory and writes
 * it to {@code dom_name_output.xml} through an identity {@link Transformer}.
 */
public class DOM_demo {

    /**
     * Builds the document and serializes it to disk as UTF-8.
     *
     * @param args ignored
     * @throws Exception if the builder or transformer cannot be created, or the
     *                   document cannot be written
     */
    public static void main(String[] args) throws Exception {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.newDocument();

        // Create the element nodes.
        Element addresslist = doc.createElement("addresslist");
        Element linkman = doc.createElement("linkman");
        Element name = doc.createElement("name");
        Element email = doc.createElement("email");

        // Element text lives in a child text node.
        name.appendChild(doc.createTextNode("王五"));
        email.appendChild(doc.createTextNode("www.soho.com"));

        // Assemble the tree: addresslist > linkman > (name, email).
        linkman.appendChild(name);
        linkman.appendChild(email);
        addresslist.appendChild(linkman);
        doc.appendChild(addresslist);

        // Configuration and transform failures propagate to the caller. Swallowing
        // the configuration error would leave the transformer null and crash on
        // the next line; swallowing the transform error would hide a failed write.
        Transformer t = TransformerFactory.newInstance().newTransformer();
        t.setOutputProperty(OutputKeys.ENCODING, "UTF-8");

        DOMSource source = new DOMSource(doc);
        StreamResult result = new StreamResult(new File("/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name_output.xml"));
        t.transform(source, result);
    }

}

 

2.使用DOM4J解析XML

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

/**
 * DOM4J example that builds an address list with a single contact and writes
 * it, pretty-printed as UTF-8, to {@code name_out.xml}.
 */
public class DOM4JWriter {

    /**
     * Builds the document and writes it to disk.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Document doc = DocumentHelper.createDocument();

        Element addresslist = doc.addElement("addresslist");
        Element linkman = addresslist.addElement("linkman");
        // name and email belong to the contact, not to the root; attaching them
        // to addresslist would leave <linkman> empty.
        Element name = linkman.addElement("name");
        Element email = linkman.addElement("email");
        name.setText("张三");
        email.setText("www.baidu.com");

        OutputFormat format = OutputFormat.createPrettyPrint();
        format.setEncoding("UTF-8");

        // try-with-resources closes the file even when write() throws.
        try (FileOutputStream out = new FileOutputStream(new File("/home/common/software/coding/HelloWord/JavaWeb/bin/name_out.xml"))) {
            XMLWriter writer = new XMLWriter(out, format);
            writer.write(doc);
            // Push any buffered output down to the stream before it is closed.
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

}

 

解析输出文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import java.io.File;
import java.util.Iterator;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * DOM4J example that reads {@code name_out.xml} and prints the name and
 * e-mail of every child element of the root.
 */
public class DOM4JReader {

    /**
     * Reads the file and prints one name/e-mail pair per contact.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        File file = new File("/home/common/software/coding/HelloWord/JavaWeb/bin/name_out.xml");
        SAXReader reader = new SAXReader();

        Document doc;
        try {
            doc = reader.read(file);
        } catch (DocumentException e) {
            e.printStackTrace();
            // Nothing to iterate without a document; continuing would
            // dereference null.
            return;
        }

        Element root = doc.getRootElement();
        // Wildcard instead of a raw Iterator; each item is cast to Element below.
        Iterator<?> iter = root.elementIterator();
        while (iter.hasNext()) {
            Element linkman = (Element) iter.next();
            System.out.println("姓名:" + linkman.elementText("name"));
            System.out.println("邮件:" + linkman.elementText("email"));
        }
    }

}

 

3.使用JDOM解析XML文件

 

JavaDOC的网址:http://www.jdom.org/docs/apidocs/index.html

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import java.io.FileOutputStream;

import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.output.XMLOutputter;
import org.xml.sax.Attributes;

/**
 * JDOM example that builds an address list with a single contact (whose
 * {@code <name>} carries an {@code id} attribute) and writes it to
 * {@code address.xml}.
 */
public class WriteXML {

    /**
     * Builds the document and writes it to disk as UTF-8.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Create the element nodes.
        Element addresslist = new Element("addresslist");
        Element linkman = new Element("linkman");
        Element name = new Element("name");
        Element email = new Element("email");

        Attribute id = new Attribute("id", "zs");

        // addresslist becomes the document's root element.
        Document doc = new Document(addresslist);

        name.setText("张三");
        name.setAttribute(id);
        email.setText("www.baidu.com");

        // Assemble the tree: addresslist > linkman > (name, email).
        linkman.addContent(name);
        linkman.addContent(email);
        addresslist.addContent(linkman);

        XMLOutputter out = new XMLOutputter();
        out.setFormat(out.getFormat().setEncoding("UTF-8"));

        // try-with-resources guarantees the file handle is released.
        try (FileOutputStream stream = new FileOutputStream("/home/common/software/coding/HelloWord/JavaWeb/bin/address.xml")) {
            out.output(doc, stream);
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }

}

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import java.util.List;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;

/**
 * JDOM example that reads {@code address.xml} and prints the name, id and
 * e-mail of every {@code <linkman>} under the root element.
 */
public class ReadXML {

    /**
     * Reads the file and prints one block of output per contact.
     *
     * @param args ignored
     * @throws Exception if the file cannot be read or is not well-formed XML
     */
    public static void main(String[] args) throws Exception {
        SAXBuilder builder = new SAXBuilder();
        Document read_doc = builder.build("/home/common/software/coding/HelloWord/JavaWeb/bin/address.xml");
        Element stu = read_doc.getRootElement();

        List<Element> contacts = stu.getChildren("linkman");
        for (Element contact : contacts) {
            // Look the <name> element up once. A contact without <name>, or a
            // <name> without an id attribute, yields null instead of throwing.
            Element nameElement = contact.getChild("name");
            String name = nameElement == null ? null : nameElement.getText();
            String id = nameElement == null ? null : nameElement.getAttributeValue("id");
            String email = contact.getChildText("email");

            System.out.println("---------联系人---------");
            System.out.println("姓名:" + name + ",编号:" + id);
            System.out.println("邮箱:" + email);
            System.out.println("------------------");
        }
    }

}

 

4.使用SAX解析XML文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that echoes the parsed document to standard output: an XML
 * declaration at document start, then every start tag (with its attributes),
 * text run and end tag as the parser reports them.
 */
public class MySAX extends DefaultHandler {

    /** Prints a fixed XML declaration when parsing begins. */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    }

    /** Prints a closing message when parsing ends. */
    @Override
    public void endDocument() throws SAXException {
        System.out.println("\n 文档读取结束。。。");
    }

    /**
     * Prints the start tag of an element, including its attributes.
     *
     * @param uri        namespace URI (unused)
     * @param localName  local name (unused)
     * @param name       qualified element name, printed as the tag name
     * @param attributes attributes of the element; may be null
     */
    @Override
    public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
        System.out.print("<");
        System.out.print(name);
        if (attributes != null) {
            for (int i = 0; i < attributes.getLength(); i++) {
                System.out.print(" " + attributes.getQName(i) + "=\"" + attributes.getValue(i) + "\"");
            }
        }
        // The tag must be closed whether or not attributes were supplied.
        System.out.print(">");
    }

    /**
     * Prints a run of character data.
     *
     * <p>The callback must be named {@code characters} to override
     * {@link DefaultHandler}; under any other name the parser never calls it
     * and element text is silently dropped.
     *
     * @param ch     buffer holding the characters
     * @param start  offset of the first character in {@code ch}
     * @param length number of characters to print
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        System.out.print(new String(ch, start, length));
    }

    /**
     * Former, misspelled name of {@link #characters(char[], int, int)}; kept so
     * existing direct callers still compile.
     *
     * @deprecated use {@link #characters(char[], int, int)}
     */
    @Deprecated
    public void character(char[] ch, int start, int lenght) throws SAXException {
        characters(ch, start, lenght);
    }

    /**
     * Prints the end tag of an element.
     *
     * @param uri       namespace URI (unused)
     * @param localName local name (unused)
     * @param name      qualified element name, printed as the tag name
     */
    @Override
    public void endElement(String uri, String localName, String name) throws SAXException {
        System.out.print("</");
        System.out.print(name);
        System.out.print(">");
    }

}

 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

/**
 * Driver for the SAX example: runs a {@code MySAX} handler over
 * {@code dom_name.xml}.
 */
public class testSAX {

    /**
     * Obtains a SAX parser from the default factory and parses the sample file.
     *
     * @param args ignored
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception {
        // Factory and parser are obtained in one chained expression.
        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();

        final String xmlLocation = "/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name.xml";
        // The handler receives the parse events and does the printing.
        saxParser.parse(xmlLocation, new MySAX());
    }

}