码迷,mamicode.com
首页 > 编程语言 > 详细

python解析xml文件操作的例子

时间:2014-10-08 08:09:14      阅读:252      评论:0      收藏:0      [点我收藏+]

标签:des   style   blog   http   color   io   ar   for   文件   

python解析xml文件操作实例,操作XML文件的常见技巧

xml文件内容:

<?xml version="1.0" ?> 
<!--Simple xml document__chapter 8-->
<book> 
<title> 
sample xml thing 
</title> 
<author> 
<name> 
<first> 
ma 
</first> 
<last> 
xiaoju 
</last> 
</name> 
<affiliation> 
Springs Widgets, Inc. 
</affiliation> 
</author> 
<chapter number="1"> 
<title> 
First 
</title> 
<para> 
I think widgets are great. You should buy lots of them from 
<company> 
Spirngy Widgts, Inc 
</company> 
</para> 
</chapter> 
</book> 

python代码

from xml.dom import minidom, Node 
import re, textwrap ## www.jbxue.com

class SampleScanner:
    """Walk a parsed ``<book>`` XML document and print selected fields.

    All work happens during construction: every top-level ``<book>``
    element of the document is scanned and its title, author name,
    author affiliation, and per-chapter number / title / company are
    written to standard output.
    """

    # Compiled once; gettext() uses it to collapse runs of whitespace.
    _WHITESPACE_RUN = re.compile(r"\s+")

    def __init__(self, doc):
        """Scan *doc* and print what is found.

        Args:
            doc: a ``minidom.Document`` (e.g. from ``minidom.parse``).

        Raises:
            AssertionError: if *doc* is not a ``minidom.Document``.
        """
        # Kept as an assert so existing callers see the same exception type.
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handle_book(child)

    def handle_book(self, node):
        """Dispatch the direct element children of a ``<book>`` node."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "title":
                print("Book title is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a chapter's ``number`` attribute and title, then its paras."""
        print("number: %s" % node.getAttribute("number"))
        # getElementsByTagName returns element nodes; gettext() descends
        # into them to collect their text.
        title_nodes = node.getElementsByTagName("title")
        print("title: %s" % self.gettext(title_nodes))

        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of every ``<company>`` element inside a para."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the author's name and affiliation."""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` parts of a ``<name>`` node."""
        first = ""
        last = ""
        for child in node.childNodes:
            if child.nodeType != Node.ELEMENT_NODE:
                continue
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                # Compare against the tag name "last", not the local variable.
                last = self.gettext(child.childNodes)

        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the text under *nodelist* with whitespace runs collapsed.

        Text and CDATA nodes contribute their own data; any other node
        that has children is searched recursively. Each run of whitespace
        becomes a single space; leading and trailing spaces are kept.

        Args:
            nodelist: an iterable of DOM nodes.

        Returns:
            The concatenated, whitespace-collapsed text.
        """
        parts = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use .data rather than .wholeText: wholeText spans all
                # adjacent text/CDATA siblings, so it would repeat the
                # same text once per sibling.
                parts.append(node.data)
            elif node.hasChildNodes():
                parts.append(self.gettext(node.childNodes))

        return self._WHITESPACE_RUN.sub(" ", "".join(parts))

if __name__ == "__main__":
    # Script entry point: parse simple.xml from the current directory and
    # hand the document to SampleScanner, which prints as it walks the tree.
    doc = minidom.parse("simple.xml")
    sample = SampleScanner(doc)

python解析xml文件操作的例子

标签:des   style   blog   http   color   io   ar   for   文件   

原文地址:http://www.cnblogs.com/yes123/p/4010383.html

(0)
(0)
   
举报
评论 一句话评论(0
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!