标签:
import xml.dom.minidom

# Sample slideshow, parsed with xml.dom.minidom and rendered as a minimal
# HTML outline on stdout.
document = """
<slideshow>
<title>Demo slideshow</title>
<slide><title>Slide title</title>
<point>This is a demo</point>
<point>Of a program for processing slides</point>
</slide>

<slide><title>Another demo slide</title>
<point>It is important</point>
<point>To have more than</point>
<point>one slide</point>
</slide>
</slideshow>
"""

# Parse the string into an XML document object.
dom = xml.dom.minidom.parseString(document)


def getText(nodelist):
    """Return the concatenated data of the text nodes in *nodelist*.

    Only nodes whose ``nodeType`` is ``TEXT_NODE`` contribute; every other
    node is skipped without descending into it. An empty *nodelist* yields
    an empty string.

    NOTE: the text is returned as-is; callers print it into HTML unescaped.
    """
    return ''.join(
        node.data for node in nodelist if node.nodeType == node.TEXT_NODE
    )


def handleSlideshow(slideshow):
    """Print the whole slideshow: title, table of contents, then slides."""
    print("<html>")
    # The first <title> element in document order is used as the page title.
    handleSlideshowTitle(slideshow.getElementsByTagName("title")[0])
    slides = slideshow.getElementsByTagName("slide")
    handleToc(slides)
    handleSlides(slides)
    print("</html>")


def handleSlides(slides):
    """Print every slide in *slides*, in order."""
    for slide in slides:
        handleSlide(slide)


def handleSlide(slide):
    """Print one slide: its title followed by its bullet points."""
    handleSlideTitle(slide.getElementsByTagName("title")[0])
    handlePoints(slide.getElementsByTagName("point"))


def handleSlideshowTitle(title):
    """Print the text of the *title* element's child nodes as ``<title>``."""
    print("<title>%s</title>" % getText(title.childNodes))


def handleSlideTitle(title):
    """Print the text of the *title* element's child nodes as ``<h2>``."""
    print("<h2>%s</h2>" % getText(title.childNodes))


def handlePoints(points):
    """Print *points* as an unordered list (``<ul>`` ... ``</ul>``)."""
    print("<ul>")
    for point in points:
        handlePoint(point)
    print("</ul>")


def handlePoint(point):
    """Print the text of the *point* element's child nodes as ``<li>``."""
    print("<li>%s</li>" % getText(point.childNodes))


def handleToc(slides):
    """Print a table of contents: one ``<p>`` per slide title."""
    for slide in slides:
        title = slide.getElementsByTagName("title")[0]
        print("<p>%s</p>" % getText(title.childNodes))


handleSlideshow(dom)
标签:
原文地址:http://www.cnblogs.com/zhuweiblog/p/5191653.html