一. xml相关术语:
1.Document(文档): 对应一个xml文件
2.Declaration(声明):
<?xml version="1.0" encoding="utf-8"?>
version指定了版本,encoding指定了文件编码
3.Comment(注释),同html中的注释
4.Element(元素):指的是从(且包括)开始标签直到(且包括)结束标签的部分,如
<book_store name="newhua" website="https://www.amazon.cn/b?node=1876097071">
    <book1>
        <name>hamlet</name>
        <author>William Shakespeare</author>
    </book1>
</book_store>
5.Tag(标签): 用于表示元素的起始与结束，如book1,name,author等
6.Attribute(属性),如上面的name,website
7.Text(文本),如hamlet
二.解析xml
1.解析xml文档,有三种方法:
from xml.dom.minidom import parse, parseString

# 1) Parse an XML document by file name.
dom1 = parse('test.xml')

# 2) Parse an already-open file object. Binary mode lets the parser honour
#    the encoding named in the XML declaration, and the ``with`` block
#    guarantees the handle is closed afterwards.
with open('test.xml', 'rb') as data:
    dom2 = parse(data)

# 3) Parse XML held in a string.
# NOTE(review): the element names below are reconstructed -- the original
# markup was stripped during extraction and only the text content survived.
note = """
<note>
<heading>Reminder</heading>
<body>Don't be late for the meeting</body>
</note>
"""
dom3 = parseString(note)
2.得到根元素
from xml.dom.minidom import parse

# Parse the XML document by file name.
doc = parse('test.xml')
# documentElement is the single root element of the parsed document.
root = doc.documentElement
三.创建xml
from xml.dom.minidom import Document, getDOMImplementation

# Option 1: instantiate an empty document directly.
doc = Document()

# Option 2: go through the DOM implementation object, which creates the
# document and its root element (<book_store>) in a single call.
impl = getDOMImplementation()
doc = impl.createDocument(None, "book_store", None)
print(doc.documentElement.tagName)  # book_store

# With every argument set to None no root element is added, so the result
# is an empty document just like Document().
doc2 = impl.createDocument(None, None, None)
四.类及层次结构
可以发现：Element,Text,Comment,Attribute的创建工作全部由Document完成，然后通过appendChild或insertBefore方法将新的对象插入到Document中。
五.具体操作
1.解析xml文件
movies.xml
<collection shelf="New Arrivals">
<movie title="Enemy Behind">
    <type>War, Thriller</type>
    <format>DVD</format>
    <year>2003</year>
    <rating>PG</rating>
    <stars>10</stars>
    <description>Talk about a US-Japan war</description>
</movie>
<movie title="Transformers">
    <type>Anime, Science Fiction</type>
    <format>DVD</format>
    <year>1989</year>
    <rating>R</rating>
    <stars>8</stars>
    <description>A schientific fiction</description>
</movie>
<movie title="Trigun">
    <type>Anime, Action</type>
    <format>DVD</format>
    <episodes>4</episodes>
    <rating>PG</rating>
    <stars>10</stars>
    <description>Vash the Stampede!</description>
</movie>
<movie title="Ishtar">
    <type>Comedy</type>
    <format>VHS</format>
    <rating>PG</rating>
    <stars>2</stars>
    <description>Viewable boredom</description>
</movie>
</collection>
代码:
import xml.dom.minidom
from xml.dom.minidom import parse

# Open the XML document with the minidom parser.
tree = xml.dom.minidom.parse('movies.xml')
# print(type(tree))  # <class 'xml.dom.minidom.Document'>

# Root element of the document (<collection>).
collection = tree.documentElement
if collection.hasAttribute('shelf'):
    print("root element attribute:", collection.getAttribute("shelf"))

# Every <movie> element below the root.
movie_list = collection.getElementsByTagName('movie')
movie_info_list = []
for movie in movie_list:
    print('******Movie*****')
    if movie.hasAttribute("title"):
        title = movie.getAttribute('title')
        print('Title', title)

    # Each field is a child element whose only child is a text node;
    # ``.data`` is that node's string content. ``childNodes[0]`` and
    # ``firstChild`` are interchangeable here. The locals are named
    # movie_type / movie_format so the builtins ``type`` and ``format``
    # are not shadowed.
    movie_type = movie.getElementsByTagName('type')[0].childNodes[0].data
    movie_format = movie.getElementsByTagName('format')[0].childNodes[0].data
    rating = movie.getElementsByTagName("rating")[0].firstChild.data
    stars = movie.getElementsByTagName('stars')[0].firstChild.data
    description = movie.getElementsByTagName("description")[0].firstChild.data

    print("type: ", movie_type)
    print("format: ", movie_format)
    print("rating: ", rating)
    print("stars: ", stars)
    print('description: ', description)
2.写xml文件
效果:
<?xml version="1.0" encoding="utf-8"?>
<book_store name="amazon" website="https://www.amazon.cn/b?node=1876097071">
    <book1>
        <name>hamlet</name>
        <author>William Shakespeare</author>
    </book1>
</book_store>
代码:
from xml.dom.minidom import Document

doc = Document()

# A comment node placed ahead of the root element.
comment = doc.createComment('just a comment about book_store')
doc.appendChild(comment)

# Equivalent way to obtain an empty document:
#   from xml.dom.minidom import getDOMImplementation
#   impl = getDOMImplementation()
#   doc = impl.createDocument(None, None, None)

# Root element and its attributes.
book_store = doc.createElement('book_store')
book_store.setAttribute('name', 'amazon')
book_store.setAttribute('website', 'https://www.amazon.cn/b?node=1876097071')
doc.appendChild(book_store)

# <book1> with two children, each holding a single text node.
book1 = doc.createElement('book1')

book1_name = doc.createElement('name')
book1_name_value = doc.createTextNode('hamlet')
book1_name.appendChild(book1_name_value)

book1_author = doc.createElement('author')
book1_author_value = doc.createTextNode('William Shakespeare')
book1_author.appendChild(book1_author_value)

book1.appendChild(book1_name)
book1.appendChild(book1_author)
book_store.appendChild(book1)

# toprettyxml returns bytes when an encoding is given, hence the decode.
print(doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8').decode('utf-8'))

# Alternative: write those bytes directly, which requires binary mode:
#   with open('book_store.xml', 'wb') as f:
#       f.write(doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8'))

# writexml emits str, so the file is opened in text mode. The file encoding
# is set explicitly so it matches the encoding named in the XML declaration.
# ``indent`` is the indentation of the outermost node and ``addindent`` is
# the extra indentation added per nesting level, so the tab goes in
# ``addindent`` to get the same layout as toprettyxml(indent='\t').
with open('test_store.xml', 'w', encoding='utf-8') as f:
    doc.writexml(f, indent='', addindent='\t', newl='\n', encoding='utf-8')
3.简单封装
import xml.dom.minidom


class MyXMLGenerator:
    """Small convenience wrapper around a minidom ``Document``.

    Nodes are created through the wrapped document, attached with
    ``appendChild`` and finally serialized to ``xml_name`` by ``genXML``.
    """

    def __init__(self, xml_name):
        """Start an empty document that ``genXML`` will write to *xml_name*."""
        self.xml_name = xml_name
        self.doc = xml.dom.minidom.Document()

    def createComment(self, value):
        """Create a comment node with text *value* and append it to the document."""
        c = self.doc.createComment(value)
        self.doc.appendChild(c)

    def setNodeAttribute(self, node, attname, value):
        """Set attribute *attname* of *node* to *value*."""
        node.setAttribute(attname, value)

    def createElement(self, tagName):
        """Return a new, not yet attached, element named *tagName*."""
        ele = self.doc.createElement(tagName)
        return ele

    def appendChild(self, node, parent_node=None):
        """Append *node* to *parent_node*, or to the document when it is None."""
        if parent_node is not None:
            parent_node.appendChild(node)
        else:
            self.doc.appendChild(node)

    def setNodeValue(self, node, value):
        """Append a text node containing *value* to *node*."""
        text_node = self.doc.createTextNode(value)
        node.appendChild(text_node)

    def genXML(self):
        """Serialize the document as tab-indented UTF-8 and write it to ``xml_name``."""
        # print(self.doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8').decode('utf-8'))
        # toprettyxml returns bytes when an encoding is given, so the file is
        # opened in binary mode. Serializing first avoids creating an empty
        # file if serialization fails.
        data = self.doc.toprettyxml(indent='\t', newl='\n', encoding='utf-8')
        with open(self.xml_name, 'wb') as f:
            f.write(data)
参考:
https://docs.python.org/3/library/xml.dom.minidom.html
https://docs.python.org/3/library/xml.dom.html