python解析xml文件——通过etree来解析xml文件
2017-07-21 11:21
423 查看
利用from lxml import etree来解析
---------------------------这个是有背包的情况下---------------------------------
<?xml version="1.0" ?>
<annotation>
<size>
<width>75</width>
<height>177</height>
<depth>3</depth>
</size>
<gender>1</gender>
<hairlength>0</hairlength>
<object>
<name>head</name>
<bndbox>
<xmin>38</xmin>
<ymin>1</ymin>
<xmax>58</xmax>
<ymax>26</ymax>
</bndbox>
</object>
<object>
<name>top</name>
<category>3</category>
<color>8</color>
<bndbox>
<xmin>17</xmin>
<ymin>23</ymin>
<xmax>71</xmax>
<ymax>106</ymax>
</bndbox>
</object>
<object>
<name>down</name>
<category>0</category>
<color>0</color>
<bndbox>
<xmin>30</xmin>
<ymin>105</ymin>
<xmax>57</xmax>
<ymax>121</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>7</color>
<bndbox>
<xmin>29</xmin>
<ymin>122</ymin>
<xmax>42</xmax>
<ymax>168</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>7</color>
<bndbox>
<xmin>43</xmin>
<ymin>122</ymin>
<xmax>57</xmax>
<ymax>168</ymax>
</bndbox>
</object>
<object>
<name>bag</name>
<category>0</category>
<color>7</color>
<bndbox>
<xmin>9</xmin>
<ymin>55</ymin>
<xmax>27</xmax>
<ymax>84</ymax>
</bndbox>
</object>
</annotation>
-------------------------------------------这个是没有背包的情况下:此时在object标签下面没有bag这个label----------------------
<?xml version="1.0" ?>
<annotation>
<size>
<width>66</width>
<height>199</height>
<depth>3</depth>
</size>
<gender>1</gender>
<hairlength>0</hairlength>
<object>
<name>head</name>
<bndbox>
<xmin>23</xmin>
<ymin>2</ymin>
<xmax>50</xmax>
<ymax>34</ymax>
</bndbox>
</object>
<object>
<name>top</name>
<category>2</category>
<color>2</color>
<bndbox>
<xmin>1</xmin>
<ymin>35</ymin>
<xmax>63</xmax>
<ymax>132</ymax>
</bndbox>
</object>
<object>
<name>down</name>
<category>0</category>
<color>0</color>
<bndbox>
<xmin>9</xmin>
<ymin>131</ymin>
<xmax>39</xmax>
<ymax>167</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>0</color>
<bndbox>
<xmin>26</xmin>
<ymin>164</ymin>
<xmax>46</xmax>
<ymax>195</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>0</color>
<bndbox>
<xmin>11</xmin>
<ymin>174</ymin>
<xmax>28</xmax>
<ymax>193</ymax>
</bndbox>
</object>
</annotation>
Q1:
遇到的第一个难题就是:如何解析<gender>以及<hairlength>,
多亏了https://zhidao.baidu.com/question/523477874624497285.html
利用findall方法,按gender和hairlength这两个标签名查找节点并读取其文本,这个问题就解决了
Q2:如何解析根节点下面的子节点的内容:
http://blog.csdn.net/oxiangduinishuo1/article/details/51864503 这个主要是讲理论的;参考 http://www.cnblogs.com/hongten/p/hongten_python_xml_etree_elementtree.html 这个教程,写出了读取图片size的代码。但是还有个问题:多个object标签怎么处理?
http://techpool.iteye.com/blog/643667,可以通过if判断标签名字来解决,这个链接给我了灵感
Q3:卡了半天。不同的文件是否包含背包并不一致,有的文件里没有bag这个object,因此我做了一个obj_name_list,把所有object的name都放到这个list当中,然后判断'bag'是否在list当中;如果不在,就按第二个xml文件(没有背包)的情况来处理
-----------------------------最后贴一下代码-----------------------------------------
# -*- coding: utf-8 -*-
# Parse one annotation XML file with lxml.etree and expose its root element.
__author__ = 'xuy'

from lxml import etree

# Label tables. gender_list and hair_list are indexed with the integer read
# from <gender> / <hairlength>. The remaining tables are presumably indexed
# with the <category> / <color> integers of each object -- they are not used
# in the visible code, so verify before relying on that.
gender_list = ['male', 'female']
hair_list = ['long', 'short', 'other']
top_list = ['T-shirt', 'skirt', 'waitao', 'rurongfu', 'xifu', 'other']  # upper-body clothing
down_list = ['changku', 'duanku', 'changqun', 'duanqun', 'other']
shoes_list = ['pixie', 'yundongxie', 'liangxie', 'xuezi', 'other']
bag_list = ['danjianbao', 'shuangjianbao', 'shoulaxiang', 'qianbao', 'other']
# NOTE(review): 'purpose' looks like a typo for 'purple'; left unchanged
# because the label may be relied on elsewhere -- confirm before renaming.
color_list = ['black', 'white', 'red', 'yellow', 'blue', 'green', 'purpose',
              'brown', 'gray', 'orange', 'multi_color', 'other']

# The file is parsed once; every lookup below starts from root_node.
xml_file = etree.parse('IMG_000009.xml')
root_node = xml_file.getroot()
def gender_hair_node(type, root=None):
    """Return the integer stored in the first ``<type>`` child of the root.

    Args:
        type: Tag name to look up directly under the root, e.g. ``'gender'``
            or ``'hairlength'``. The name shadows the builtin ``type``; it is
            kept so existing callers keep working.
        root: Element to search. Defaults to the module-level ``root_node``.

    Returns:
        The element text converted with ``int``, or ``None`` when the element
        is missing or has no text.
    """
    if root is None:
        root = root_node
    node = root.find(type)
    if node is None or node.text is None:
        # Make the "tag not present" case explicit instead of failing on an
        # unbound local.
        return None
    return int(node.text)
"""
输出文件的大小
"""
# Image size: read <width>/<height>/<depth> under every <size> element.
for size_node in root_node.findall('size'):
    pic_width = int(size_node.find('width').text)
    pic_height = int(size_node.find('height').text)
    pic_depth = int(size_node.find('depth').text)
    # Apply the format with %; passing the value after a comma printed the
    # literal "%d" followed by the number.
    print("图片大小:%d" % pic_width)
    print("图片大小:%d" % pic_height)
    print("图片大小:%d" % pic_depth)

# Gender and hair length: the stored integers index the label tables.
gender_arr = gender_list[gender_hair_node('gender')]
hair_arr = hair_list[gender_hair_node('hairlength')]
print(gender_arr)
print(hair_arr)

# Head box: the head object has no <category>/<color>, so it is read
# separately from the other objects.
all_object_node = root_node.findall('object')
for obj_node in all_object_node:
    obj_name = obj_node.find('name').text
    if obj_name == 'head':
        head_obj_name = obj_name + '_head'
        for bndbox_node in obj_node.findall('bndbox'):
            head_xmin = int(bndbox_node.find('xmin').text)
            head_ymin = int(bndbox_node.find('ymin').text)
            head_xmax = int(bndbox_node.find('xmax').text)
            head_ymax = int(bndbox_node.find('ymax').text)
            # Printed inside the loop so a file without a head box does not
            # hit undefined names.
            print(head_xmin)
            print(head_ymin)
            print(head_xmax)
            print(head_ymax)
print("-----------------------------------------------")
def _int_child(node, tag):
    """Return the int text of ``node``'s first ``tag`` child, or None if absent."""
    child = node.find(tag)
    if child is None or child.text is None:
        return None
    return int(child.text)


def object_node(node_name, root=None):
    """Read, print and return the first ``<object>`` whose ``<name>`` matches.

    Args:
        node_name: Value of the ``<name>`` child to look for, e.g. ``'top'``.
        root: Element whose ``<object>`` children are searched. Defaults to
            the module-level ``root_node``.

    Returns:
        ``(node_name, category, color, xmin, ymin, xmax, ymax)`` for the first
        matching object, or ``None`` when no object has that name. Fields
        whose element is missing are ``None``. If an object has several
        ``<bndbox>`` children, the coordinates of the last one are returned.
    """
    if root is None:
        root = root_node
    for obj_node in root.findall('object'):
        name_node = obj_node.find('name')
        if name_node is None or name_node.text != node_name:
            continue
        category = _int_child(obj_node, 'category')
        color = _int_child(obj_node, 'color')
        # Pre-set so an object without <bndbox> yields None coordinates.
        xmin = ymin = xmax = ymax = None
        for bndbox_node in obj_node.findall('bndbox'):
            xmin = _int_child(bndbox_node, 'xmin')
            ymin = _int_child(bndbox_node, 'ymin')
            xmax = _int_child(bndbox_node, 'xmax')
            ymax = _int_child(bndbox_node, 'ymax')
        result = (node_name, category, color, xmin, ymin, xmax, ymax)
        # One value per line, in tuple order (category included).
        for value in result:
            print(value)
        return result
    return None
# Clothing parts: each call prints the object's fields and returns them.
print("-----------------------------------------------")
top_node_name, top_category, top_color, top_xmin, top_ymin, top_xmax, top_ymax = object_node('top')
print("-----------------------------------------------")
down_node_name, down_category, down_color, down_xmin, down_ymin, down_xmax, down_ymax = object_node('down')
print("-----------------------------------------------")
shoes_node_name, shoes_category, shoes_color, shoes_xmin, shoes_ymin, shoes_xmax, shoes_ymax = object_node('shoes')
# Alias for the original misspelled name (double underscore).
shoes__xmax = shoes_xmax
print("-----------------------------------------------")

# Bag: not every annotation file has a bag object, so it is handled apart
# from the parts above and defaults to "no bag".
all_object_node = root_node.findall('object')
has_bag = False
bag_xmin = bag_ymin = bag_xmax = bag_ymax = None
# Built once, outside the loop, so it really holds every object name.
obj_name_list = []
for obj_node in all_object_node:
    obj_name = obj_node.find('name').text
    obj_name_list.append(obj_name)
    if obj_name == 'bag':
        has_bag = True
        for bndbox_node in obj_node.findall('bndbox'):
            bag_xmin = int(bndbox_node.find('xmin').text)
            bag_ymin = int(bndbox_node.find('ymin').text)
            bag_xmax = int(bndbox_node.find('xmax').text)
            bag_ymax = int(bndbox_node.find('ymax').text)

print('%s %s' % ('是否背包:', has_bag))
print(bag_xmin)
print(bag_ymin)
print(bag_xmax)
print(bag_ymax)
总结:应该再看一下etree的官方文档,基本上是第二次写xml解析了,还不是特别熟练,以后应该多加练习
---------------------------这个是有背包的情况下---------------------------------
<?xml version="1.0" ?>
<annotation>
<size>
<width>75</width>
<height>177</height>
<depth>3</depth>
</size>
<gender>1</gender>
<hairlength>0</hairlength>
<object>
<name>head</name>
<bndbox>
<xmin>38</xmin>
<ymin>1</ymin>
<xmax>58</xmax>
<ymax>26</ymax>
</bndbox>
</object>
<object>
<name>top</name>
<category>3</category>
<color>8</color>
<bndbox>
<xmin>17</xmin>
<ymin>23</ymin>
<xmax>71</xmax>
<ymax>106</ymax>
</bndbox>
</object>
<object>
<name>down</name>
<category>0</category>
<color>0</color>
<bndbox>
<xmin>30</xmin>
<ymin>105</ymin>
<xmax>57</xmax>
<ymax>121</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>7</color>
<bndbox>
<xmin>29</xmin>
<ymin>122</ymin>
<xmax>42</xmax>
<ymax>168</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>7</color>
<bndbox>
<xmin>43</xmin>
<ymin>122</ymin>
<xmax>57</xmax>
<ymax>168</ymax>
</bndbox>
</object>
<object>
<name>bag</name>
<category>0</category>
<color>7</color>
<bndbox>
<xmin>9</xmin>
<ymin>55</ymin>
<xmax>27</xmax>
<ymax>84</ymax>
</bndbox>
</object>
</annotation>
-------------------------------------------这个是没有背包的情况下:此时在object标签下面没有bag这个label----------------------
<?xml version="1.0" ?>
<annotation>
<size>
<width>66</width>
<height>199</height>
<depth>3</depth>
</size>
<gender>1</gender>
<hairlength>0</hairlength>
<object>
<name>head</name>
<bndbox>
<xmin>23</xmin>
<ymin>2</ymin>
<xmax>50</xmax>
<ymax>34</ymax>
</bndbox>
</object>
<object>
<name>top</name>
<category>2</category>
<color>2</color>
<bndbox>
<xmin>1</xmin>
<ymin>35</ymin>
<xmax>63</xmax>
<ymax>132</ymax>
</bndbox>
</object>
<object>
<name>down</name>
<category>0</category>
<color>0</color>
<bndbox>
<xmin>9</xmin>
<ymin>131</ymin>
<xmax>39</xmax>
<ymax>167</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>0</color>
<bndbox>
<xmin>26</xmin>
<ymin>164</ymin>
<xmax>46</xmax>
<ymax>195</ymax>
</bndbox>
</object>
<object>
<name>shoes</name>
<category>3</category>
<color>0</color>
<bndbox>
<xmin>11</xmin>
<ymin>174</ymin>
<xmax>28</xmax>
<ymax>193</ymax>
</bndbox>
</object>
</annotation>
Q1:
遇到的第一个难题就是:如何解析<gender>以及<hairlength>,
多亏了https://zhidao.baidu.com/question/523477874624497285.html
利用findall方法,按gender和hairlength这两个标签名查找节点并读取其文本,这个问题就解决了
Q2:如何解析根节点下面的子节点的内容:
http://blog.csdn.net/oxiangduinishuo1/article/details/51864503 这个主要是讲理论的;参考 http://www.cnblogs.com/hongten/p/hongten_python_xml_etree_elementtree.html 这个教程,写出了读取图片size的代码。但是还有个问题:多个object标签怎么处理?
http://techpool.iteye.com/blog/643667,可以通过if判断标签名字来解决,这个链接给我了灵感
Q3:卡了半天。不同的文件是否包含背包并不一致,有的文件里没有bag这个object,因此我做了一个obj_name_list,把所有object的name都放到这个list当中,然后判断'bag'是否在list当中;如果不在,就按第二个xml文件(没有背包)的情况来处理
-----------------------------最后贴一下代码-----------------------------------------
# -*- coding: utf-8 -*-
# Parse one annotation XML file with lxml.etree and expose its root element.
__author__ = 'xuy'

from lxml import etree

# Label tables. gender_list and hair_list are indexed with the integer read
# from <gender> / <hairlength>. The remaining tables are presumably indexed
# with the <category> / <color> integers of each object -- they are not used
# in the visible code, so verify before relying on that.
gender_list = ['male', 'female']
hair_list = ['long', 'short', 'other']
top_list = ['T-shirt', 'skirt', 'waitao', 'rurongfu', 'xifu', 'other']  # upper-body clothing
down_list = ['changku', 'duanku', 'changqun', 'duanqun', 'other']
shoes_list = ['pixie', 'yundongxie', 'liangxie', 'xuezi', 'other']
bag_list = ['danjianbao', 'shuangjianbao', 'shoulaxiang', 'qianbao', 'other']
# NOTE(review): 'purpose' looks like a typo for 'purple'; left unchanged
# because the label may be relied on elsewhere -- confirm before renaming.
color_list = ['black', 'white', 'red', 'yellow', 'blue', 'green', 'purpose',
              'brown', 'gray', 'orange', 'multi_color', 'other']

# The file is parsed once; every lookup below starts from root_node.
xml_file = etree.parse('IMG_000009.xml')
root_node = xml_file.getroot()
def gender_hair_node(type, root=None):
    """Return the integer stored in the first ``<type>`` child of the root.

    Args:
        type: Tag name to look up directly under the root, e.g. ``'gender'``
            or ``'hairlength'``. The name shadows the builtin ``type``; it is
            kept so existing callers keep working.
        root: Element to search. Defaults to the module-level ``root_node``.

    Returns:
        The element text converted with ``int``, or ``None`` when the element
        is missing or has no text.
    """
    if root is None:
        root = root_node
    node = root.find(type)
    if node is None or node.text is None:
        # Make the "tag not present" case explicit instead of failing on an
        # unbound local.
        return None
    return int(node.text)
"""
输出文件的大小
"""
# Image size: read <width>/<height>/<depth> under every <size> element.
for size_node in root_node.findall('size'):
    pic_width = int(size_node.find('width').text)
    pic_height = int(size_node.find('height').text)
    pic_depth = int(size_node.find('depth').text)
    # Apply the format with %; passing the value after a comma printed the
    # literal "%d" followed by the number.
    print("图片大小:%d" % pic_width)
    print("图片大小:%d" % pic_height)
    print("图片大小:%d" % pic_depth)

# Gender and hair length: the stored integers index the label tables.
gender_arr = gender_list[gender_hair_node('gender')]
hair_arr = hair_list[gender_hair_node('hairlength')]
print(gender_arr)
print(hair_arr)

# Head box: the head object has no <category>/<color>, so it is read
# separately from the other objects.
all_object_node = root_node.findall('object')
for obj_node in all_object_node:
    obj_name = obj_node.find('name').text
    if obj_name == 'head':
        head_obj_name = obj_name + '_head'
        for bndbox_node in obj_node.findall('bndbox'):
            head_xmin = int(bndbox_node.find('xmin').text)
            head_ymin = int(bndbox_node.find('ymin').text)
            head_xmax = int(bndbox_node.find('xmax').text)
            head_ymax = int(bndbox_node.find('ymax').text)
            # Printed inside the loop so a file without a head box does not
            # hit undefined names.
            print(head_xmin)
            print(head_ymin)
            print(head_xmax)
            print(head_ymax)
print("-----------------------------------------------")
def _int_child(node, tag):
    """Return the int text of ``node``'s first ``tag`` child, or None if absent."""
    child = node.find(tag)
    if child is None or child.text is None:
        return None
    return int(child.text)


def object_node(node_name, root=None):
    """Read, print and return the first ``<object>`` whose ``<name>`` matches.

    Args:
        node_name: Value of the ``<name>`` child to look for, e.g. ``'top'``.
        root: Element whose ``<object>`` children are searched. Defaults to
            the module-level ``root_node``.

    Returns:
        ``(node_name, category, color, xmin, ymin, xmax, ymax)`` for the first
        matching object, or ``None`` when no object has that name. Fields
        whose element is missing are ``None``. If an object has several
        ``<bndbox>`` children, the coordinates of the last one are returned.
    """
    if root is None:
        root = root_node
    for obj_node in root.findall('object'):
        name_node = obj_node.find('name')
        if name_node is None or name_node.text != node_name:
            continue
        category = _int_child(obj_node, 'category')
        color = _int_child(obj_node, 'color')
        # Pre-set so an object without <bndbox> yields None coordinates.
        xmin = ymin = xmax = ymax = None
        for bndbox_node in obj_node.findall('bndbox'):
            xmin = _int_child(bndbox_node, 'xmin')
            ymin = _int_child(bndbox_node, 'ymin')
            xmax = _int_child(bndbox_node, 'xmax')
            ymax = _int_child(bndbox_node, 'ymax')
        result = (node_name, category, color, xmin, ymin, xmax, ymax)
        # One value per line, in tuple order (category included).
        for value in result:
            print(value)
        return result
    return None
# Clothing parts: each call prints the object's fields and returns them.
print("-----------------------------------------------")
top_node_name, top_category, top_color, top_xmin, top_ymin, top_xmax, top_ymax = object_node('top')
print("-----------------------------------------------")
down_node_name, down_category, down_color, down_xmin, down_ymin, down_xmax, down_ymax = object_node('down')
print("-----------------------------------------------")
shoes_node_name, shoes_category, shoes_color, shoes_xmin, shoes_ymin, shoes_xmax, shoes_ymax = object_node('shoes')
# Alias for the original misspelled name (double underscore).
shoes__xmax = shoes_xmax
print("-----------------------------------------------")

# Bag: not every annotation file has a bag object, so it is handled apart
# from the parts above and defaults to "no bag".
all_object_node = root_node.findall('object')
has_bag = False
bag_xmin = bag_ymin = bag_xmax = bag_ymax = None
# Built once, outside the loop, so it really holds every object name.
obj_name_list = []
for obj_node in all_object_node:
    obj_name = obj_node.find('name').text
    obj_name_list.append(obj_name)
    if obj_name == 'bag':
        has_bag = True
        for bndbox_node in obj_node.findall('bndbox'):
            bag_xmin = int(bndbox_node.find('xmin').text)
            bag_ymin = int(bndbox_node.find('ymin').text)
            bag_xmax = int(bndbox_node.find('xmax').text)
            bag_ymax = int(bndbox_node.find('ymax').text)

print('%s %s' % ('是否背包:', has_bag))
print(bag_xmin)
print(bag_ymin)
print(bag_xmax)
print(bag_ymax)
总结:应该再看一下etree的官方文档,基本上是第二次写xml解析了,还不是特别熟练,以后应该多加练习
相关文章推荐
- python-xml.etree.ElementTree解析xml文件
- 使用Python和xml.etree.ElementTree解析xml文件
- Python xml文件解析
- 使用python将xml文件解析成html文件
- python 6-3 如何解析简单的xml文档 使用标准库中的xml.etree.ElementTree,其中parse函数可以解析xml文档
- Python语言解析xml文件
- 通过Spring工具类获取classpath下的文件资源,解析xml
- Python解析XML文件
- oracle存储过程通过http接收xml文件并解析入库
- Python 解析XML文件
- python解析xml文件
- python解析xml文件实例分析
- Python xml.etree.ElementTree解析XML文件实例演示(十六02)
- Python 利用 XSD 文件,解析 XML 文件
- Python解析XML文件
- Python解析大XML文件及读取XML不全的问题
- python开发_xml.etree.ElementTree_XML文件操作_该模块在操作XML数据是存在安全隐患_慎用
- python创建并解析xml文件
- python开发_xml.etree.ElementTree_XML文件操作_该模块在操作XML数据是存在安全隐患_慎用
- python解析xml文件操作实例