您的位置:首页 > 编程语言 > Python开发

Python excel转xml

2016-03-31 16:48 676 查看
#! encoding=utf-8

import xlrd
import xml.dom.minidom
import os

def open_excel(file):
try:
data = xlrd.open_workbook(file)
return data
except Exception, e:
print str(e)

def translate_excel_to_xml(excel_absolute_path, name, generate_xml_dir, parent_list, parent_list_names, parent_list_name, child_list_names="" , child_list_name="", colnnameindex=0, by_index=0, child_index=1):

#解析excel文件
data = open_excel(excel_absolute_path)

#获取需要的工作表
table = data.sheets()[by_index]

#行数
nrows = table.nrows

#列数
ncols = table.ncols
if(child_list_names):
childtable = data.sheets()[child_index]
child_nrows = childtable.nrows
child_ncols = childtable.ncols

#创建dom文档对象
doc = xml.dom.minidom.Document()
# 顶级元素创建
parent = doc.createElement(parent_list)
#创建根元素
doc.appendChild(parent)
info = doc.createElement(parent_list_names)

#将根元素添加到文档中区
parent.appendChild(info)

for nrow in range(1, nrows):
#创建元素
item = doc.createElement(parent_list_name)
for ncol in range(0, ncols):
#colnames = table.col_values(ncol)
#print colnames
#            print table.cell(nrow, ncol).value
key = u"%s" % table.cell(0, ncol).value
value = table.cell(nrow, ncol).value
if isinstance(value, float):
value = '%0d' % value
value = '%s' % value
#            print type(key), type(value)
#将数据都作为xml中元素的属性,属性名就是第一行的值,属性值就是某一行某一列的值
# item.setAttribute(key.encode('utf-8'), value.encode('utf-8'))
# tmp = key.encode('utf-8')
tmp = doc.createElement(key.encode('utf-8'))
tmp.appendChild(doc.createTextNode(value.encode('utf-8')))
item.appendChild(tmp)

# 请求子表
# 子表list名称作为参数传进来
if(child_list_names):
childlistnames = doc.createElement(child_list_names)
for crow in range(1,child_nrows):
p_value = table.cell(nrow,0).value
c_value = childtable.cell(crow,0).value
if(p_value == c_value):
# print c_value,p_value
list = doc.createElement(child_list_name)
for cls in range(0,child_ncols):
ckey = u"%s" % childtable.cell(0, cls).value
cvalue = childtable.cell(crow, cls).value
if isinstance(cvalue, float):
cvalue = '%0d' % cvalue
cvalue = '%s' % cvalue
# ctmp = ckey.encode('utf-8')
ctmp = doc.createElement(ckey.encode('utf-8'))
ctmp.appendChild(doc.createTextNode(cvalue.encode('utf-8')))
list.appendChild(ctmp)
#将此元素作为根元素的子节点
childlistnames.appendChild(list)
item.appendChild(childlistnames)
info.appendChild(item)

#要生成的xml文件名
generate_xml_name = name.strip().split('.')[0] + '.xml'
#要生成的xml文件到某个目录的绝对路径
geneate_xml_dir = os.path.join(generate_xml_dir, generate_xml_name)

f = open(geneate_xml_dir, 'w')
Indent(doc,parent)
# f.write(doc.toprettyxml())          #可以使生成xml有好看的格式,要是不需要,注释上一行,采用此行作为输出
doc.writexml(f)  # 已经用indent函数美化过了
f.close()

def find_assign_xlsx(xlsx_path, generate_xml_dir,parent_list,parent_list_names,parent_list_name,child_list_names="",child_list_name=""):
for name in os.listdir(xlsx_path):
if (name.endswith('.xls') or name.endswith('.xlsx')):
#生成excel文件的绝对路径
excel_absolute_path = os.path.join(xlsx_path, name)
#解析excel并转成xml
translate_excel_to_xml(excel_absolute_path, name,generate_xml_dir, parent_list,parent_list_names,parent_list_name,child_list_names,child_list_name)

def Indent(dom, node, indent = 0):
children = node.childNodes[:]
if indent:
text = dom.createTextNode('\n' + '\t' * indent)
node.parentNode.insertBefore(text, node)
if children:
if children[-1].nodeType == node.ELEMENT_NODE:
text = dom.createTextNode('\n' + '\t' * indent)
node.appendChild(text)

4000
for n in children:
if n.nodeType == node.ELEMENT_NODE:
Indent(dom, n, indent + 1)

def run_excel_to_xml(type=1):
#车辆计划type=1
if type == 1:
#excel文件路径,xml生成的文件路径
excel_src_path   = r'D:\Python\excel\1'
generate_xml_dir = r'D:\Python\data\1'
# 子表在excel中不存在的节点
child_list_names = "transportPlanDetails"
child_list_name  = "transportPlanDetail"
#父表在excel中不存在的节点
parent_list       = "transportPlanRequest"
parent_list_names = "transportPlans"
parent_list_name  = "transportPlan"
find_assign_xlsx(excel_src_path.decode('utf-8'), generate_xml_dir,parent_list,parent_list_names,parent_list_name,child_list_names,child_list_name)
elif type == 2:
excel_src_path   = r'D:\Python\excel\2'
generate_xml_dir = r'D:\Python\data\2'
parent_list       = "vehicleTrackRequest"
parent_list_names = "vehicleTrackList"
parent_list_name  = "vehicleTrack"
find_assign_xlsx(excel_src_path.decode('utf-8'), generate_xml_dir,parent_list,parent_list_names,parent_list_name)
else:
excel_src_path   = r'D:\Python\excel\3'
generate_xml_dir = r'D:\Python\data\3'
parent_list       = "transportPaidRequest"
parent_list_names = "transportPaidList"
parent_list_name  = "transportPaid"
find_assign_xlsx(excel_src_path.decode('utf-8'), generate_xml_dir,parent_list,parent_list_names,parent_list_name)
if __name__ == "__main__":

#车辆计划type=1,车辆轨迹2,实收实发3
run_excel_to_xml(1)
run_excel_to_xml(2)
run_excel_to_xml(3)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  python xml excel