Python excel转xml
2016-03-31 16:48
676 查看
#! encoding=utf-8 import xlrd import xml.dom.minidom import os def open_excel(file): try: data = xlrd.open_workbook(file) return data except Exception, e: print str(e) def translate_excel_to_xml(excel_absolute_path, name, generate_xml_dir, parent_list, parent_list_names, parent_list_name, child_list_names="" , child_list_name="", colnnameindex=0, by_index=0, child_index=1): #解析excel文件 data = open_excel(excel_absolute_path) #获取需要的工作表 table = data.sheets()[by_index] #行数 nrows = table.nrows #列数 ncols = table.ncols if(child_list_names): childtable = data.sheets()[child_index] child_nrows = childtable.nrows child_ncols = childtable.ncols #创建dom文档对象 doc = xml.dom.minidom.Document() # 顶级元素创建 parent = doc.createElement(parent_list) #创建根元素 doc.appendChild(parent) info = doc.createElement(parent_list_names) #将根元素添加到文档中区 parent.appendChild(info) for nrow in range(1, nrows): #创建元素 item = doc.createElement(parent_list_name) for ncol in range(0, ncols): #colnames = table.col_values(ncol) #print colnames # print table.cell(nrow, ncol).value key = u"%s" % table.cell(0, ncol).value value = table.cell(nrow, ncol).value if isinstance(value, float): value = '%0d' % value value = '%s' % value # print type(key), type(value) #将数据都作为xml中元素的属性,属性名就是第一行的值,属性值就是某一行某一列的值 # item.setAttribute(key.encode('utf-8'), value.encode('utf-8')) # tmp = key.encode('utf-8') tmp = doc.createElement(key.encode('utf-8')) tmp.appendChild(doc.createTextNode(value.encode('utf-8'))) item.appendChild(tmp) # 请求子表 # 子表list名称作为参数传进来 if(child_list_names): childlistnames = doc.createElement(child_list_names) for crow in range(1,child_nrows): p_value = table.cell(nrow,0).value c_value = childtable.cell(crow,0).value if(p_value == c_value): # print c_value,p_value list = doc.createElement(child_list_name) for cls in range(0,child_ncols): ckey = u"%s" % childtable.cell(0, cls).value cvalue = childtable.cell(crow, cls).value if isinstance(cvalue, float): cvalue = '%0d' % cvalue cvalue = '%s' % cvalue # ctmp = ckey.encode('utf-8') ctmp = doc.createElement(ckey.encode('utf-8')) ctmp.appendChild(doc.createTextNode(cvalue.encode('utf-8'))) list.appendChild(ctmp) #将此元素作为根元素的子节点 childlistnames.appendChild(list) item.appendChild(childlistnames) info.appendChild(item) #要生成的xml文件名 generate_xml_name = name.strip().split('.')[0] + '.xml' #要生成的xml文件到某个目录的绝对路径 geneate_xml_dir = os.path.join(generate_xml_dir, generate_xml_name) f = open(geneate_xml_dir, 'w') Indent(doc,parent) # f.write(doc.toprettyxml()) #可以使生成xml有好看的格式,要是不需要,注释上一行,采用此行作为输出 doc.writexml(f) # 已经用indent函数美化过了 f.close() def find_assign_xlsx(xlsx_path, generate_xml_dir,parent_list,parent_list_names,parent_list_name,child_list_names="",child_list_name=""): for name in os.listdir(xlsx_path): if (name.endswith('.xls') or name.endswith('.xlsx')): #生成excel文件的绝对路径 excel_absolute_path = os.path.join(xlsx_path, name) #解析excel并转成xml translate_excel_to_xml(excel_absolute_path, name,generate_xml_dir, parent_list,parent_list_names,parent_list_name,child_list_names,child_list_name) def Indent(dom, node, indent = 0): children = node.childNodes[:] if indent: text = dom.createTextNode('\n' + '\t' * indent) node.parentNode.insertBefore(text, node) if children: if children[-1].nodeType == node.ELEMENT_NODE: text = dom.createTextNode('\n' + '\t' * indent) node.appendChild(text) 4000 for n in children: if n.nodeType == node.ELEMENT_NODE: Indent(dom, n, indent + 1) def run_excel_to_xml(type=1): #车辆计划type=1 if type == 1: #excel文件路径,xml生成的文件路径 excel_src_path = r'D:\Python\excel\1' generate_xml_dir = r'D:\Python\data\1' # 子表在excel中不存在的节点 child_list_names = "transportPlanDetails" child_list_name = "transportPlanDetail" #父表在excel中不存在的节点 parent_list = "transportPlanRequest" parent_list_names = "transportPlans" parent_list_name = "transportPlan" find_assign_xlsx(excel_src_path.decode('utf-8'), generate_xml_dir,parent_list,parent_list_names,parent_list_name,child_list_names,child_list_name) elif type == 2: excel_src_path = r'D:\Python\excel\2' generate_xml_dir = r'D:\Python\data\2' parent_list = "vehicleTrackRequest" parent_list_names = "vehicleTrackList" parent_list_name = "vehicleTrack" find_assign_xlsx(excel_src_path.decode('utf-8'), generate_xml_dir,parent_list,parent_list_names,parent_list_name) else: excel_src_path = r'D:\Python\excel\3' generate_xml_dir = r'D:\Python\data\3' parent_list = "transportPaidRequest" parent_list_names = "transportPaidList" parent_list_name = "transportPaid" find_assign_xlsx(excel_src_path.decode('utf-8'), generate_xml_dir,parent_list,parent_list_names,parent_list_name) if __name__ == "__main__": #车辆计划type=1,车辆轨迹2,实收实发3 run_excel_to_xml(1) run_excel_to_xml(2) run_excel_to_xml(3)
相关文章推荐
- Python动态类型的学习---引用的理解
- Python3写爬虫(四)多线程实现数据爬取
- 垃圾邮件过滤器 python简单实现
- 下载并遍历 names.txt 文件,输出长度最长的回文人名。
- install and upgrade scrapy
- Scrapy的架构介绍
- Centos6 编译安装Python
- 使用Python生成Excel格式的图片
- 让Python文件也可以当bat文件运行
- [Python]推算数独
- Python中zip()函数用法举例
- Python中map()函数浅析
- Python将excel导入到mysql中
- Excel 曝出 Power Query 安全漏洞,1.2 亿用户易受远程 DDE 攻击
- Python在CAM软件Genesis2000中的应用
- 使用Shiboken为C++和Qt库创建Python绑定