解析使用 xml.etree.ElementTree 模块,生成使用 xml.dom.minidom 模块:ElementTree 解析比 dom 快,而 dom 生成 xml 更简单,且会自动格式化输出。
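待解析的 xml 文件内容:
<baspools>
  <bas>
    <basprovider>0</basprovider>
    <portal_version>1</portal_version>
    <timeout>111</timeout>
    <retry>111</retry>
    <auth_type>111</auth_type>
  </bas>
  <bas>
    <basprovider>0</basprovider>
    <portal_version>1</portal_version>
    <timeout>5000</timeout>
    <retry>3</retry>
    <auth_type>0</auth_type>
  </bas>
</baspools>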
将 xml 解析为 dict 的结果:
{0: { 'retry': '111', 'auth_type': '111', 'portal_version': '1', 'timeout': '111', 'basprovider': '0'}, 1: { 'retry': '3', 'auth_type': '0', 'portal_version': '1', 'timeout': '5000', 'basprovider': '0'}}
将上述字典再还原为 xml,执行代码如下:
# -*- coding: utf-8 -*-
"""Round-trip a two-level XML file through a nested dict.

Parsing uses xml.etree.ElementTree; pretty-printed output uses
xml.dom.minidom.
"""
import time
import xml.dom.minidom as minidom
import xml.etree.ElementTree as ET

# Start of the timing window; the elapsed time is printed at the end of the
# script run. time.clock() was removed in Python 3.8, so perf_counter() is used.
start = time.perf_counter()


def read_xml(in_path):
    """Read and parse an XML file.

    Args:
        in_path: Path of the XML file to parse.

    Returns:
        The parsed ``xml.etree.ElementTree.ElementTree``.
    """
    return ET.parse(in_path)


def creat_dict(root):
    """Convert a two-level element tree into a nested dict.

    Each direct child of ``root`` becomes one entry keyed by its position
    (0, 1, ...). The value is a dict mapping the tag of every grandchild
    to its text. If a tag is repeated inside one child, the last one wins.

    Args:
        root: Root element whose children are iterated.

    Returns:
        ``{index: {tag: text, ...}, ...}``
    """
    return {
        index: {item.tag: item.text for item in node}
        for index, node in enumerate(root)
    }


def dict_to_xml(input_dict, root_tag, node_tag):
    """Build an element tree from a nested dict.

    Args:
        input_dict: Mapping whose values are ``{tag: text}`` dicts; the
            outer keys are ignored.
        root_tag: Tag of the root element.
        node_tag: Tag used for every second-level element.

    Returns:
        The root ``Element``. Each inner key/value pair becomes a
        third-level element, in the iteration order of the inner dict.
    """
    root_elem = ET.Element(root_tag)
    for params in input_dict.values():
        node_elem = ET.SubElement(root_elem, node_tag)
        for tag, value in params.items():
            child = ET.SubElement(node_elem, tag)
            # Serialization requires str (or None for an empty element).
            child.text = None if value is None else str(value)
    return root_elem


def out_xml(root, out_path=None):
    """Pretty-print ``root`` and write it to an XML file.

    Args:
        root: Root ``Element`` to serialize.
        out_path: Destination path. When omitted, the module-level
            ``out_file`` variable is used, as in the original script.

    Returns:
        True once the file has been written.
    """
    if out_path is None:
        out_path = out_file  # set by the script entry point below
    rough_string = ET.tostring(root, 'utf-8')
    document = minidom.parseString(rough_string)
    # Open with the same encoding that is declared in the XML header.
    with open(out_path, 'w', encoding='utf-8') as fs:
        document.writexml(fs, addindent=" ", newl="\n", encoding="utf-8")
    return True


if __name__ == '__main__':
    in_files = r"D:\baspool_read.xml"
    out_file = r"D:\baspool_out.xml"
    tree = read_xml(in_files)
    node_new = creat_dict(tree.getroot())  # XML -> dict
    root = dict_to_xml(node_new, "baspools", "bas")  # dict -> XML
    out_xml(root, out_file)  # write the XML to out_file
    end = time.perf_counter()
    print("read: %f s" % (end - start))
为解决字典无序导致生成的 xml 文件中参数位置不固定的问题,对 dict_to_xml() 函数进行修正(先按参数名排序,再生成节点):
def dict_to_xml(input_dict, root_tag, node_tag):
    """Build an element tree from a nested dict with a fixed child order.

    Args:
        input_dict: Mapping whose values are ``{tag: text}`` dicts; the
            outer keys are ignored.
        root_tag: Tag of the root element.
        node_tag: Tag used for every second-level element.

    Returns:
        The root ``Element``. Within each second-level element the
        third-level elements are emitted in descending order of tag name,
        so the output does not depend on dict iteration order.
    """
    root_elem = ET.Element(root_tag)
    for params in input_dict.values():
        node_elem = ET.SubElement(root_elem, node_tag)
        # Sort the tags (descending) to make the generated XML deterministic.
        for tag in sorted(params, reverse=True):
            value = params[tag]
            child = ET.SubElement(node_elem, tag)
            # Serialization requires str (or None for an empty element).
            child.text = None if value is None else str(value)
    return root_elem