5. Python 操作 xml 文件#
xml 文件的详细说明请参阅: XML (可扩展标记语言)
5.1. 读取 xml 文件#
5.1.1. 读取 xml 的根节点的标签名及属性。#
示例文件 country_data.xml
内容如下
<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
其根节点名称为: data, 根节点无其它属性。
# Build the sample file used throughout this section.
example_string = """
<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
"""
# The "<?xml" declaration must be the very first thing in the file or string;
# a leading blank line makes the parser raise ParseError, so strip it.
example_string = example_string.strip()
with open('country_data.xml', 'w', encoding='utf-8') as f:
    f.write(example_string)

# Parse the document and inspect its root element.
import xml.etree.ElementTree as ET
try:
    tree = ET.parse(r'./country_data.xml')
except ET.ParseError:
    # ET.fromstring() returns the root Element, not an ElementTree. Wrap it so
    # that tree.getroot() below (and tree.write() later) work on this path too.
    tree = ET.ElementTree(ET.fromstring(example_string))
root = tree.getroot()
print(f'{root.tag =}')
print(f'{root.attrib =}')
root.tag ='data'
root.attrib ={}
5.1.2. 读取示例文件中根节点下子节点名称及属性#
示例文件中根节点(data)下有 3 个子节点, 每个子节点的标签名都为 country, 每个子节点都有一个名为 name 的属性, 其属性值对应不同的地名
通过循环访问
# Iterating over an Element yields its direct children one by one.
for child in root:
    print(f'{child.tag = }; {child.attrib = }')
child.tag = 'country'; child.attrib = {'name': 'Liechtenstein'}
child.tag = 'country'; child.attrib = {'name': 'Singapore'}
child.tag = 'country'; child.attrib = {'name': 'Panama'}
通过索引访问
# Children can also be reached by position: root[0] is the first child of the
# root, and indexing can be chained (root[0][3] is the fourth child of that child).
print(f'{root[0] = }')
print(f'{root[0].tag = }, {root[0].attrib = }')
print(f'{root[0][0] = }')
print(f'{root[0][0].tag = }')
# .text holds the character data of the element
print(f'{root[0][0].text = }')
print(f'{root[0][3] = }')
print(f'{root[0][3].attrib = }')
root[0] = <Element 'country' at 0x7fc38afd5ad0>
root[0].tag = 'country', root[0].attrib = {'name': 'Liechtenstein'}
root[0][0] = <Element 'rank' at 0x7fc38afd6d40>
root[0][0].tag = 'rank'
root[0][0].text = '1'
root[0][3] = <Element 'neighbor' at 0x7fc38afd6de0>
root[0][3].attrib = {'name': 'Austria', 'direction': 'E'}
通过 Element 的 iter() 方法访问: 它会递归遍历该元素下的所有后代元素 (子级、子级的子级……), 并可以只筛选出某个特定 tag 的元素
# iter('neighbor') walks the whole subtree and yields every <neighbor> element,
# regardless of how deeply it is nested.
for neighbor in root.iter('neighbor'):
    print(f'{neighbor.tag = }; {neighbor.attrib = }')
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Austria', 'direction': 'E'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Switzerland', 'direction': 'W'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Malaysia', 'direction': 'N'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Costa Rica', 'direction': 'W'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Colombia', 'direction': 'E'}
# items() returns an element's attributes as a list of (name, value) pairs;
# the list is empty for an element without attributes.
print(f'{root[0][1].items() = }')
print(f'{root[0][3].items() = }')
root[0][1].items() = []
root[0][3].items() = [('name', 'Austria'), ('direction', 'E')]
Element.findall() 仅查找当前元素的直接子元素中带有指定标签的元素。
Element.find() 找带有特定标签的第一个子级, 然后可以用 Element.text 访问元素的文本内容。 Element.get() 访问元素的属性
# For every <country> directly under the root, show its name attribute
# followed by the text of its first <rank> child.
for country_elem in root.findall('country'):
    print(country_elem.get('name'), country_elem.find('rank').text)
Liechtenstein 1
Singapore 4
Panama 68
通过 XPath 来定位树中元素
# XPath-style queries. The results are printed directly instead of through an
# f-string: reusing the f-string's own quote character inside a replacement
# field is a SyntaxError before Python 3.12.

# "." selects the current node, here the root element itself
print(root.findall("."))
# every <neighbor> that is a child of a <country> child of the root
print(root.findall("./country/neighbor"))
# elements with name='Singapore' that have a <year> child
print(root.findall(".//year/..[@name='Singapore']"))
# <year> children of the elements with name='Singapore'
print(root.findall(".//*[@name='Singapore']/year"))
# every <neighbor> that is the 2nd <neighbor> child of its parent
print(root.findall('.//neighbor[2]'))
# names of every <neighbor> that is the 1st <neighbor> child of its parent
print([i.get('name') for i in root.findall('.//neighbor[1]')])
# parents that have a 2nd <neighbor> child
print(root.findall(".//neighbor[2]/.."))
print([i.get('name') for i in root.findall(".//neighbor[2]/..")])
Cell In[8], line 6
print(f'{root.findall(".//year/..[@name='Singapore']")}')
^
SyntaxError: f-string: unterminated string
5.2. 更新 xml 文件#
ElementTree.write() 写文件方法
Element.text 修改文本字段
Element.set() 方法添加和修改属性
Element.append() 添加新的子元素
Element.remove() 删除元素
import os
import tempfile

# Increment every <rank> by one and mark it with updated="yes".
for rank_elem in root.findall('.//rank'):
    rank_elem.text = str(int(rank_elem.text) + 1)
    rank_elem.set('updated', 'yes')

# Write the modified tree, echo the file, then remove it again.
tree.write('output.xml')
with open('output.xml') as fh:
    for raw_line in fh:
        # rstrip() without arguments already drops trailing '\n' and '\r'
        print(raw_line.rstrip())
os.remove('output.xml')
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E" />
<neighbor name="Switzerland" direction="W" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N" />
</country>
<country name="Panama">
<rank updated="yes">69</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W" />
<neighbor name="Colombia" direction="E" />
</country>
</data>
import os

# Remove every country whose rank is above 50. findall() hands back a list,
# so removing children from root inside the loop does not disturb the iteration.
for country_elem in root.findall('./country'):
    if int(country_elem.find('rank').text) > 50:
        root.remove(country_elem)

# Write the modified tree, echo the file, then remove it again.
tree.write('output.xml')
with open('output.xml') as fh:
    for raw_line in fh:
        print(raw_line.rstrip())
os.remove('output.xml')
<data>
<country name="Liechtenstein">
<rank updated="yes">2</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E" />
<neighbor name="Switzerland" direction="W" />
</country>
<country name="Singapore">
<rank updated="yes">5</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N" />
</country>
</data>
# Clean up: delete the sample file
import os
os.remove("country_data.xml")
5.3. AutoFlowchart 转 Drawio#
5.3.1. 1. AutoFlowchart 导出的 svg 转 Drawio#
先创建一个 xml 的写入树
# Build an empty draw.io document in memory
import xml.etree.ElementTree as ET
drawio_string = '''
<mxfile>
<diagram >
<mxGraphModel >
<root>
<mxCell id="0"/>
<mxCell id="1" parent="0"/>
</root>
</mxGraphModel>
</diagram>
</mxfile>
'''.strip()
# tree = ET.fromstring(drawio_string)
# ET.ElementTree(tree).write('../../VsCode/export.xml')
# write_tree is the whole document; write_root is its <root> element, to which
# the following cells append the generated mxCell elements.
write_tree = ET.ElementTree(ET.fromstring(drawio_string))
write_root = write_tree.getroot().find('./diagram/mxGraphModel/root')
print(f'{write_root.tag} 标签下有: {len(write_root)} 个子元素')
# el = ET.Element('mxCell',{'style':""})
# ET.SubElement(el,'mxGeometry')
# write_root.append(el)
# write_tree.write('../../Vscode/export.xml')
root 标签下有: 2 个子元素
将
rect
转为 drawio 中的矩形
# Convert every top-level <rect> of the SVG into a draw.io vertex cell.
import xml.etree.ElementTree as ET
tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()
for rect in root.findall('./{http://www.w3.org/2000/svg}rect'):
    # Position and size are copied verbatim from the SVG attributes.
    geometry_attribute = {item: rect.attrib[item] for item in ("x", "y", "width", "height")}
    geometry_attribute['as'] = 'geometry'
    print(f'{geometry_attribute = }')

    style_value = "html=1;"
    # Attribute values are read into locals first: reusing the f-string's own
    # quote character inside a replacement field is a SyntaxError before Python 3.12.
    if 'fill' in rect.attrib:
        fill = rect.attrib['fill']
        style_value += f'fillColor={fill};'
    if 'rx' in rect.attrib:
        # A rect carrying an 'rx' attribute is emitted as a rounded rectangle.
        arc_size = int(rect.attrib['height']) * 2
        style_value += f'rounded=1;arcSize={arc_size};'
    print(f'{style_value=}')

    el = ET.Element('mxCell', {'value': '', 'style': style_value, "vertex": "1", "parent": "1"})
    ET.SubElement(el, 'mxGeometry', geometry_attribute)
    write_root.append(el)
geometry_attribute = {'x': '32', 'y': '20', 'width': '75', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;rounded=1;arcSize=60;'
geometry_attribute = {'x': '32', 'y': '220', 'width': '75', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;rounded=1;arcSize=60;'
geometry_attribute = {'x': '20', 'y': '150', 'width': '100', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;'
geometry_attribute = {'x': '140', 'y': '150', 'width': '100', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;'
将 text 标签转换为 Drawio 中的文本
若指定文本的宽和高, 文本会偏向右下方; 若不指定, 文本还能大致落在框内
最好是通过坐标判断文本位于哪个框内, 然后直接修改对应框的 value 属性值
# Convert every top-level <polygon> of the SVG into a draw.io rhombus cell
# sized to the polygon's bounding box.
import xml.etree.ElementTree as ET
tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()
for polygon in root.findall('./{http://www.w3.org/2000/svg}polygon'):
    print(polygon.attrib['points'])
    # 'points' is read as a flat comma-separated list: even positions are x
    # values, odd positions are y values.
    tokens = polygon.attrib['points'].split(',')
    x = [int(token.strip()) for token in tokens[0::2]]
    print(x)
    y = [int(token.strip()) for token in tokens[1::2]]
    print(y)

    left, right = min(x), max(x)
    print(left, right)
    top, bottom = min(y), max(y)
    print(top, bottom)

    mxCell_attribute = {
        'value': '',
        'style': 'shape=rhombus;fillColor=none;',
        'vertex': "1",
        'parent': "1",
    }
    print(mxCell_attribute)

    geometry_attribute = {
        'x': str(left),
        'y': str(top),
        'width': str(right - left),
        'height': str(bottom - top),
        'as': 'geometry',
    }
    print(geometry_attribute)

    el = ET.Element("mxCell", mxCell_attribute)
    ET.SubElement(el, 'mxGeometry', geometry_attribute)
    write_root.append(el)
# write_tree.write('../../Vscode/export.xml')
70,70,120,100,70,130,20,100
[70, 120, 70, 20]
[70, 100, 130, 100]
20 120
70 130
{'value': '', 'style': 'shape=rhombus;fillColor=none;', 'vertex': '1', 'parent': '1'}
{'x': '20', 'y': '70', 'width': '100', 'height': '60', 'as': 'geometry'}
# Read all <text> tags from the SVG file
import xml.etree.ElementTree as ET
import html
tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()
def add_text_to_rect(x: int, y: int, text: str, cells_root: "ET.Element | None" = None) -> None:
    """Store *text* as the label of the draw.io cell whose box contains (x, y).

    Rather than emitting a separate text cell, the SVG text is written into
    the ``value`` attribute of the shape it sits in. All shape cells must
    therefore already have been added before this is called.

    Args:
        x: X coordinate of the SVG text.
        y: Y coordinate of the SVG text.
        text: Label to store in the matching cell's ``value`` attribute.
        cells_root: Element whose direct ``mxCell`` children are searched.
            Defaults to the module-level ``write_root``.

    Only the first matching cell is updated. A point lying exactly on a box
    border does not count as inside. A message is printed telling whether
    the text was placed.
    """
    container = write_root if cells_root is None else cells_root
    for mxCell in container.findall('./mxCell'):
        geometry = mxCell.find('mxGeometry')
        # Identity test on purpose: truth-testing an Element reflects whether
        # it has children, not whether it was found.
        if geometry is None:
            continue
        # Cells without a complete x/y/width/height box (e.g. edges) are skipped.
        if not all(key in geometry.attrib for key in ('x', 'y', 'width', 'height')):
            continue
        rect_x = int(geometry.attrib['x'].strip())
        rect_y = int(geometry.attrib['y'].strip())
        rect_width = int(geometry.attrib['width'].strip())
        rect_height = int(geometry.attrib['height'].strip())
        if (rect_x < x < rect_x + rect_width and
                rect_y < y < rect_y + rect_height):
            mxCell.set('value', text)
            print(f'成功添加文本:{text}')
            return
    print(f'未解析的:{text}在({x},{y})')
# Put every top-level SVG <text> label into the shape that contains its position.
for text in root.findall('./{http://www.w3.org/2000/svg}text'):
    if text.text is None:
        # No direct character data on this element; html.escape(None) would
        # raise, and there is no label to place.
        continue
    add_text_to_rect(
        int(text.attrib['x'].strip()),
        int(text.attrib['y'].strip()),
        html.escape(text.text, False),
    )
# for text in root.findall('./{http://www.w3.org/2000/svg}text'):
# print(text.attrib)
# print(text.text)
# mxCell_attribute = {}
# mxCell_attribute["value"] = html.escape(text.text)
# mxCell_attribute["style"] = "shape=text;html=1;align=center;verticalAlign=middle;strokeColor=none;fillColor=none;"
# mxCell_attribute['vertex'] = "1"
# mxCell_attribute['parent'] = "1"
# print(f'{mxCell_attribute = }')
# geometry_attribute = {}
# for item in "x y".split():
# geometry_attribute[item] = text.attrib[item]
# # geometry_attribute['width'] = '60'
# # geometry_attribute['height'] = '30'
# geometry_attribute['as'] = 'geometry'
# print(f'{geometry_attribute = }')
# el = ET.Element("mxCell",mxCell_attribute)
# ET.SubElement(el,"mxGeometry",geometry_attribute)
# write_root.append(el)
# write_tree.write('../../Vscode/export.xml')
成功添加文本:START
成功添加文本:END
成功添加文本:(xTask == TaskHandleVoltage)
成功添加文本:Code
成功添加文本:Code
# Convert every top-level <polyline> of the SVG into a draw.io edge cell,
# then save the assembled document.
tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()
for polyline in root.findall('./{http://www.w3.org/2000/svg}polyline'):
    # Flat "x0,y0,x1,y1,..." coordinate list, kept as strings.
    points = [i.strip() for i in polyline.attrib['points'].split(',')]

    mxCell_attribute = {
        'value': "",
        'style': "rounded=0;endArrow=none;",
        'edge': "1",
        'parent': "1",
    }
    geometry_attribute = {'width': "80", 'relative': "1", 'as': 'geometry'}

    # The first and last coordinate pairs are the end points of the edge.
    sourcePoint_attribute = {'x': points[0], 'y': points[1], 'as': 'sourcePoint'}
    targetPoint_attribute = {'x': points[-2], 'y': points[-1], 'as': 'targetPoint'}
    # Everything in between becomes intermediate waypoints.
    waypoints = points[2:-2]

    el_geometry = ET.Element('mxGeometry', geometry_attribute)
    ET.SubElement(el_geometry, "mxPoint", sourcePoint_attribute)
    ET.SubElement(el_geometry, "mxPoint", targetPoint_attribute)
    if waypoints:
        el_array = ET.SubElement(el_geometry, "Array", {'as': 'points'})
        # Step through the list two items at a time instead of re-slicing it.
        for i in range(0, len(waypoints), 2):
            ET.SubElement(el_array, "mxPoint", {'x': waypoints[i], 'y': waypoints[i + 1]})
    print(f'{el_geometry.tag = }; {el_geometry.attrib = }; {len(el_geometry)}')

    el = ET.Element("mxCell", mxCell_attribute)
    el.append(el_geometry)
    write_root.append(el)

# Same directory spelling as the input path above ('VsCode'), so the write
# also resolves on case-sensitive file systems.
write_tree.write('../../VsCode/drawio/export.xml')
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 3
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 3
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
基本功能已实现,文件可以进一步根据点位置来直接修改value属性值
5.3.2. 2. AutoFlowchart 导出的 xml 文件转 Drawio#
待完成…