5. Python 操作 xml 文件#

xml 文件的详细说明请参阅: XML (可扩展标记语言)

5.1. 读取 xml 文件#

5.1.1. 读取 xml 的根节点的标签名及属性。#

示例文件 country_data.xml 内容如下

<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>

其根节点名称为: data, 根节点无其它属性。

# Generate the sample file

example_string = """
<?xml version="1.0"?>
<data>
    <country name="Liechtenstein">
        <rank>1</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E"/>
        <neighbor name="Switzerland" direction="W"/>
    </country>
    <country name="Singapore">
        <rank>4</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N"/>
    </country>
    <country name="Panama">
        <rank>68</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W"/>
        <neighbor name="Colombia" direction="E"/>
    </country>
</data>
"""

# The "<?xml" declaration must be the very first thing in the file or string;
# a leading blank line makes the parser raise ParseError, hence the strip().
example_string = example_string.strip()

with open('country_data.xml', 'w', encoding='utf-8') as f:
    f.write(example_string)

# Parse the document and fetch its root node
import xml.etree.ElementTree as ET
try:
    tree = ET.parse(r'./country_data.xml')
except ET.ParseError:
    # ET.fromstring() returns the root Element, not an ElementTree. Wrap it so
    # that tree.getroot() below (and tree.write() later on) also work when
    # this fallback is taken.
    tree = ET.ElementTree(ET.fromstring(example_string))

root = tree.getroot()

print(f'{root.tag =}')
print(f'{root.attrib =}')
root.tag ='data'
root.attrib ={}

5.1.2. 读取示例文件中根节点下子节点名称及属性#

示例文件中根节点(data)下有 3 个子节点, 每个子节点的标签名都为 country , 每个子节点都有一个名为 name 的属性, 该属性值对应不同的地名

  • 通过循环访问

# Iterating over an Element yields its direct children; print each child's
# tag name and attribute dict.
for child in root:
    print(f'{child.tag = }; {child.attrib = }')
child.tag = 'country'; child.attrib = {'name': 'Liechtenstein'}
child.tag = 'country'; child.attrib = {'name': 'Singapore'}
child.tag = 'country'; child.attrib = {'name': 'Panama'}
  • 通过索引访问

# Children can also be reached by index: root[0] is the first child of the
# root, root[0][0] the first child of that element and root[0][3] its fourth.
print(f'{root[0] = }')
print(f'{root[0].tag = }, {root[0].attrib = }')
print(f'{root[0][0] = }')
print(f'{root[0][0].tag = }')
# .text holds the text content of the element
print(f'{root[0][0].text = }')
print(f'{root[0][3] = }')
print(f'{root[0][3].attrib = }')
root[0] = <Element 'country' at 0x7fc38afd5ad0>
root[0].tag = 'country', root[0].attrib = {'name': 'Liechtenstein'}
root[0][0] = <Element 'rank' at 0x7fc38afd6d40>
root[0][0].tag = 'rank'
root[0][0].text = '1'
root[0][3] = <Element 'neighbor' at 0x7fc38afd6de0>
root[0][3].attrib = {'name': 'Austria', 'direction': 'E'}
  • 通过 Element.iter() 访问, 它可以递归遍历子级、子级的子级, 找出其中带有某个特定 tag 的元素

# iter('neighbor') walks the whole subtree below root and yields every element
# whose tag is 'neighbor', regardless of nesting depth.
for neighbor in root.iter('neighbor'):
    print(f'{neighbor.tag = }; {neighbor.attrib = }')
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Austria', 'direction': 'E'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Switzerland', 'direction': 'W'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Malaysia', 'direction': 'N'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Costa Rica', 'direction': 'W'}
neighbor.tag = 'neighbor'; neighbor.attrib = {'name': 'Colombia', 'direction': 'E'}
# Get a node's attributes as a list of (name, value) pairs; a node without
# attributes gives an empty list.
print(f'{root[0][1].items() = }')
print(f'{root[0][3].items() = }')
root[0][1].items() = []
root[0][3].items() = [('name', 'Austria'), ('direction', 'E')]
  • Element.findall() 仅查找当前元素的直接子元素中带有指定标签的元素。

  • Element.find() 找带有特定标签的第一个子级, 然后可以用 Element.text 访问元素的文本内容。 Element.get() 访问元素的属性

# For every <country> directly below the root, print its "name" attribute
# followed by the text of its first <rank> child.
for country in root.findall('country'):
    rank_element = country.find('rank')
    print(country.get('name'), rank_element.text)
Liechtenstein 1
Singapore 4
Panama 68
  • 通过 XPath 来定位树中元素

# The XPath expressions below contain quote characters themselves, so the
# results are handed to print() directly instead of being nested inside a
# single-quoted f-string: reusing the enclosing quote character inside an
# f-string replacement field is a SyntaxError before Python 3.12.
# print(x) and print(f'{x}') produce the same text.

# Select the root element itself
print(root.findall("."))
# All <neighbor> children of the <country> children of the root
print(root.findall("./country/neighbor"))
# Nodes with name='Singapore' that have a <year> child
print(root.findall(".//year/..[@name='Singapore']"))
# <year> children of the nodes with name='Singapore'
print(root.findall(".//*[@name='Singapore']/year"))
# Every <neighbor> that is the 2nd <neighbor> child of its parent
print(root.findall('.//neighbor[2]'))
# Names of every <neighbor> that is the 1st <neighbor> child of its parent
print([i.get('name') for i in root.findall('.//neighbor[1]')])
# Nodes that have a 2nd <neighbor> child, i.e. at least two neighbors
print(root.findall(".//neighbor[2]/.."))
print([i.get('name') for i in root.findall(".//neighbor[2]/..")])

5.2. 更新 xml 文件#

  1. ElementTree.write() 写文件方法

  2. Element.text 修改文本字段

  3. Element.set() 方法添加和修改属性

  4. Element.append() 添加新的子元素

  5. Element.remove() 删除元素

import os
import tempfile

# Bump every <rank> by one and mark it with updated="yes"
for rank_element in root.findall('.//rank'):
    rank_element.text = str(int(rank_element.text) + 1)
    rank_element.set('updated', 'yes')

tree.write('output.xml')

# Echo the written file without trailing whitespace / line endings
with open('output.xml') as written:
    for raw_line in written:
        print(raw_line.rstrip())

# The file was only needed to show the result
os.remove('output.xml')
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E" />
        <neighbor name="Switzerland" direction="W" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N" />
    </country>
    <country name="Panama">
        <rank updated="yes">69</rank>
        <year>2011</year>
        <gdppc>13600</gdppc>
        <neighbor name="Costa Rica" direction="W" />
        <neighbor name="Colombia" direction="E" />
    </country>
</data>
import os

# Drop every <country> whose rank is above 50. findall() hands back a list,
# so removing children from root inside the loop does not disturb iteration.
for candidate in root.findall('./country'):
    if int(candidate.find('rank').text) > 50:
        root.remove(candidate)

tree.write('output.xml')

# Echo the written file without trailing whitespace / line endings
with open('output.xml') as written:
    for raw_line in written:
        print(raw_line.rstrip())

# The file was only needed to show the result
os.remove('output.xml')
<data>
    <country name="Liechtenstein">
        <rank updated="yes">2</rank>
        <year>2008</year>
        <gdppc>141100</gdppc>
        <neighbor name="Austria" direction="E" />
        <neighbor name="Switzerland" direction="W" />
    </country>
    <country name="Singapore">
        <rank updated="yes">5</rank>
        <year>2011</year>
        <gdppc>59900</gdppc>
        <neighbor name="Malaysia" direction="N" />
    </country>
    </data>
# Delete the sample file country_data.xml
import os
os.remove("country_data.xml")

5.3. AutoFlowchart 转 Drawio#

5.3.1. 1. AutoFlowchart 导出的 svg 转 Drawio#

先创建一个 xml 的写入树

# Build an empty drawio document that the converted shapes are appended to

import xml.etree.ElementTree as ET

drawio_string = '''
<mxfile>
    <diagram >
        <mxGraphModel >
            <root>
                <mxCell id="0"/>
                <mxCell id="1" parent="0"/>
            </root>
        </mxGraphModel>
    </diagram>
</mxfile>
'''.strip()

# Parse the skeleton once, then keep both the tree (for writing) and the
# <root> element (the container new mxCell elements get appended to).
drawio_document = ET.fromstring(drawio_string)
write_tree = ET.ElementTree(drawio_document)
write_root = drawio_document.find('./diagram/mxGraphModel/root')
print(f'{write_root.tag} 标签下有: {len(write_root)} 个子元素')
root 标签下有: 2 个子元素
  • rect 转为 drawio 中的矩形

# Read every <rect> tag of the SVG file and append a matching drawio cell

import xml.etree.ElementTree as ET

tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()

for rect in root.findall('./{http://www.w3.org/2000/svg}rect'):
    # The drawio geometry takes x / y / width / height straight from the rect
    geometry_attribute = {
        item: rect.attrib[item] for item in ('x', 'y', 'width', 'height')
    }
    geometry_attribute['as'] = 'geometry'
    print(f'{geometry_attribute = }')

    # The f-strings below are double-quoted because the subscripts inside use
    # single quotes; nesting the same quote character is a SyntaxError before
    # Python 3.12.
    style_value = "html=1;"
    if 'fill' in rect.attrib:
        style_value += f"fillColor={rect.attrib['fill']};"
    if 'rx' in rect.attrib:
        # A rect carrying an rx attribute becomes a rounded cell whose
        # arcSize is set to twice the rect height.
        style_value += f"rounded=1;arcSize={int(rect.attrib['height'])*2};"
    print(f'{style_value=}')

    el = ET.Element(
        'mxCell',
        {'value': '', 'style': style_value, "vertex": "1", "parent": "1"},
    )
    ET.SubElement(el, 'mxGeometry', geometry_attribute)

    write_root.append(el)
geometry_attribute = {'x': '32', 'y': '20', 'width': '75', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;rounded=1;arcSize=60;'
geometry_attribute = {'x': '32', 'y': '220', 'width': '75', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;rounded=1;arcSize=60;'
geometry_attribute = {'x': '20', 'y': '150', 'width': '100', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;'
geometry_attribute = {'x': '140', 'y': '150', 'width': '100', 'height': '30', 'as': 'geometry'}
style_value='html=1;fillColor=none;'
  • 将 text 标签转换为 Drawio 中的文本

    若指定文本的宽和高, 文本会偏向右下方; 若不指定, 文本还勉强在框内

    最好是通过坐标判断文本在哪个框内, 直接修改对应框的 value 属性值

# Read every <polygon> tag of the SVG file and append a rhombus cell that
# covers the polygon's bounding box

import xml.etree.ElementTree as ET

tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()

for polygon in root.findall('./{http://www.w3.org/2000/svg}polygon'):
    print(polygon.attrib['points'])

    # "points" is read as one flat comma-separated list x0,y0,x1,y1,...:
    # even positions are x values, odd positions are y values.
    tokens = polygon.attrib['points'].split(',')
    x = [int(token.strip()) for token in tokens[0::2]]
    print(x)
    y = [int(token.strip()) for token in tokens[1::2]]
    print(y)

    left, right = min(x), max(x)
    print(left, right)
    top, bottom = min(y), max(y)
    print(top, bottom)

    mxCell_attribute = {
        'value': '',
        'style': 'shape=rhombus;fillColor=none;',
        'vertex': "1",
        'parent': "1",
    }
    print(mxCell_attribute)

    # Bounding box of the polygon, stored as strings for the XML attributes
    geometry_attribute = {
        'x': str(left),
        'y': str(top),
        'width': str(right - left),
        'height': str(bottom - top),
        'as': 'geometry',
    }
    print(geometry_attribute)

    el = ET.Element("mxCell", mxCell_attribute)
    ET.SubElement(el, 'mxGeometry', geometry_attribute)

    write_root.append(el)
70,70,120,100,70,130,20,100
[70, 120, 70, 20]
[70, 100, 130, 100]
20 120
70 130
{'value': '', 'style': 'shape=rhombus;fillColor=none;', 'vertex': '1', 'parent': '1'}
{'x': '20', 'y': '70', 'width': '100', 'height': '60', 'as': 'geometry'}
# Read every <text> tag of the SVG file

import xml.etree.ElementTree as ET
import html

tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()

# New approach: store the content of each SVG <text> in the value attribute of
# the drawio element it belongs to.
# This requires all shape elements to be created first; only then can the
# element owning each text be determined.

def add_text_to_rect(x:int,y:int,text:str,cells_root=None):
    """Store *text* as the ``value`` of the first cell whose box contains (x, y).

    Every direct ``mxCell`` child of *cells_root* that has an ``mxGeometry``
    child with ``x``, ``y``, ``width`` and ``height`` attributes is checked in
    document order. The point must lie strictly inside the box (points on the
    border do not match). The first matching cell gets its ``value`` attribute
    set to *text* and the search stops.

    Args:
        x: Horizontal position of the text anchor.
        y: Vertical position of the text anchor.
        text: Text to store in the matching cell.
        cells_root: Element holding the ``mxCell`` children to search.
            Defaults to the module-level ``write_root``.

    Returns:
        None. The outcome is reported with a printed message.
    """
    if cells_root is None:
        cells_root = write_root

    for mxCell in cells_root.findall('./mxCell'):
        geometry = mxCell.find('mxGeometry')
        if geometry is None:
            continue
        try:
            # float() rather than int() so that non-integer coordinates do
            # not abort the lookup.
            rect_x, rect_y, rect_width, rect_height = (
                float(geometry.attrib[key].strip())
                for key in ('x', 'y', 'width', 'height')
            )
        except KeyError:
            # Geometry without a full box (one of x/y/width/height missing)
            # cannot contain the point.
            continue

        if (rect_x < x < (rect_x + rect_width) and
            rect_y < y < (rect_y + rect_height)):
            mxCell.set('value', text)
            print(f'成功添加文本:{text}')
            return

    print(f'未解析的:{text}在({x},{y})')

# Hand every SVG <text> (position plus escaped content) to add_text_to_rect
for text in root.findall('./{http://www.w3.org/2000/svg}text'):
    text_x = int(text.attrib['x'].strip())
    text_y = int(text.attrib['y'].strip())
    # quote=False: only &, < and > are escaped, quote characters stay as-is
    escaped_text = html.escape(text.text, False)
    add_text_to_rect(text_x, text_y, escaped_text)

    
# for text in root.findall('./{http://www.w3.org/2000/svg}text'):
#     print(text.attrib)
#     print(text.text)

#     mxCell_attribute = {}
#     mxCell_attribute["value"] = html.escape(text.text)
#     mxCell_attribute["style"] = "shape=text;html=1;align=center;verticalAlign=middle;strokeColor=none;fillColor=none;"
#     mxCell_attribute['vertex'] = "1"
#     mxCell_attribute['parent'] = "1"
#     print(f'{mxCell_attribute = }')

#     geometry_attribute = {}
#     for item in "x y".split():
#         geometry_attribute[item] = text.attrib[item]
    
#     # geometry_attribute['width'] = '60'
#     # geometry_attribute['height'] = '30'
#     geometry_attribute['as'] = 'geometry'
#     print(f'{geometry_attribute = }')

#     el = ET.Element("mxCell",mxCell_attribute)
#     ET.SubElement(el,"mxGeometry",geometry_attribute)

#     write_root.append(el)

# write_tree.write('../../Vscode/export.xml')
成功添加文本:START
成功添加文本:END
成功添加文本:(xTask == TaskHandleVoltage)
成功添加文本:Code
成功添加文本:Code
# Convert the connector lines (<polyline>) of the SVG file into drawio edges

tree = ET.parse(r'../../VsCode/drawio/AutoFlowchart_Test001.svg')
root = tree.getroot()

for polyline in root.findall('./{http://www.w3.org/2000/svg}polyline'):
    # "points" is read as one flat comma-separated list x0,y0,x1,y1,...
    points = [token.strip() for token in polyline.attrib['points'].split(',')]

    mxCell_attribute = {
        'value': "",
        'style': "rounded=0;endArrow=none;",
        'edge': "1",
        'parent': "1",
    }
    geometry_attribute = {'width': "80", 'relative': "1", 'as': 'geometry'}

    # First coordinate pair is the start of the edge, last pair its end
    sourcePoint_attribute = {'x': points[0], 'y': points[1], 'as': 'sourcePoint'}
    targetPoint_attribute = {'x': points[-2], 'y': points[-1], 'as': 'targetPoint'}

    # Whatever lies between start and end becomes the edge's way points
    waypoints = points[2:-2]

    el_geometry = ET.Element('mxGeometry', geometry_attribute)
    ET.SubElement(el_geometry, "mxPoint", sourcePoint_attribute)
    ET.SubElement(el_geometry, "mxPoint", targetPoint_attribute)
    if waypoints:
        el_array = ET.Element("Array", {'as': 'points'})
        for index in range(0, len(waypoints), 2):
            ET.SubElement(
                el_array,
                "mxPoint",
                {'x': waypoints[index], 'y': waypoints[index + 1]},
            )
        el_geometry.append(el_array)
    print(f'{el_geometry.tag = }; {el_geometry.attrib = }; {len(el_geometry)}')

    el = ET.Element("mxCell", mxCell_attribute)
    el.append(el_geometry)

    write_root.append(el)

# NOTE(review): the directory is spelled 'Vscode' here but 'VsCode' in the
# ET.parse() calls - confirm both resolve on a case-sensitive filesystem.
write_tree.write('../../Vscode/drawio/export.xml')
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 3
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 3
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2
el_geometry.tag = 'mxGeometry'; el_geometry.attrib = {'width': '80', 'relative': '1', 'as': 'geometry'}; 2

基本功能已实现,文件可以进一步根据点位置来直接修改value属性值

5.3.2. 2. AutoFlowchart 导出的 xml 文件转 Drawio#

待完成…