84 lines
3.0 KiB
Python
84 lines
3.0 KiB
Python
![]() |
import xml.etree.ElementTree as ET
|
||
|
import json
|
||
|
|
||
|
def _optional_int(value):
    """Convert an attribute string to ``int``, passing ``None`` through.

    ``attrib.get`` yields ``None`` for an absent attribute and ``int(None)``
    raises ``TypeError``; absent values are kept as ``None`` instead.
    """
    return None if value is None else int(value)


def _graphics_to_dict(graphics):
    """Build the ``graphics`` sub-dict of an entry.

    ``graphics`` is the result of ``entry.find("graphics")`` and may be
    ``None``. In that case (or when individual attributes are absent) the
    corresponding values are ``None``, so every entry exposes the same keys.
    """
    attrs = {} if graphics is None else graphics.attrib
    return {
        "name": attrs.get("name"),
        "fgcolor": attrs.get("fgcolor"),
        "bgcolor": attrs.get("bgcolor"),
        "type": attrs.get("type"),
        # Geometry is optional: e.g. a graphics element that only carries
        # ``coords`` has no x/y/width/height to convert.
        "x": _optional_int(attrs.get("x")),
        "y": _optional_int(attrs.get("y")),
        "width": _optional_int(attrs.get("width")),
        "height": _optional_int(attrs.get("height")),
    }


def parse_kegg_xml_with_group(xml_file):
    """Parse a pathway XML file into a JSON-serialisable dict.

    The root element's ``name``, ``org``, ``number``, ``title``, ``image``
    and ``link`` attributes are copied as-is, and its direct ``entry``,
    ``relation`` and ``reaction`` children are collected into lists.

    Each entry gets a ``group`` value derived from its numeric id: ids of
    10000 or more are assigned ``str((id // 10000) * 10000)`` (e.g. 43217
    -> ``"40000"``); smaller ids get ``None``.

    Args:
        xml_file: Anything accepted by ``xml.etree.ElementTree.parse`` (a
            path or a file object).

    Returns:
        A dict with the root attributes plus the keys ``"entries"``,
        ``"relations"`` and ``"reactions"``. An entry without a
        ``<graphics>`` child, or whose graphics lacks ``x``/``y``/``width``/
        ``height``, has ``None`` for the missing values.

    Raises:
        TypeError: If an id attribute (``entry``/``reaction``/``substrate``/
            ``product`` ``id``, ``relation`` ``entry1``/``entry2``) is absent.
        ValueError: If an id or a present geometry attribute is not an
            integer literal.
    """
    root = ET.parse(xml_file).getroot()

    pathway_info = {
        "name": root.attrib.get("name"),
        "org": root.attrib.get("org"),
        "number": root.attrib.get("number"),
        "title": root.attrib.get("title"),
        "image": root.attrib.get("image"),
        "link": root.attrib.get("link"),
        "entries": [],
        "relations": [],
        "reactions": [],
    }

    for entry in root.findall("entry"):
        entry_id = int(entry.attrib.get("id"))
        # Ids >= 10000 are bucketed by their ten-thousands block.
        entry_group = (
            str((entry_id // 10000) * 10000) if entry_id >= 10000 else None
        )
        pathway_info["entries"].append({
            "id": entry_id,
            "name": entry.attrib.get("name"),
            "type": entry.attrib.get("type"),
            "link": entry.attrib.get("link"),
            "reaction": entry.attrib.get("reaction"),
            "group": entry_group,
            # Only the first <graphics> child is used; find() may return None.
            "graphics": _graphics_to_dict(entry.find("graphics")),
        })

    for relation in root.findall("relation"):
        pathway_info["relations"].append({
            "entry1": int(relation.attrib.get("entry1")),
            "entry2": int(relation.attrib.get("entry2")),
            "type": relation.attrib.get("type"),
            "subtypes": [
                {
                    "name": subtype.attrib.get("name"),
                    "value": subtype.attrib.get("value"),
                }
                for subtype in relation.findall("subtype")
            ],
        })

    for reaction in root.findall("reaction"):
        pathway_info["reactions"].append({
            "id": int(reaction.attrib.get("id")),
            "name": reaction.attrib.get("name"),
            "type": reaction.attrib.get("type"),
            "substrates": [
                {
                    "id": int(substrate.attrib.get("id")),
                    "name": substrate.attrib.get("name"),
                }
                for substrate in reaction.findall("substrate")
            ],
            "products": [
                {
                    "id": int(product.attrib.get("id")),
                    "name": product.attrib.get("name"),
                }
                for product in reaction.findall("product")
            ],
        })

    return pathway_info
|
||
|
|
||
|
# Usage example: parse the expanded pathway XML and save the result as
# pretty-printed UTF-8 JSON (non-ASCII characters are written unescaped).
result = parse_kegg_xml_with_group("expanded_pathway43200.xml")
with open("group43200.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(result, ensure_ascii=False, indent=2))
|