import xml.etree.ElementTree as ET
import json


def _optional_int(value):
    """Return ``int(value)``, or ``None`` when the attribute is absent."""
    return None if value is None else int(value)


def _parse_graphics(entry):
    """Return the ``graphics`` dict for *entry*.

    The dict always has the same keys. Every value is ``None`` when the
    entry has no ``<graphics>`` child, and the numeric fields are ``None``
    when the corresponding attribute is missing (for example graphics that
    do not carry ``x``/``y``/``width``/``height``).
    """
    graphics = entry.find("graphics")
    # Fall back to an empty mapping so a missing <graphics> child yields
    # None values instead of raising AttributeError on ``None.attrib``.
    attrs = graphics.attrib if graphics is not None else {}
    return {
        "name": attrs.get("name"),
        "fgcolor": attrs.get("fgcolor"),
        "bgcolor": attrs.get("bgcolor"),
        "type": attrs.get("type"),
        "x": _optional_int(attrs.get("x")),
        "y": _optional_int(attrs.get("y")),
        "width": _optional_int(attrs.get("width")),
        "height": _optional_int(attrs.get("height")),
    }


def _parse_entry(entry):
    """Convert one ``<entry>`` element into a plain dict."""
    entry_id = int(entry.attrib.get("id"))
    # Ids of 10000 and above are labelled with the id rounded down to the
    # nearest multiple of 10000 (as a string); smaller ids have no group.
    if entry_id >= 10000:
        entry_group = str((entry_id // 10000) * 10000)
    else:
        entry_group = None
    return {
        "id": entry_id,
        "name": entry.attrib.get("name"),
        "type": entry.attrib.get("type"),
        "link": entry.attrib.get("link"),
        "reaction": entry.attrib.get("reaction"),
        "group": entry_group,
        "graphics": _parse_graphics(entry),
    }


def _parse_relation(relation):
    """Convert one ``<relation>`` element, with its subtypes, into a dict."""
    return {
        "entry1": int(relation.attrib.get("entry1")),
        "entry2": int(relation.attrib.get("entry2")),
        "type": relation.attrib.get("type"),
        "subtypes": [
            {
                "name": subtype.attrib.get("name"),
                "value": subtype.attrib.get("value"),
            }
            for subtype in relation.findall("subtype")
        ],
    }


def _parse_participants(reaction, tag):
    """Return the ``id``/``name`` dicts for each *tag* child of *reaction*."""
    return [
        {
            "id": int(child.attrib.get("id")),
            "name": child.attrib.get("name"),
        }
        for child in reaction.findall(tag)
    ]


def _parse_reaction(reaction):
    """Convert one ``<reaction>`` element into a plain dict."""
    return {
        "id": int(reaction.attrib.get("id")),
        "name": reaction.attrib.get("name"),
        "type": reaction.attrib.get("type"),
        "substrates": _parse_participants(reaction, "substrate"),
        "products": _parse_participants(reaction, "product"),
    }


def parse_kegg_xml_with_group(xml_file):
    """Parse a pathway XML document into a JSON-serialisable dict.

    Args:
        xml_file: Anything accepted by ``xml.etree.ElementTree.parse``
            (a path or a file object).

    Returns:
        A dict holding the root element's ``name``, ``org``, ``number``,
        ``title``, ``image`` and ``link`` attributes, plus three lists
        built from the root's direct children:

        * ``entries`` - one dict per ``<entry>``, including a derived
          ``group`` label and a ``graphics`` sub-dict.
        * ``relations`` - one dict per ``<relation>`` with its subtypes.
        * ``reactions`` - one dict per ``<reaction>`` with its substrates
          and products.

        Attributes that are absent from the XML are reported as ``None``.

    Raises:
        TypeError: If an ``id``, ``entry1`` or ``entry2`` attribute that
            is converted to ``int`` is missing.
        ValueError: If an attribute converted to ``int`` is not an
            integer literal.
    """
    root = ET.parse(xml_file).getroot()

    return {
        "name": root.attrib.get("name"),
        "org": root.attrib.get("org"),
        "number": root.attrib.get("number"),
        "title": root.attrib.get("title"),
        "image": root.attrib.get("image"),
        "link": root.attrib.get("link"),
        "entries": [_parse_entry(e) for e in root.findall("entry")],
        "relations": [_parse_relation(r) for r in root.findall("relation")],
        "reactions": [_parse_reaction(r) for r in root.findall("reaction")],
    }


# Example usage (guarded so that importing this module performs no file I/O):
if __name__ == "__main__":
    result = parse_kegg_xml_with_group("expanded_pathway43200.xml")
    with open("group43200.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)