Use case: Search through all directories and files on the local machine for a particular text inside the XML files, and create a CSV of the file names and the matching text.
2024-05-21
3 reads
Use case: Search through all directories and files on the local machine for a particular text inside the XML files, and create a CSV of the file names and the matching text.
# Scan every XML file matched by a glob pattern, collect the direct children of
# each document root whose (namespace-stripped) tag and text equal the
# configured values, and write the matches to a CSV file.
import glob
import os
import sys
import xml.etree.ElementTree as ET

import pandas as pd

# Placeholder prefix of the directories to scan; edit it to point at real data.
PREFIX = "~./folder/filename.xml"
# Exact element text to look for.
TEXT = "funnyText"
# Tag name (without namespace) an element must have to be reported.
NODE_NAME = "parse"
# Destination of the generated report.
OUTPUT_CSV = "~/Downloads/extract.csv"
# Column order of the report; also used as the sort key.
COLUMNS = ["filename", "tag name", "reference"]


def local_name(tag):
    """Return *tag* without its ``{namespace}`` prefix, if it has one."""
    return tag.split("}")[-1]


def find_matches(path, node_name, text):
    """Return ``[path, tag, text]`` rows for the matching elements of one file.

    Only the direct children of the document root are inspected. An element
    matches when its namespace-stripped tag equals *node_name* and its text
    equals *text* exactly.

    Raises:
        xml.etree.ElementTree.ParseError: if *path* is not well-formed XML.
    """
    root = ET.parse(path).getroot()
    rows = []
    for child in root:
        tag = local_name(child.tag)
        if tag == node_name and child.text == text:
            rows.append([path, tag, child.text])
    return rows


def collect_matches(pattern, node_name, text):
    """Return the match rows of every XML file selected by glob *pattern*.

    A leading ``~`` in *pattern* is expanded first because ``glob`` itself
    performs no tilde expansion. Files that are not well-formed XML are
    reported on stderr and skipped so one bad file does not abort the scan.
    """
    rows = []
    for path in glob.glob(os.path.expanduser(pattern)):
        try:
            rows.extend(find_matches(path, node_name, text))
        except ET.ParseError as err:
            print(f"Skipping unparsable XML file {path}: {err}", file=sys.stderr)
    return rows


def main():
    """Run the search with the module-level settings and write the CSV."""
    rows = collect_matches(f"{PREFIX}*/fields/*.xml", NODE_NAME, TEXT)
    df = pd.DataFrame(rows, columns=COLUMNS)
    print(df)
    # The file on disk is sorted so repeated runs produce comparable output.
    df.sort_values(by=COLUMNS).to_csv(OUTPUT_CSV, index=False)


if __name__ == "__main__":
    main()