Use case: Search through all the directories and files under a local folder for XML elements whose text matches a particular string, then create a CSV listing each file name and the matching text.
2024-05-21
9 reads
Use case: Search through all the directories and files under a local folder for XML elements whose text matches a particular string, then create a CSV listing each file name and the matching text.
import glob
import os
import xml.etree.ElementTree as ET

import pandas as pd
# Scan every XML file matching the glob pattern below, collect the direct
# children of each document root whose local tag name equals `nodeName` and
# whose text equals `text`, then print the matches and write them to a CSV.

# Rows of [filename, tag name, matching text], one per matching element.
answer = []

# glob.glob() does not expand "~", so resolve a home-relative prefix
# explicitly. expanduser() returns the path unchanged when it does not start
# with a resolvable "~" or "~user" component.
prefix = os.path.expanduser("~./folder/filename.xml")
files = glob.glob(f"{prefix}*/fields/*.xml")

# Exact element text to look for, and the (namespace-free) tag that holds it.
text = 'funnyText'
nodeName = 'parse'

for f in files:
    try:
        tree = ET.parse(f)
    except ET.ParseError as err:
        # A single malformed file should not abort the whole scan.
        print(f"Skipping unparsable XML file {f}: {err}")
        continue
    root = tree.getroot()
    # Only the direct children of the root element are inspected.
    for a in root:
        # ElementTree spells namespaced tags as "{uri}local"; keep the
        # local part so the comparison works with or without a namespace.
        tag = a.tag.split("}")[-1]
        if tag == nodeName and a.text == text:
            answer.append([f, tag, a.text])

df = pd.DataFrame(answer, columns=["filename", "tag name", "reference"])
print(df)
df.sort_values(by=["filename", "tag name", "reference"], inplace=True)
df.to_csv("~/Downloads/extract.csv", index=False)