Technical Article

Python script to search through folders and files for a particular text in XML

,

Use case: Search through all the directories and files on the local machine for XML elements whose text matches a particular string, then create a CSV listing the file name, tag name, and matching text.

import glob
import os
import xml.etree.ElementTree as ET

import pandas as pd

# Placeholder search root -- replace with the real location before running.
# A leading "~" (home directory) is expanded before the pattern is globbed.
prefix = "~./folder/filename.xml"

# Exact element text to look for, and the namespace-stripped tag that must
# carry it.
text = 'funnyText'
nodeName = 'parse'


def find_matches(files, node_name, wanted_text):
    """Return ``[filename, tag, text]`` rows for every matching XML element.

    Each path in *files* is parsed as XML and the direct children of its
    root element are inspected. A child matches when its tag, with any
    ``{namespace}`` prefix removed, equals *node_name* and its text equals
    *wanted_text* exactly.

    Args:
        files: Iterable of paths to XML files.
        node_name: Tag name to match, without namespace.
        wanted_text: Exact element text to match.

    Returns:
        A list of ``[filename, tag, text]`` lists, in the order the files
        and elements were visited. Empty when nothing matches.

    Raises:
        xml.etree.ElementTree.ParseError: If a file is not well-formed XML.
    """
    matches = []
    for f in files:
        root = ET.parse(f).getroot()
        for child in root:
            # ElementTree spells namespaced tags as "{uri}local"; keep "local".
            tag = child.tag.split("}")[-1]
            if tag == node_name and child.text == wanted_text:
                matches.append([f, tag, child.text])
    return matches


def main():
    """Search the configured folders and write the matches to a CSV file.

    Globs ``{prefix}*/fields/*.xml``, collects the matching elements,
    prints the sorted result and writes it to ``~/Downloads/extract.csv``.

    Returns:
        The sorted ``pandas.DataFrame`` with columns ``filename``,
        ``tag name`` and ``reference``.
    """
    # glob does not expand "~" on its own, so resolve it up front.
    files = glob.glob(os.path.expanduser(f"{prefix}*/fields/*.xml"))

    answer = find_matches(files, nodeName, text)

    df = pd.DataFrame(answer, columns=["filename", "tag name", "reference"])
    # Sort before printing so the console preview matches the CSV and does
    # not depend on the order glob happened to return the files in.
    df.sort_values(by=["filename", "tag name", "reference"], inplace=True)
    print(df)
    df.to_csv("~/Downloads/extract.csv", index=False)
    return df


if __name__ == "__main__":
    df = main()

Rate

5 (1)

You rated this post out of 5. Change rating

Share

Share

Rate

5 (1)

You rated this post out of 5. Change rating