"""OCR the on-screen name region of every extracted frame and save the text.

Reads every ``.jpg`` frame in ``frames_path``, crops each one to
``crop_coords``, runs Tesseract on the crop, and writes a feather file with
one row per frame (columns ``frame_path`` and ``text``), ordered by frame
number.
"""
import glob

import pandas as pd
import pytesseract
import swifter  # noqa: F401 -- imported only to register the ``.swifter`` accessor
from PIL import Image

# NOTE(review): this is a raw string, so the doubled backslashes are kept
# literally in the path. Presumably Windows tolerates the repeated
# separators; left byte-identical on purpose -- confirm before normalizing.
pytesseract.pytesseract.tesseract_cmd = (
    r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
)

# Coords of where names appear on screen
t = 830
r = 1400
b = 945
l = 530
# Passed to Image.crop, which takes its box as (left, upper, right, lower).
crop_coords = (l, t, r, b)

frames_path = "C:\\Coding\\grad\\frames2"
# Match only "<name>.jpg": the frame-number parsing below strips exactly the
# four-character ".jpg" suffix, so the pattern must guarantee that suffix.
f_paths = glob.glob(frames_path + "\\*.jpg")
print("Frames loaded:", len(f_paths))

# Make dataframe with the zero-based frame number (file name minus one) as
# index and frame_path as a column.
df = pd.DataFrame(
    {"frame_path": f_paths},
    index=[int(f_path.split("\\")[-1][:-4]) - 1 for f_path in f_paths],
)
# glob does not promise numeric order, so sort by frame number first; then
# drop back to a default index (the frame path stays available as a column).
df = df.sort_index().reset_index(drop=True)


def im_str(im_path):
    """Return the text Tesseract reads from the name region of one frame.

    Args:
        im_path: Path of the frame image to open.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the image
        cropped to the module-level ``crop_coords``.
    """
    # Context manager closes the underlying file once OCR has finished.
    with Image.open(im_path) as im:
        return pytesseract.image_to_string(im.crop(crop_coords))


df["text"] = df["frame_path"].swifter.apply(im_str)
print(df)
df.to_feather("C:\\Coding\\grad\\out\\full.feather")