"""OCR the on-screen name region of every frame image and save the text.

Every ``*jpg`` file found in ``frames_path`` is cropped to ``crop_coords``,
run through Tesseract, and the recognised text is stored next to the frame's
path in a DataFrame that is written to ``./out/full.feather``.
"""
import glob
import os

import pandas as pd
import pytesseract
import swifter  # noqa: F401 -- imported for the ``.swifter`` accessor used below
from PIL import Image

# If the Tesseract binary is not found automatically (e.g. on Windows), point
# pytesseract at it explicitly:
# pytesseract.pytesseract.tesseract_cmd = (
#     r"C:/Program Files/Tesseract-OCR/tesseract.exe"
# )

# Coords of where names appear on screen, in pixels.
t = 830
r = 1400
b = 945
l = 530
# Box passed to Image.crop, ordered (left, top, right, bottom).
crop_coords = (l, t, r, b)

frames_path = "../frames"
out_path = "./out/full.feather"

f_paths = glob.glob(frames_path + "/*jpg")
print("Frames loaded:", len(f_paths))


def _frame_number(f_path):
    """Return the zero-based frame number encoded in a frame's file name.

    The file stem must be an integer (``"0012.jpg"`` -> ``11``). The path is
    split with ``os.path`` so both ``/`` and ``\\`` separators are handled.

    Raises:
        ValueError: if the file stem is not an integer.
    """
    stem = os.path.splitext(os.path.basename(f_path))[0]
    return int(stem) - 1


def im_str(im_path):
    """Return the text Tesseract recognises in the name region of one frame.

    Args:
        im_path: path of the frame image to read.

    Returns:
        The string produced by ``pytesseract.image_to_string`` for the region
        of the image delimited by ``crop_coords``.
    """
    # OCR runs inside the ``with`` block so the crop is consumed while the
    # source image is still open; the file handle is released on exit.
    with Image.open(im_path) as im:
        return pytesseract.image_to_string(im.crop(crop_coords))


# Create the output directory up front so a missing folder is handled before
# the OCR pass rather than failing at the final write.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Make dataframe with frame number as index and frame_path as a column.
df = pd.DataFrame(
    {"frame_path": f_paths},
    index=[_frame_number(f_path) for f_path in f_paths],
)
# glob returns files in arbitrary order, so sort by frame number first; the
# index is then reset to a plain 0..n-1 range before the feather export.
df = df.sort_index().reset_index(drop=True)

df["text"] = df["frame_path"].swifter.apply(im_str)
# To drop frames where no text was recognised:
# df = df[df["text"] != ""]

print(df)
df.to_feather(out_path)