from PIL import Image
import pytesseract
import pandas as pd

import swifter
import glob

# Location of the Tesseract executable that pytesseract invokes.
# This is a raw string, so each backslash is written once; doubling them
# inside r"..." would embed literal "\\" pairs in the path.
pytesseract.pytesseract.tesseract_cmd = (
    r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)

# Region of each frame where the names appear on screen.
# The four edges are left, top, right, bottom; crop_coords packs them in
# that (l, t, r, b) order for cropping.
l, t, r, b = 530, 830, 1400, 945
crop_coords = (l, t, r, b)

frames_path = "C:\\Coding\\grad\\frames2"
# Match only "<name>.jpg": the frame number below is parsed by stripping a
# 4-character ".jpg" suffix, so anything else would be mis-parsed.
f_paths = glob.glob(frames_path + "\\*.jpg")
print("Frames loaded:", len(f_paths))
# Make dataframe with frame number as index and frame_path as a column.
# File names are 1-based frame numbers ("1.jpg", "2.jpg", ...); subtract 1
# to get a 0-based index.
df = pd.DataFrame(
    {"frame_path": f_paths},
    index=[int(f_path.split("\\")[-1][:-4]) - 1 for f_path in f_paths],
)

# glob does not return files in numeric order (e.g. "10.jpg" can come before
# "2.jpg"), so order the rows by frame number first. The index is then reset
# to a default RangeIndex, which DataFrame.to_feather requires.
df = df.sort_index().reset_index(drop=True)


def im_str(im_path: str) -> str:
    """Return the text Tesseract recognises in the name region of a frame.

    Opens the image at ``im_path``, crops it to the module-level
    ``crop_coords`` box and runs ``pytesseract.image_to_string`` on the
    cropped region.

    Args:
        im_path: Path of the frame image to read.

    Returns:
        The OCR output for the cropped region.
    """
    # The context manager guarantees the underlying file is closed even if
    # cropping or OCR raises; the OCR call stays inside the block so the
    # source image is still open while it is being used.
    with Image.open(im_path) as im:
        return pytesseract.image_to_string(im.crop(crop_coords))


# OCR every frame: im_str is applied to each path in "frame_path" through
# swifter's apply accessor, and the results become the "text" column.
ocr_text = df["frame_path"].swifter.apply(im_str)
df["text"] = ocr_text

print(df)

# Persist the frame paths together with their recognised text.
feather_out = "C:\\Coding\\grad\\out\\full.feather"
df.to_feather(feather_out)