# Mirror of https://gitlab.com/MisterBiggs/grad.git
# (synced 2025-06-15 14:26:39 +00:00; 47 lines, 955 B, Python)

import glob
import os

import pandas as pd
import pytesseract
import swifter  # imported for the `.swifter` accessor used on the frame_path column
from PIL import Image

# pytesseract.pytesseract.tesseract_cmd = (
#     r"C:/Program Files/Tesseract-OCR/tesseract.exe"
# )

# Screen region where names appear. The single-letter names are presumably
# the top / right / bottom / left edges; the tuple is ordered (l, t, r, b)
# and is the box handed to Image.crop in im_str.
l, t, r, b = 530, 830, 1400, 945
crop_coords = (l, t, r, b)

frames_path = "../frames"


def frame_number(f_path):
    """Return the frame number encoded in a frame file name, minus one.

    The file name without its directory and extension must be an integer,
    e.g. ``"../frames/120.jpg"`` gives ``119``. The ``- 1`` offset is kept
    from the original index computation (presumably frame files are
    numbered from 1 -- confirm against the frame extractor).

    Args:
        f_path: Path of a frame image; only the final component is used.

    Returns:
        The integer in the file name minus one.

    Raises:
        ValueError: If the file name (without extension) is not an integer.
    """
    # os.path handles the platform's own separator; the previous
    # split("/") broke on backslash-separated paths returned by glob.
    stem, _ext = os.path.splitext(os.path.basename(f_path))
    return int(stem) - 1


# Only files whose name ends in "0" before ".jpg" are picked up. glob makes no
# ordering guarantee, so sort by frame number to get rows in frame order.
f_paths = sorted(glob.glob(frames_path + "/*0.jpg"), key=frame_number)

print("Frames loaded:", len(f_paths))

# One row per frame, in frame order, with the default 0..n-1 index (the
# frame-number index that used to be built here was dropped immediately by
# reset_index(drop=True), so it is no longer constructed).
df = pd.DataFrame({"frame_path": f_paths})

def im_str(im_path):
    """Return the text Tesseract reads from the cropped region of one frame.

    Args:
        im_path: Path of the frame image to open.

    Returns:
        The string returned by ``pytesseract.image_to_string`` for the
        ``crop_coords`` region of the image, unmodified.
    """
    # The context manager closes the image file even if cropping or OCR raises.
    with Image.open(im_path) as im:
        name_region = im.crop(crop_coords)
        # --psm 7: Tesseract treats the crop as a single line of text.
        return pytesseract.image_to_string(name_region, config="--psm 7")

out_path = "./out/full.feather"

# Create the output directory before the OCR pass so that a missing or
# unwritable folder fails immediately instead of after every frame has
# been processed.
os.makedirs(os.path.dirname(out_path), exist_ok=True)

# Run OCR on every frame path; swifter supplies the apply implementation.
df["text"] = df["frame_path"].swifter.apply(im_str)

# Disabled filter, kept for reference: drops rows whose OCR text is empty.
# df = df[df["text"] != ""]

print(df)

df.to_feather(out_path)