mirror of
https://gitlab.com/MisterBiggs/grad.git
synced 2025-06-15 22:36:39 +00:00
Updated code to work on Linux
This commit is contained in:
parent
a7d79dd3e3
commit
305ca556a1
19
grad.py
19
grad.py
@ -5,9 +5,9 @@ import pandas as pd
|
||||
import swifter
|
||||
import glob
|
||||
|
||||
pytesseract.pytesseract.tesseract_cmd = (
|
||||
r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
|
||||
)
|
||||
# pytesseract.pytesseract.tesseract_cmd = (
|
||||
# r"C:/Program Files/Tesseract-OCR/tesseract.exe"
|
||||
# )
|
||||
|
||||
# Coords of where names appear on screen
|
||||
t = 830
|
||||
@ -16,13 +16,14 @@ b = 945
|
||||
l = 530
|
||||
crop_coords = (l, t, r, b)
|
||||
|
||||
frames_path = "C:\\Coding\\grad\\frames2"
|
||||
f_paths = glob.glob(frames_path + "\\*jpg")
|
||||
frames_path = "../frames"
|
||||
f_paths = glob.glob(frames_path + "/*jpg")[10000:10100]
|
||||
|
||||
print("Frames loaded:", len(f_paths))
|
||||
# Make dataframe with frame number as index and frame_path as a column
|
||||
df = pd.DataFrame(
|
||||
{"frame_path": f_paths},
|
||||
index=[int(f_path.split("\\")[-1][:-4]) - 1 for f_path in f_paths],
|
||||
index=[int(f_path.split("/")[-1][:-4]) - 1 for f_path in f_paths],
|
||||
)
|
||||
|
||||
|
||||
@ -30,7 +31,7 @@ df = df.reset_index(drop=True)
|
||||
|
||||
|
||||
def im_str(im_path):
|
||||
# im = f"C:\\Coding\\grad\\frames2\\{im_name}.jpg"
|
||||
# im = f"C:/Coding/grad/frames2/{im_name}.jpg"
|
||||
im = Image.open(im_path)
|
||||
im = im.crop(crop_coords)
|
||||
return pytesseract.image_to_string(im)
|
||||
@ -38,6 +39,8 @@ def im_str(im_path):
|
||||
|
||||
df["text"] = df["frame_path"].swifter.apply(im_str)
|
||||
|
||||
# df = df[df["text"] != ""]
|
||||
|
||||
print(df)
|
||||
|
||||
df.to_feather("C:\\Coding\\grad\\out\\full.feather")
|
||||
df.to_feather("./out/full.feather")
|
||||
|
Loading…
x
Reference in New Issue
Block a user