1
0
mirror of https://gitlab.com/MisterBiggs/grad.git synced 2025-08-05 13:01:28 +00:00

Updated code to work on Linux

This commit is contained in:
2020-05-23 16:36:02 +00:00
parent a7d79dd3e3
commit 305ca556a1

19
grad.py
View File

@@ -5,9 +5,9 @@ import pandas as pd
import swifter import swifter
import glob import glob
pytesseract.pytesseract.tesseract_cmd = ( # pytesseract.pytesseract.tesseract_cmd = (
r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe" # r"C:/Program Files/Tesseract-OCR/tesseract.exe"
) # )
# Coords of where names appear on screen # Coords of where names appear on screen
t = 830 t = 830
@@ -16,13 +16,14 @@ b = 945
l = 530 l = 530
crop_coords = (l, t, r, b) crop_coords = (l, t, r, b)
frames_path = "C:\\Coding\\grad\\frames2" frames_path = "../frames"
f_paths = glob.glob(frames_path + "\\*jpg") f_paths = glob.glob(frames_path + "/*jpg")[10000:10100]
print("Frames loaded:", len(f_paths)) print("Frames loaded:", len(f_paths))
# Make dataframe with frame number as index and frame_path as a column # Make dataframe with frame number as index and frame_path as a column
df = pd.DataFrame( df = pd.DataFrame(
{"frame_path": f_paths}, {"frame_path": f_paths},
index=[int(f_path.split("\\")[-1][:-4]) - 1 for f_path in f_paths], index=[int(f_path.split("/")[-1][:-4]) - 1 for f_path in f_paths],
) )
@@ -30,7 +31,7 @@ df = df.reset_index(drop=True)
def im_str(im_path): def im_str(im_path):
# im = f"C:\\Coding\\grad\\frames2\\{im_name}.jpg" # im = f"C:/Coding/grad/frames2/{im_name}.jpg"
im = Image.open(im_path) im = Image.open(im_path)
im = im.crop(crop_coords) im = im.crop(crop_coords)
return pytesseract.image_to_string(im) return pytesseract.image_to_string(im)
@@ -38,6 +39,8 @@ def im_str(im_path):
df["text"] = df["frame_path"].swifter.apply(im_str) df["text"] = df["frame_path"].swifter.apply(im_str)
# df = df[df["text"] != ""]
print(df) print(df)
df.to_feather("C:\\Coding\\grad\\out\\full.feather") df.to_feather("./out/full.feather")