Update code to work on Linux

2025-08-03 12:01:27 +00:00 · 2020-05-23 16:36:02 +00:00
parent a7d79dd3e3
commit 305ca556a1
1 changed file with 11 additions and 8 deletions
--- a/grad.py
+++ b/grad.py
@@ -5,9 +5,9 @@ import pandas as pd
 import swifter
 import glob

-pytesseract.pytesseract.tesseract_cmd = (
-    r"C:\\Program Files\\Tesseract-OCR\\tesseract.exe"
-)
+# pytesseract.pytesseract.tesseract_cmd = (
+#     r"C:/Program Files/Tesseract-OCR/tesseract.exe"
+# )

 # Coords of where names appear on screen
 t = 830
@@ -16,13 +16,14 @@ b = 945
 l = 530
 crop_coords = (l, t, r, b)

-frames_path = "C:\\Coding\\grad\\frames2"
-f_paths = glob.glob(frames_path + "\\*jpg")
+frames_path = "../frames"
+f_paths = glob.glob(frames_path + "/*jpg")[10000:10100]
+
 print("Frames loaded:", len(f_paths))
 # Make dataframe with frame number as index and frame_path as a column
 df = pd.DataFrame(
    {"frame_path": f_paths},
-    index=[int(f_path.split("\\")[-1][:-4]) - 1 for f_path in f_paths],
+    index=[int(f_path.split("/")[-1][:-4]) - 1 for f_path in f_paths],
 )


@@ -30,7 +31,7 @@ df = df.reset_index(drop=True)


 def im_str(im_path):
-    # im = f"C:\\Coding\\grad\\frames2\\{im_name}.jpg"
+    # im = f"C:/Coding/grad/frames2/{im_name}.jpg"
    im = Image.open(im_path)
    im = im.crop(crop_coords)
    return pytesseract.image_to_string(im)
@@ -38,6 +39,8 @@ def im_str(im_path):

 df["text"] = df["frame_path"].swifter.apply(im_str)

+# df = df[df["text"] != ""]
+
 print(df)

-df.to_feather("C:\\Coding\\grad\\out\\full.feather")
+df.to_feather("./out/full.feather")