Spaces:

Bhashini-IITJ
/

IndicPhotoOCR

Paused

anikde committed on Feb 12

Commit

ba402b9

1 Parent(s): 1a662ac

added AUTO choose capability

Files changed (3) hide show

IndicPhotoOCR/ocr.py CHANGED Viewed

@@ -17,7 +17,7 @@ from IndicPhotoOCR.utils.helper import detect_para
 class OCR:
-    def __init__(self, device='cuda:0', verbose=False):
         # self.detect_model_checkpoint = detect_model_checkpoint
         self.device = device
         self.verbose = verbose
@@ -27,6 +27,7 @@ class OCR:
         self.recogniser = PARseqrecogniser()
         # self.identifier = CLIPidentifier()
         self.identifier = VIT_identifier()
     # def detect(self, image_path, detect_model_checkpoint=cfg.checkpoint):
     #     """Run the detection model to get bounding boxes of text areas."""
@@ -109,7 +110,7 @@ class OCR:
         # Predict script language, here we assume "hindi" as the model name
         if self.verbose:
             print("Identifying script for the cropped area...")
-        script_lang = self.identifier.identify(cropped_path, "hindi", self.device)  # Use "hindi" as the model name
         # print(script_lang)
         # Clean up temporary file

 class OCR:
+    def __init__(self, device='cuda:0', identifier_lang='hindi', verbose=False):
         # self.detect_model_checkpoint = detect_model_checkpoint
         self.device = device
         self.verbose = verbose
         self.recogniser = PARseqrecogniser()
         # self.identifier = CLIPidentifier()
         self.identifier = VIT_identifier()
+        self.indentifier_lang = identifier_lang
     # def detect(self, image_path, detect_model_checkpoint=cfg.checkpoint):
     #     """Run the detection model to get bounding boxes of text areas."""
         # Predict script language, here we assume "hindi" as the model name
         if self.verbose:
             print("Identifying script for the cropped area...")
+        script_lang = self.identifier.identify(cropped_path, self.indentifier_lang, self.device)  # Use "hindi" as the model name
         # print(script_lang)
         # Clean up temporary file

IndicPhotoOCR/recognition/parseq_recogniser.py CHANGED Viewed

@@ -37,6 +37,14 @@ model_info = {
         "path": "models/gujarati.ckpt",
         "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/gujarati.ckpt",
     },
     "marathi": {
         "path": "models/marathi.ckpt",
         "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/marathi.ckpt",
@@ -200,7 +208,7 @@ class PARseqrecogniser:
         Returns:
             str: The recognized text from the image.
         """
-        # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         if language != "english":
             model_path = self.ensure_model(checkpoint)

         "path": "models/gujarati.ckpt",
         "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/gujarati.ckpt",
     },
+    "kannada": {
+        "path": "models/kannada.ckpt",
+        "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/kannada.ckpt",
+    },
+    "malayalam": {
+        "path": "models/malayalam.ckpt",
+        "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/malayalam.ckpt",
+    },
     "marathi": {
         "path": "models/marathi.ckpt",
         "url" : "https://github.com/anikde/STocr/releases/download/V2.0.0/marathi.ckpt",
         Returns:
             str: The recognized text from the image.
         """
+        device = device
         if language != "english":
             model_path = self.ensure_model(checkpoint)

IndicPhotoOCR/script_identification/vit/vit_infer.py CHANGED Viewed

@@ -74,11 +74,16 @@ model_info = {
         "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/hindienglishtelugu.zip",
         "subcategories": ["hindi", "english", "telugu"]
     },
-    "12C": {
         "path": "models/12_classes",
         "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/12_classes.zip",
         "subcategories": ["hindi", "english", "assamese","bengali","gujarati","kannada","malayalam","marathi","odia","punjabi","tamil","telegu"]
     },
 }

         "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/hindienglishtelugu.zip",
         "subcategories": ["hindi", "english", "telugu"]
     },
+    "auto": {
         "path": "models/12_classes",
         "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/12_classes.zip",
         "subcategories": ["hindi", "english", "assamese","bengali","gujarati","kannada","malayalam","marathi","odia","punjabi","tamil","telegu"]
     },
+    "10C": {
+        "path": "models/12_classes",
+        "url" : "https://github.com/Bhashini-IITJ/ScriptIdentification/releases/download/Vit_Models/10_classes.zip",
+        "subcategories": ["hindi", "english", "assamese","bengali","gujarati","kannada","malayalam","marathi","odia","punjabi","tamil","telegu"]
+    },
 }