# ImageDataExtractor2/core/ocr_engine.py
# WebashalarForML's picture
# Upload 42 files
# fad436e verified
# Raw
# History Blame Contribute Delete
# 4.7 kB
import logging
import cv2
import os
import numpy as np
from PIL import Image, ImageEnhance
from .base import BaseOCR
from .gradio_ocr import GradioOCREngine
class OCREngine(BaseOCR):
    """Tiered OCR front end: a local engine first, then a Gradio fallback.

    The local engine is PaddleOCR when ``engine_type`` is ``'paddle'``
    (switching to EasyOCR if PaddleOCR cannot be initialized) or EasyOCR
    when ``engine_type`` is ``'easyocr'``. A ``GradioOCREngine`` is created
    up front and used whenever the local engine is unavailable, fails, or
    returns no text.
    """

    def __init__(self, engine_type='paddle'):
        """Store the requested engine type and initialize the engines.

        Args:
            engine_type: ``'paddle'`` or ``'easyocr'``. Any other value
                leaves the local engine unset, so only the Gradio fallback
                is used.
        """
        self.engine_type = engine_type
        self.ocr = None
        self.gradio_fallback = None
        self._initialize_engine()

    def _initialize_engine(self):
        """Create the Gradio fallback and the requested local OCR engine.

        Initialization failures are logged, never raised. On PaddleOCR
        failure ``self.engine_type`` is rewritten to ``'easyocr'`` and
        EasyOCR is tried; if that also fails ``self.ocr`` stays ``None``.
        """
        logging.info(f"Initializing OCR engine: {self.engine_type}")

        # Pre-emptive Gradio initialization as it's the most reliable fallback
        try:
            self.gradio_fallback = GradioOCREngine()
        except Exception as e:
            logging.error(f"Failed to pre-initialize Gradio fallback: {e}")

        if self.engine_type == 'paddle':
            try:
                # Imported lazily so a missing paddleocr package only
                # triggers the EasyOCR fallback instead of breaking import.
                from paddleocr import PaddleOCR
                self.ocr = PaddleOCR(use_angle_cls=False, lang='en', show_log=False)
                logging.info("PaddleOCR engine initialized successfully.")
            except Exception as e:
                logging.warning(f"Failed to initialize PaddleOCR: {e}. Switching to EasyOCR fallback.")
                self.engine_type = 'easyocr'

        # Deliberately a separate `if`, not `elif`: it must also run when
        # the paddle branch above has just switched engine_type.
        if self.engine_type == 'easyocr':
            try:
                import easyocr
                self.ocr = easyocr.Reader(['en'])
                logging.info("EasyOCR engine initialized successfully.")
            except Exception as e:
                logging.error(f"Failed to initialize EasyOCR: {e}. OCR will be partially unavailable.")
                self.ocr = None

    def preprocess_image(self, image_path, scale=2):
        """Load an image and upscale, denoise, sharpen and contrast-boost it.

        Args:
            image_path: Path of the image file to read with ``cv2.imread``.
            scale: Factor applied to both width and height before OCR.
                Non-integer factors are accepted; the target size is
                rounded to whole pixels.

        Returns:
            The processed image as a BGR array, or ``None`` if the image
            could not be read or any processing step raised.
        """
        try:
            image = cv2.imread(image_path)
            if image is None:
                logging.error(f"Image not found or unreadable: {image_path}")
                return None

            # Upscale. cv2.resize needs an integer (width, height) tuple,
            # so round in case `scale` is a float.
            height, width = image.shape[:2]
            target_size = (int(round(width * scale)), int(round(height * scale)))
            image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)

            # Denoise
            image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

            # Sharpen
            kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
            image = cv2.filter2D(image, -1, kernel)

            # Enhance contrast (PIL works in RGB, OpenCV in BGR)
            pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            enhanced_image = ImageEnhance.Contrast(pil_img).enhance(1.5)

            logging.debug(f"Preprocessing completed for {image_path}")
            return cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
        except Exception as e:
            logging.error(f"Error during image preprocessing for {image_path}: {e}")
            return None

    def _run_local_ocr(self, image_path):
        """Run the configured local engine; return ``""`` on any failure."""
        if not self.ocr or self.engine_type not in ('paddle', 'easyocr'):
            return ""

        is_paddle = self.engine_type == 'paddle'
        try:
            processed_img = self.preprocess_image(image_path)
            if processed_img is None:
                return ""

            if is_paddle:
                results = self.ocr.ocr(processed_img)
                # The first element holds the detected lines for the
                # image and may be empty/None when nothing was found.
                if results and results[0]:
                    return " ".join(line[1][0] for line in results[0])
                return ""

            results = self.ocr.readtext(processed_img)
            return " ".join(res[1] for res in results)
        except Exception as e:
            engine_label = 'PaddleOCR' if is_paddle else 'EasyOCR'
            logging.error(f"{engine_label} crashed: {e}")
            return ""

    def extract_text(self, image_path: str) -> str:
        """Extract text from an image using the tiered strategy.

        1. The local engine (PaddleOCR / EasyOCR) on a preprocessed image.
        2. The Gradio fallback on the original file, if the local engine
           produced nothing and a fallback instance exists.

        Args:
            image_path: Path of the image file to OCR.

        Returns:
            The extracted text, or an empty string if every tier failed.
        """
        logging.info(f"Starting text extraction for: {os.path.basename(image_path)}")

        # 1. Local OCR
        extracted_text = self._run_local_ocr(image_path)
        source = self.engine_type

        # 2. Gradio fallback if local failed
        if not extracted_text and self.gradio_fallback:
            logging.info("Local OCR failed or returned empty. Trying Gradio OCR fallback...")
            source = 'Gradio'
            try:
                # Normalize a None result so the declared `str` return holds.
                extracted_text = self.gradio_fallback.extract_text(image_path) or ""
            except Exception as e:
                # Same best-effort handling the local engines get.
                logging.error(f"Gradio OCR fallback crashed: {e}")
                extracted_text = ""

        if extracted_text:
            # Report the tier that actually produced the text.
            logging.info(f"OCR extracted {len(extracted_text)} characters using {source}.")
        else:
            logging.error("All OCR methods failed to extract text.")

        return extracted_text

    def process(self, image_path: str) -> str:
        """Return the text extracted from ``image_path`` (delegates to ``extract_text``)."""
        return self.extract_text(image_path)