ImageDataExtractor2

Sleeping

App Files Files Community

ImageDataExtractor2 / core /ocr_engine.py

WebashalarForML

Upload 42 files

fad436e verified 3 months ago

Raw

History Blame Contribute Delete

4.7 kB

	import logging
	import cv2
	import os
	import numpy as np
	from PIL import Image, ImageEnhance
	from .base import BaseOCR
	from .gradio_ocr import GradioOCREngine

	class OCREngine(BaseOCR):
	    """OCR front end with a local engine and a Gradio-backed fallback.

	    A local engine (PaddleOCR, or EasyOCR when PaddleOCR cannot be
	    initialized) is tried first on a preprocessed copy of the image. When it
	    yields no text, the ``GradioOCREngine`` fallback is asked to read the
	    original image path.

	    Attributes set by ``__init__``:
	        engine_type: Name of the local engine in use ('paddle' or 'easyocr').
	            May be rewritten to 'easyocr' if PaddleOCR fails to initialize.
	        ocr: The local engine instance, or None when none could be created.
	        gradio_fallback: A ``GradioOCREngine`` instance, or None when its
	            construction failed.
	    """

	    # Human-readable engine labels used in crash log messages.
	    _ENGINE_LABELS = {'paddle': 'PaddleOCR', 'easyocr': 'EasyOCR'}

	    def __init__(self, engine_type='paddle'):
	        """Store the requested engine type and initialize the engines.

	        Args:
	            engine_type: 'paddle' (default) or 'easyocr'. Any other value
	                leaves ``self.ocr`` as None, so only the Gradio fallback
	                (if available) is used.
	        """
	        self.engine_type = engine_type
	        self.ocr = None
	        self.gradio_fallback = None
	        self._initialize_engine()

	    def _initialize_engine(self):
	        """Create the Gradio fallback and the requested local engine.

	        Initialization failures are logged rather than raised. A PaddleOCR
	        failure switches ``self.engine_type`` to 'easyocr' and EasyOCR is
	        tried next; an EasyOCR failure leaves ``self.ocr`` as None.
	        """
	        logging.info(f"Initializing OCR engine: {self.engine_type}")

	        # Pre-emptive Gradio initialization as it's the most reliable fallback
	        try:
	            self.gradio_fallback = GradioOCREngine()
	        except Exception as e:
	            logging.error(f"Failed to pre-initialize Gradio fallback: {e}")

	        if self.engine_type == 'paddle':
	            try:
	                # Imported lazily so a missing paddleocr package is handled
	                # by the same except clause as a failed construction.
	                from paddleocr import PaddleOCR
	                self.ocr = PaddleOCR(use_angle_cls=False, lang='en', show_log=False)
	                logging.info("PaddleOCR engine initialized successfully.")
	            except Exception as e:
	                logging.warning(f"Failed to initialize PaddleOCR: {e}. Switching to EasyOCR fallback.")
	                self.engine_type = 'easyocr'

	        # Deliberately a separate `if`, not `elif`: a PaddleOCR failure above
	        # falls through to EasyOCR.
	        if self.engine_type == 'easyocr':
	            try:
	                import easyocr
	                self.ocr = easyocr.Reader(['en'])
	                logging.info("EasyOCR engine initialized successfully.")
	            except Exception as e:
	                logging.error(f"Failed to initialize EasyOCR: {e}. OCR will be partially unavailable.")
	                self.ocr = None

	    def preprocess_image(self, image_path, scale=2):
	        """Load an image and upscale, denoise, sharpen and contrast-boost it.

	        Args:
	            image_path: Path of the image file, read with ``cv2.imread``.
	            scale: Factor applied to both width and height before the other
	                steps. The resulting size is truncated to whole pixels, so
	                fractional factors are accepted.

	        Returns:
	            The processed image as a BGR numpy array, or None when the image
	            cannot be read or any processing step raises.
	        """
	        try:
	            image = cv2.imread(image_path)
	            if image is None:
	                logging.error(f"Image not found or unreadable: {image_path}")
	                return None

	            # Upscale. cv2.resize needs integer dimensions, hence the int().
	            height, width = image.shape[:2]
	            target_size = (int(width * scale), int(height * scale))
	            image = cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)

	            # Denoise
	            image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

	            # Sharpen
	            kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
	            image = cv2.filter2D(image, -1, kernel)

	            # Enhance contrast (PIL works in RGB, OpenCV in BGR).
	            pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
	            enhanced_image = ImageEnhance.Contrast(pil_img).enhance(1.5)

	            logging.debug(f"Preprocessing completed for {image_path}")
	            return cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
	        except Exception as e:
	            logging.error(f"Error during image preprocessing for {image_path}: {e}")
	            return None

	    def _run_local_ocr(self, image_path):
	        """Run the configured local engine on the preprocessed image.

	        Returns:
	            The recognized text fragments joined by single spaces, or "" when
	            no local engine is available, preprocessing fails, nothing is
	            recognized, or the engine raises (the exception is logged).
	        """
	        label = self._ENGINE_LABELS.get(self.engine_type)
	        if label is None or not self.ocr:
	            return ""

	        try:
	            processed_img = self.preprocess_image(image_path)
	            if processed_img is None:
	                return ""

	            if self.engine_type == 'paddle':
	                results = self.ocr.ocr(processed_img)
	                # Text is read from line[1][0] of each entry in results[0];
	                # an empty/None first element means nothing was recognized.
	                if results and results[0]:
	                    return " ".join(line[1][0] for line in results[0])
	                return ""

	            results = self.ocr.readtext(processed_img)
	            return " ".join(res[1] for res in results)
	        except Exception as e:
	            logging.error(f"{label} crashed: {e}")
	            return ""

	    def extract_text(self, image_path: str) -> str:
	        """Extract text from an image using a tiered strategy.

	        1. Local engine (PaddleOCR / EasyOCR) on the preprocessed image.
	        2. Gradio fallback on the original path, if the local engine
	           produced nothing and the fallback was initialized.

	        Args:
	            image_path: Path of the image to read.

	        Returns:
	            The extracted text, or "" when every method failed. Failures are
	            logged instead of raised.
	        """
	        logging.info(f"Starting text extraction for: {os.path.basename(image_path)}")

	        # 1. Local OCR
	        source = self.engine_type
	        extracted_text = self._run_local_ocr(image_path)

	        # 2. Gradio Fallback if Local failed
	        if not extracted_text and self.gradio_fallback:
	            logging.info("Local OCR failed or returned empty. Trying Gradio OCR fallback...")
	            source = 'Gradio'
	            try:
	                # `or ""` keeps the declared str return type even if the
	                # fallback hands back None.
	                extracted_text = self.gradio_fallback.extract_text(image_path) or ""
	            except Exception as e:
	                logging.error(f"Gradio OCR fallback crashed: {e}")
	                extracted_text = ""

	        if extracted_text:
	            # Report the engine that actually produced the text.
	            logging.info(f"OCR extracted {len(extracted_text)} characters using {source}.")
	        else:
	            logging.error("All OCR methods failed to extract text.")

	        return extracted_text

	    def process(self, image_path: str) -> str:
	        """Return the text extracted from ``image_path`` (see ``extract_text``)."""
	        return self.extract_text(image_path)