# Web file-viewer residue (status text, commit hashes, line-number gutter) removed.
# Reported file size: 3,673 Bytes
import os
import sys
# 1. Force-download the spaCy model `en_core_web_sm` at startup.
# The command is passed as an argument list (no shell), so an interpreter
# path containing spaces or shell metacharacters is still invoked correctly.
import subprocess

_spacy_download = subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
    check=False,
)
if _spacy_download.returncode != 0:
    # Best-effort step: keep starting up, but make the failure visible in logs
    # instead of silently ignoring the exit status.
    print(f"⚠️ [System] spaCy 模型下載失敗 (exit code {_spacy_download.returncode})")
# 2. Compatibility patch: some code still looks up `huggingface_hub.HfFolder`;
# when the installed huggingface_hub does not expose it, install a stand-in.
class MockHfFolder:
    """Minimal replacement for `huggingface_hub.HfFolder`.

    The token is read from the `HF_TOKEN` environment variable; saving a
    token is a no-op.
    """

    @staticmethod
    def get_token():
        """Return the value of the `HF_TOKEN` environment variable, or None."""
        return os.getenv("HF_TOKEN")

    @staticmethod
    def save_token(token):
        """Accept and discard `token`; nothing is persisted."""
        pass


def ensure_hf_folder(hub_module):
    """Attach `MockHfFolder` to `hub_module` if it lacks an `HfFolder` attribute.

    Args:
        hub_module: The module object to patch (normally `huggingface_hub`).

    Returns:
        True if the stand-in was installed, False if `HfFolder` already existed.
    """
    if hasattr(hub_module, 'HfFolder'):
        return False
    hub_module.HfFolder = MockHfFolder
    return True


try:
    import huggingface_hub
    ensure_hf_folder(huggingface_hub)
except Exception as exc:
    # Still best-effort (startup continues), but the reason is logged and
    # KeyboardInterrupt / SystemExit are no longer swallowed.
    print(f"⚠️ [System] huggingface_hub 相容性補丁未套用: {exc}")
import gradio as gr
from fastcoref import FCoref
from deep_translator import GoogleTranslator
# Key fix: pass the checkpoint as `model_name_or_path` rather than `model_name`.
print("🚀 [System] 正在進行阻塞式權重同步(約 300MB)...")
try:
    # Preferred form: keyword argument for the checkpoint name.
    model = FCoref(device='cpu', model_name_or_path='biu-nlp/f-coref')
except TypeError:
    # The keyword form was rejected with a TypeError; fall back to passing
    # the checkpoint name as the first positional argument.
    model = FCoref('biu-nlp/f-coref', device='cpu')
    print("✅ [System] 使用原始參數模式載入完成。")
else:
    print("✅ [System] 模型載入完成,介面即將開啟。")
def coref_chat(user_input):
    """Run coreference resolution on `user_input` and build a text report.

    Input containing any character in U+4E00..U+9FFF is translated to English
    first; other input is analysed unchanged. The English text is passed to
    the module-level `model`, and every detected cluster is listed in
    English together with a zh-TW translation of each mention.

    Args:
        user_input: Text from the UI. None, empty, or whitespace-only input
            is treated as "nothing entered".

    Returns:
        A `(first, report)` tuple of strings. For blank input both elements
        are fixed prompt strings; otherwise `first` is the unchanged input.
        An exception raised during analysis is reported inside `report`
        instead of propagating.
    """
    # Check for None before calling .strip(): this guard sits outside the
    # try block, so an AttributeError here would escape to the caller.
    if not user_input or not user_input.strip():
        return "請輸入內容", "等待輸入..."
    try:
        # 1. Detect Chinese characters and normalise the text to English.
        has_chinese = any('\u4e00' <= char <= '\u9fff' for char in user_input)
        if has_chinese:
            working_text = GoogleTranslator(source='zh-CN', target='en').translate(user_input)
            mode_notice = "【模式:中文 ➔ 英文解析】"
        else:
            working_text = user_input
            mode_notice = "【模式:純英文解析】"

        # 2. Run coreference resolution on the English text.
        preds = model.predict(texts=[working_text])
        clusters = preds[0].get_clusters()

        # One translator instance serves every en -> zh-TW call below,
        # instead of constructing a new one for each mention.
        to_zh_tw = GoogleTranslator(source='en', target='zh-TW')

        # 3. Assemble the report; pieces are joined once at the end.
        parts = [
            f"✨ {mode_notice}\n",
            f"📝 英文邏輯空間: {working_text}\n",
        ]

        # --- English -> Chinese rendering of the analysed sentence ---
        try:
            translation_back = to_zh_tw.translate(working_text)
            parts.append(f"📖 中文句子: {translation_back}\n")
        except Exception:
            # A translation failure must not abort the whole report.
            parts.append("📖 中文句子: (翻譯暫時無法讀取)\n")
        parts.append("---------------------------------\n")

        if not clusters:
            parts.append("🔍 分析結果:指代關係明確,或模型判定關聯度未達門檻。")
        else:
            parts.append("🎯【偵測到之實體鏈 (Entity Chains)】:\n")
            for index, cluster in enumerate(clusters, start=1):
                cluster_str_en = ' ↔ '.join(cluster)
                # Translate each mention of the chain individually.
                try:
                    translated_items = [to_zh_tw.translate(item) for item in cluster]
                    cluster_str_zh = ' ↔ '.join(translated_items)
                except Exception:
                    cluster_str_zh = "(鏈結翻譯失敗)"
                parts.append(f" 🔗 鏈結 {index} (繁中): {cluster_str_zh}\n")
                parts.append(f" └─ (原文): {cluster_str_en}\n")
        return user_input, "".join(parts)
    except Exception as e:
        return user_input, f"運行錯誤: {e}"
# Interface setup: one multi-line text input and two text outputs, matching
# the two strings returned by coref_chat.
_input_box = gr.Textbox(label="輸入文本", lines=3, placeholder="輸入中文或英文段落...")
_output_boxes = [
    gr.Textbox(label="原始輸入"),
    gr.Textbox(label="AI 語意分析報告"),
]
demo = gr.Interface(
    fn=coref_chat,
    inputs=_input_box,
    outputs=_output_boxes,
    title="AI 跨語言指代消解系統 (Stable Version)",
)
# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()