import gradio as gr
import pickle
import os
import re

# Locations of the model directory and the data files.
# NOTE(review): model_dir is not referenced by the code visible in this file;
# model_path and syllables_path are relative names, so they are resolved
# against the current working directory at run time — confirm this is intended.
model_dir = r"D:\Visual Studio Code\Data\Lab8\ung-dung-them-dau-tieng-viet"
model_path = "kneserney_ngram_model.pkl"
syllables_path = "vn_syllables.txt"

# Processing functions
# Lowercase accented Vietnamese letters, grouped by the plain letter they
# reduce to. The uppercase forms are derived from these when the table is built.
_VN_ACCENT_GROUPS = {
    "a": "áàảãạăắằẳẵặâấầẩẫậ",
    "e": "éèẻẽẹêếềểễệ",
    "o": "óòỏõọôốồổỗộơớờởỡợ",
    "i": "íìỉĩị",
    "u": "úùủũụưứừửữự",
    "y": "ýỳỷỹỵ",
    "d": "đ",
}

def _build_vn_accent_table():
    """Build the ``str.translate`` table for lower- and uppercase letters."""
    table = {}
    for base, accented in _VN_ACCENT_GROUPS.items():
        for ch in accented:
            table[ord(ch)] = base
            # Each of these letters has a single-character uppercase form.
            table[ord(ch.upper())] = base.upper()
    return table

_VN_ACCENT_TABLE = _build_vn_accent_table()

def remove_vn_accent(word):
    """
    Strip Vietnamese diacritics from a string.

    Every accented vowel is replaced by its plain ASCII vowel and ``đ`` by
    ``d``. Uppercase letters are handled as well and keep their case
    (``Đ`` -> ``D``). All other characters are returned unchanged.

    Args:
        word: Text to process; may contain several words.

    Returns:
        The text with Vietnamese accents removed.
    """
    # One translate() pass replaces seven sequential regex substitutions.
    return word.translate(_VN_ACCENT_TABLE)

def load_vn_syllables():
    """
    Read the syllable list and group syllables by their unaccented form.

    Returns a dict that maps each lowercased, accent-stripped syllable to the
    set of syllables (spelled exactly as in the file) that reduce to it. If
    the syllable file does not exist, a message is printed and an empty dict
    is returned.
    """
    try:
        with open(syllables_path, encoding="utf8") as handle:
            entries = handle.read().splitlines()
    except FileNotFoundError:
        print("Không tìm thấy file vn_syllables.txt")
        return {}
    grouped = {}
    for entry in entries:
        # Keys are accent-free and lowercased; values keep the file's spelling.
        grouped.setdefault(remove_vn_accent(entry.lower()), set()).add(entry)
    return grouped

# Built once when the module is loaded: unaccented syllable -> set of accented
# candidates. It is an empty dict when the syllable file could not be found.
syllable_map = load_vn_syllables()

def gen_accents_word(word):
    """
    Return the candidate accented spellings for a word.

    The word is lowercased and stripped of accents to form the lookup key in
    ``syllable_map``. When the key is unknown, the result is a one-element set
    holding the word exactly as it was passed in.
    """
    key = remove_vn_accent(word.lower())
    if key in syllable_map:
        return syllable_map[key]
    return {word}

def beam_search(words, model, k=3):
    """
    Choose accented spellings for a sentence with beam search.

    Args:
        words: Sequence of words in sentence order.
        model: Language model exposing ``logscore(word, context)``, where
            ``context`` is a list holding the one or two preceding words.
        k: Beam width, i.e. how many partial sentences survive each step.

    Returns:
        A list of ``(word_list, total_log_score)`` tuples. It is empty when
        ``words`` is empty. Candidates for the first word all start at 0.0
        and are not pruned, so a single-word input yields every candidate in
        no particular order. From the second word on, the list is sorted from
        best to worst and holds at most ``k`` entries.
    """
    beams = []
    for idx, word in enumerate(words):
        # The candidate set depends only on the word, so look it up once per
        # position rather than once per surviving beam.
        candidates = gen_accents_word(word)
        if idx == 0:
            beams = [([cand], 0.0) for cand in candidates]
            continue
        expanded = []
        for history, total in beams:
            # Last two words when available, otherwise the single word so far.
            # Slicing avoids using IndexError for control flow, which also
            # used to hide an IndexError raised inside model.logscore itself.
            context = history[-2:]
            for cand in candidates:
                score = model.logscore(cand, context)
                expanded.append((history + [cand], total + score))
        # list.sort is stable, so ties keep their generation order.
        expanded.sort(key=lambda item: item[1], reverse=True)
        beams = expanded[:k]
    return beams

# Load the pickled language model. model_loaded stays None when the file is
# missing or cannot be unpickled (the two cases handled below); any other
# exception propagates.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# the model file must come from a trusted source.
model_loaded = None
try:
    with open(model_path, 'rb') as fin:
        model_loaded = pickle.load(fin)
except FileNotFoundError:
    print("Không tìm thấy file mô hình.")
except pickle.UnpicklingError:
    print("Lỗi khi tải mô hình.")

def them_dau(text):
    """
    Add Vietnamese accents to an unaccented sentence.

    Args:
        text: Sentence entered by the user.

    Returns:
        The words of the best-scoring accented sentence joined by single
        spaces. An empty string is returned for ``None``, empty or
        whitespace-only input. When no language model is loaded, the input
        is returned unchanged.
    """
    # Treat None like blank text so the handler never fails on an empty box.
    if not text or not text.strip():
        return ""
    # Without a model the candidates cannot be scored (beam_search would call
    # logscore on None), so pass the text through instead of crashing.
    if model_loaded is None:
        return text
    words = text.split()
    best_sequences = beam_search(words, model_loaded, k=1)
    # k=1 keeps a single beam from the second word on; take its word list.
    return ' '.join(best_sequences[0][0])

# Gradio UI: a two-line textbox as input, plain text as output, with them_dau
# as the handler.
iface = gr.Interface(
    fn=them_dau,
    inputs=gr.Textbox(lines=2, placeholder="Nhập câu tiếng Việt không dấu..."),
    outputs="text",
    title="Ứng dụng thêm dấu tiếng Việt",
    description="Nhập câu không dấu và nhận kết quả đã thêm dấu!",
)

# Report missing resources BEFORE launching: when run as a plain script,
# launch() blocks the main thread until the server stops, so warnings placed
# after it would only be printed at shutdown.
if model_loaded is None:
    print("CẢNH BÁO: Không thể tải mô hình. Ứng dụng sẽ chạy nhưng không thể thêm dấu.")
if not syllable_map:
    print("CẢNH BÁO: Không thể tải file từ điển.")

iface.launch(share=True)  # launch() is called exactly once, here