Redmind committed on
Commit
edcc7cc
·
verified ·
1 Parent(s): 560b692

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -17
app.py CHANGED
@@ -48,29 +48,18 @@ def convert_to_casual_hindi(text):
48
  iface = gr.Interface(fn=convert_to_casual_hindi, inputs="text", outputs="text", title="Formal to Casual Hindi Converter")
49
  iface.launch()
50
  """
51
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
52
- import gradio as gr
53
-
54
 
55
- # Load the model and tokenizer
56
- model_name = "google/mt5-base" # You can replace with another model name if needed
57
- tokenizer = AutoTokenizer.from_pretrained(model_name, legacy=True) # Suppress legacy warning
58
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
59
 
60
  def formal_to_casual_hindi(input_text):
61
- """
62
- Converts formal Hindi text into conversational Hindi using mT5.
63
- """
64
- # Prepare the input for conversational reformulation
65
- prompt = f"Convert the following formal Hindi text to casual spoken Hindi: {input_text}"
66
-
67
- # Tokenize input
68
  input_ids = tokenizer.encode(prompt, return_tensors="pt")
69
-
70
- # Generate conversational text
71
  outputs = model.generate(input_ids, max_length=128, num_beams=5, early_stopping=True)
72
-
73
- # Decode the output
74
  casual_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
75
  return casual_text
76
 
 
48
  iface = gr.Interface(fn=convert_to_casual_hindi, inputs="text", outputs="text", title="Formal to Casual Hindi Converter")
49
  iface.launch()
50
  """
51
# Third-party dependency: Hugging Face transformers supplies the tokenizer
# and the seq2seq model used by the converter below.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the IndicTrans checkpoint once at module import so every call to the
# converter reuses the same tokenizer/model pair.
# NOTE(review): "ai4bharat/indictrans-hin-eng" is a Hindi->English translation
# checkpoint — confirm it is really the intended model for Hindi formal->casual
# register conversion.
model_name = "ai4bharat/indictrans-hin-eng"  # IndicTrans for Hindi
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
58
def formal_to_casual_hindi(input_text):
    """Rewrite formal Hindi *input_text* in a casual register.

    Wraps the text in an instruction prompt, runs the module-level seq2seq
    model with beam search, and returns the decoded string.
    """
    # Prepare input for IndicTrans.
    prompt = f"Convert formal Hindi to casual Hindi: {input_text}"
    encoded = tokenizer.encode(prompt, return_tensors="pt")
    # Beam search (5 beams), capped at 128 tokens, stopping early when all
    # beams have finished.
    generated = model.generate(encoded, max_length=128, num_beams=5, early_stopping=True)
    return tokenizer.decode(generated[0], skip_special_tokens=True)