Redmind committed
Commit 25d1f46 · verified · 1 Parent(s): 76041d8

Update app.py

Files changed (1)
  1. app.py +15 -49
app.py CHANGED
@@ -1,62 +1,27 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import gradio as gr
 import torch
-# Load the Open-Source LLM (e.g., BLOOM or Falcon)
-model_name = "tiiuae/falcon-7b-instruct" # Replace with your desired model
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name,
-    device_map="auto", # Automatically allocates model to available devices
-    torch_dtype=torch.float16 # Use reduced precision to save memory
-)
 
-# Ensure the `pad_token_id` is set explicitly to avoid warnings
-if tokenizer.pad_token_id is None:
-    tokenizer.pad_token_id = tokenizer.eos_token_id
+model_name = "tiiuae/falcon-7b-instruct"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    device_map="auto",
+    torch_dtype=torch.float16
+)
 
 def convert_to_spoken_hindi(formal_hindi_text):
-    """
-    Convert formal Hindi text to spoken Hindi using an open-source LLM.
-    """
-    # Define the prompt
-    prompt = (
-        "Convert the following formal Hindi text into conversational spoken Hindi:\n\n"
-        f"Formal Hindi: {formal_hindi_text}\n\n"
-        "Spoken Hindi:"
-    )
-
-    # Tokenize the input and create an attention mask
+    prompt = f"Convert the following formal Hindi text into conversational spoken Hindi:\n\nFormal Hindi: {formal_hindi_text}\n\nSpoken Hindi:"
     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
-    input_ids = inputs["input_ids"]
-    attention_mask = inputs["attention_mask"]
-
-    # Generate the response
     outputs = model.generate(
-        input_ids=input_ids,
-        attention_mask=attention_mask,
-        max_length=150, # Adjust based on your use case
-        num_beams=5, # Beam search for diverse generation
-        temperature=0.7, # Sampling temperature for randomness
-        pad_token_id=tokenizer.pad_token_id # Avoid warnings
+        inputs["input_ids"],
+        attention_mask=inputs["attention_mask"],
+        max_length=150,
+        num_beams=5,
+        temperature=0.7
     )
-
-    # Decode the generated text
     spoken_hindi = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Extract the relevant output (after "Spoken Hindi:")
-    if "Spoken Hindi:" in spoken_hindi:
-        spoken_hindi = spoken_hindi.split("Spoken Hindi:")[-1].strip()
-
-    return spoken_hindi
-
-# Example Input
-formal_hindi_text = "आपका स्वास्थ्य अच्छा रहे, इस बात का ध्यान रखें। क्या आप ठीक से भोजन कर रहे हैं?"
-
-# Convert to Spoken Hindi
-#spoken_hindi_text = convert_to_spoken_hindi(formal_hindi_text)
-
-# Print the results
-print("Formal Hindi Text:", formal_hindi_text)
-#print("Spoken Hindi Text:", spoken_hindi_text)
+    return spoken_hindi.split("Spoken Hindi:")[-1].strip()
 
 iface = gr.Interface(
     fn=convert_to_spoken_hindi,
@@ -64,4 +29,5 @@ iface = gr.Interface(
     outputs="text",
     title="Hindi Text Converter"
 )
+
 iface.launch()
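
Note: the commit removes the pad-token fallback while the tokenizer call still passes padding=True. The tiiuae/falcon-7b-instruct tokenizer ships without a pad token, so tokenization can now raise "Asking to pad but the tokenizer does not have a padding token", and generate() no longer receives an explicit pad_token_id. A minimal sketch of the guard the previous revision carried, should the regression show up:

# Falcon has no dedicated pad token; reusing EOS gives padding=True and
# generate() a valid pad_token_id (this guard existed before this commit).
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id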
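
Also worth flagging: generate() only honors temperature when do_sample=True, so with num_beams=5 and sampling left off, temperature=0.7 is ignored (recent transformers versions warn about exactly this). And with device_map="auto", the weights may be dispatched to GPU while the tokenized inputs stay on CPU. A sketch of one consistent call, assuming deterministic beam search is the intent; dropping temperature and the move to model.device are suggestions, not part of the commit:

# Tokenize, then move tensors to the device the weights were dispatched to.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
inputs = inputs.to(model.device)

outputs = model.generate(
    **inputs,                             # input_ids and attention_mask together
    max_length=150,
    num_beams=5,                          # beam search; temperature dropped since do_sample is off
    pad_token_id=tokenizer.pad_token_id,  # silence the missing-pad-token warning
)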