capradeepgujaran committed on
Commit
2c34fac
·
verified ·
1 Parent(s): b473e24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -47
app.py CHANGED
@@ -3,37 +3,21 @@ import tempfile
3
  import os
4
  from gtts import gTTS
5
  from deep_translator import GoogleTranslator
6
- import logging
7
- from llama_index import VectorStoreIndex, Document, SimpleDirectoryReader
8
- from llama_index.node_parser import SimpleNodeParser
9
- from llama_index.embeddings import HuggingFaceEmbedding
10
- from llama_index import ServiceContext
11
- from llama_index.llms import HuggingFaceLLM
12
  from groq import Groq
13
- from dotenv import load_dotenv
14
-
15
- # Load environment variables
16
- load_dotenv()
17
 
18
  logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
19
 
20
  # Initialize Groq client
21
- groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
22
-
23
- # Initialize the embedding model
24
- embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
25
-
26
- # Initialize a local LLM for indexing purposes with reduced context window
27
- local_llm = HuggingFaceLLM(model_name="gpt2", tokenizer_name="gpt2", context_window=256, max_new_tokens=128)
28
-
29
- # Set up node parser for chunking with smaller chunk size
30
- node_parser = SimpleNodeParser.from_defaults(chunk_size=128, chunk_overlap=20)
31
 
32
- # Initialize the ServiceContext with the local LLM and node parser
33
- service_context = ServiceContext.from_defaults(llm=local_llm, embed_model=embed_model, node_parser=node_parser)
34
 
35
- # Initialize the index
36
- index = None
37
 
38
  # Translation languages dropdown options
39
  translation_languages = {
@@ -62,37 +46,29 @@ audio_language_dict = {
62
  }
63
 
64
  def index_text(text: str) -> str:
65
- global index
66
  try:
67
- documents = [Document(text=text)]
68
- if index is None:
69
- index = VectorStoreIndex.from_documents(documents, service_context=service_context)
70
- else:
71
- index.insert(documents[0])
72
  return "Text indexed successfully."
73
  except Exception as e:
74
- logging.error(f"Error in indexing: {str(e)}")
75
  return f"Error indexing text: {str(e)}"
76
 
 
 
 
 
 
 
77
  def chat_with_context(question: str, model: str) -> str:
78
- global index
79
- if index is None:
80
  return "Please index some text first."
81
 
 
 
82
  try:
83
- query_engine = index.as_query_engine(
84
- similarity_top_k=1,
85
- response_mode="compact"
86
- )
87
- context = query_engine.query(question).response
88
-
89
- # Truncate context if it's too long
90
- max_context_length = 1024 # Reduced from 2048
91
- if len(context) > max_context_length:
92
- context = context[:max_context_length] + "..."
93
-
94
  prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
95
-
96
  chat_completion = groq_client.chat.completions.create(
97
  messages=[
98
  {
@@ -101,7 +77,7 @@ def chat_with_context(question: str, model: str) -> str:
101
  }
102
  ],
103
  model=model,
104
- max_tokens=256 # Reduced from 500
105
  )
106
  return chat_completion.choices[0].message.content
107
  except Exception as e:
@@ -126,7 +102,6 @@ def google_tts(text, lang):
126
  except Exception as e:
127
  return None, f"Error in Google TTS: {str(e)}"
128
 
129
- # Gradio interface
130
  with gr.Blocks() as iface:
131
  gr.Markdown("# Free Text-to-Speech Tool with Language Translation and Chat")
132
 
 
3
  import os
4
  from gtts import gTTS
5
  from deep_translator import GoogleTranslator
 
 
 
 
 
 
6
  from groq import Groq
7
+ import logging
8
+ from sentence_transformers import SentenceTransformer
9
+ import numpy as np
 
10
 
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
12
 
13
  # Initialize Groq client
14
+ groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
 
 
 
 
 
 
15
 
16
+ # Initialize HuggingFace embeddings (free to use)
17
+ sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
18
 
19
+ indexed_texts = []
20
+ indexed_embeddings = []
21
 
22
  # Translation languages dropdown options
23
  translation_languages = {
 
46
  }
47
 
48
def index_text(text: str) -> str:
    """Embed *text* and append it to the in-memory index.

    Returns a human-readable status string (success or error) because the
    result is displayed directly in the Gradio UI rather than raised.
    """
    try:
        # encode() returns one vector per input string; take the single result.
        embedding = sentence_model.encode([text])[0]
        indexed_texts.append(text)
        indexed_embeddings.append(embedding)
        return "Text indexed successfully."
    except Exception as e:
        # Log before returning: the UI only shows the message, so without
        # this the failure never reaches the configured logging output.
        logging.error(f"Error in indexing: {str(e)}")
        return f"Error indexing text: {str(e)}"
57
 
58
def find_most_similar(query: str, top_k: int = 1) -> list:
    """Return up to *top_k* indexed texts ranked by similarity to *query*.

    Similarity is the dot product between embeddings. NOTE(review):
    all-MiniLM-L6-v2 normalizes its output vectors, so this is effectively
    cosine similarity — confirm if the embedding model ever changes.
    """
    query_vec = sentence_model.encode([query])[0]
    scores = [np.dot(query_vec, doc_vec) for doc_vec in indexed_embeddings]
    # argsort is ascending; keep the last top_k entries and reverse so the
    # best match comes first.
    best_first = np.argsort(scores)[-top_k:][::-1]
    return [indexed_texts[idx] for idx in best_first]
63
+
64
  def chat_with_context(question: str, model: str) -> str:
65
+ if not indexed_texts:
 
66
  return "Please index some text first."
67
 
68
+ context = find_most_similar(question)[0]
69
+
70
  try:
 
 
 
 
 
 
 
 
 
 
 
71
  prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
 
72
  chat_completion = groq_client.chat.completions.create(
73
  messages=[
74
  {
 
77
  }
78
  ],
79
  model=model,
80
+ max_tokens=500 # Limit the response length
81
  )
82
  return chat_completion.choices[0].message.content
83
  except Exception as e:
 
102
  except Exception as e:
103
  return None, f"Error in Google TTS: {str(e)}"
104
 
 
105
  with gr.Blocks() as iface:
106
  gr.Markdown("# Free Text-to-Speech Tool with Language Translation and Chat")
107