Update app.py
app.py CHANGED
```diff
@@ -3,37 +3,21 @@ import tempfile
 import os
 from gtts import gTTS
 from deep_translator import GoogleTranslator
-import logging
-from llama_index import VectorStoreIndex, Document, SimpleDirectoryReader
-from llama_index.node_parser import SimpleNodeParser
-from llama_index.embeddings import HuggingFaceEmbedding
-from llama_index import ServiceContext
-from llama_index.llms import HuggingFaceLLM
 from groq import Groq
-
-
-
-load_dotenv()
+import logging
+from sentence_transformers import SentenceTransformer
+import numpy as np
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')
 
 # Initialize Groq client
-groq_client = Groq(api_key=os.
-
-# Initialize the embedding model
-embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
-
-# Initialize a local LLM for indexing purposes with reduced context window
-local_llm = HuggingFaceLLM(model_name="gpt2", tokenizer_name="gpt2", context_window=256, max_new_tokens=128)
-
-# Set up node parser for chunking with smaller chunk size
-node_parser = SimpleNodeParser.from_defaults(chunk_size=128, chunk_overlap=20)
+groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
-# Initialize
-
+# Initialize HuggingFace embeddings (free to use)
+sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
 
-
-
+indexed_texts = []
+indexed_embeddings = []
 
 # Translation languages dropdown options
 translation_languages = {
```
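This hunk swaps the llama_index stack (and the `load_dotenv()` call) for sentence-transformers embeddings held in two module-level lists, so `GROQ_API_KEY` now has to be present in the process environment. The retrieval added in the next hunk ranks texts with a bare `np.dot`; that works as a cosine score because `all-MiniLM-L6-v2` is expected to emit unit-normalized vectors. A quick check of that assumption:

```python
# Sanity check (not part of the commit): all-MiniLM-L6-v2 should return
# unit-length 384-dimensional vectors, making np.dot a cosine score.
import numpy as np
from sentence_transformers import SentenceTransformer

model = SentenceTransformer('all-MiniLM-L6-v2')
vec = model.encode(["hello world"])[0]
print(vec.shape)            # (384,)
print(np.linalg.norm(vec))  # ~1.0 if the output is normalized
```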
```diff
@@ -62,37 +46,29 @@ audio_language_dict = {
 }
 
 def index_text(text: str) -> str:
-    global index
+    global indexed_texts, indexed_embeddings
     try:
-
-
-
-        else:
-            index.insert(documents[0])
+        embedding = sentence_model.encode([text])[0]
+        indexed_texts.append(text)
+        indexed_embeddings.append(embedding)
         return "Text indexed successfully."
     except Exception as e:
-        logging.error(f"Error in indexing: {str(e)}")
         return f"Error indexing text: {str(e)}"
 
+def find_most_similar(query: str, top_k: int = 1) -> list:
+    query_embedding = sentence_model.encode([query])[0]
+    similarities = [np.dot(query_embedding, doc_embedding) for doc_embedding in indexed_embeddings]
+    top_indices = np.argsort(similarities)[-top_k:][::-1]
+    return [indexed_texts[i] for i in top_indices]
+
 def chat_with_context(question: str, model: str) -> str:
-
-    if index is None:
+    if not indexed_texts:
         return "Please index some text first."
 
+    context = find_most_similar(question)[0]
+
     try:
-        query_engine = index.as_query_engine(
-            similarity_top_k=1,
-            response_mode="compact"
-        )
-        context = query_engine.query(question).response
-
-        # Truncate context if it's too long
-        max_context_length = 1024  # Reduced from 2048
-        if len(context) > max_context_length:
-            context = context[:max_context_length] + "..."
-
         prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
-
         chat_completion = groq_client.chat.completions.create(
             messages=[
                 {
```
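The hunk above amounts to a minimal in-memory vector index: `index_text` embeds and appends, `find_most_similar` scores the query against every stored embedding and returns the `top_k` matching texts. A self-contained sketch of that flow, with the Groq call omitted and hypothetical sample texts:

```python
# Standalone sketch of the new retrieval path (sample data is hypothetical).
import numpy as np
from sentence_transformers import SentenceTransformer

sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
indexed_texts, indexed_embeddings = [], []

def index_text(text: str) -> None:
    indexed_texts.append(text)
    indexed_embeddings.append(sentence_model.encode([text])[0])

def find_most_similar(query: str, top_k: int = 1) -> list:
    query_embedding = sentence_model.encode([query])[0]
    similarities = [np.dot(query_embedding, d) for d in indexed_embeddings]
    return [indexed_texts[i] for i in np.argsort(similarities)[-top_k:][::-1]]

index_text("The capital of France is Paris.")
index_text("gTTS turns text into spoken audio.")
print(find_most_similar("What is France's capital?"))
# -> ['The capital of France is Paris.']
```

The scan is linear in the number of indexed texts, a reasonable trade for an app that indexes a handful of passages at a time.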
```diff
@@ -101,7 +77,7 @@ def chat_with_context(question: str, model: str) -> str:
                 }
             ],
             model=model,
-            max_tokens=
+            max_tokens=500  # Limit the response length
         )
         return chat_completion.choices[0].message.content
     except Exception as e:
```
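`max_tokens=500` caps only the generated answer, not the prompt. For reference, a minimal sketch of the call this hunk tunes; it assumes a valid `GROQ_API_KEY`, and the model id is a placeholder since `chat_with_context` receives the model name as a parameter:

```python
# Minimal Groq chat call mirroring the one above.
import os
from groq import Groq

groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
chat_completion = groq_client.chat.completions.create(
    messages=[{"role": "user", "content": "Context: ...\n\nQuestion: ...\n\nAnswer:"}],
    model="llama3-8b-8192",  # placeholder; the app passes the model name in
    max_tokens=500,          # limits the length of the generated answer
)
print(chat_completion.choices[0].message.content)
```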
```diff
@@ -126,7 +102,6 @@ def google_tts(text, lang):
     except Exception as e:
         return None, f"Error in Google TTS: {str(e)}"
 
-# Gradio interface
 with gr.Blocks() as iface:
     gr.Markdown("# Free Text-to-Speech Tool with Language Translation and Chat")
 
```
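The surrounding `google_tts` helper is untouched apart from the deleted comment. Its visible error branch suggests it returns an `(audio, error)` pair; a plausible shape for such a helper, with the temp-file handling as an assumption (the file does import `tempfile`):

```python
# Plausible reconstruction of google_tts; only the error branch is shown in
# the diff, the success path here is an assumption.
import tempfile
from gtts import gTTS

def google_tts(text: str, lang: str):
    try:
        tts = gTTS(text=text, lang=lang)
        out = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
        tts.save(out.name)
        return out.name, None
    except Exception as e:
        return None, f"Error in Google TTS: {str(e)}"
```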