Spaces:

arsath-sm
/

ENG-TAM_with_Gemma_model

Sleeping

App Files Files Community

arsath-sm commited on Oct 31, 2024

Commit

dd17995

verified ·

1 Parent(s): 3a03290

Upload 3 files

Browse files

Files changed (3) hide show

.env +1 -0
requirements.txt +2 -0
translator2.py +264 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ GROQ_API_KEY ="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"

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ groq
2	+ streamlit

translator2.py ADDED Viewed

	@@ -0,0 +1,264 @@

+import os
+import streamlit as st
+from dotenv import load_dotenv
+from groq import Groq
+import json
+from typing import List, Dict
+import time
+# Load environment variables from .env file
+load_dotenv()
+# Initialize the Groq client
+client = Groq(api_key=os.getenv("GROQ_API_KEY"))
+class TranslationManager:
+    def __init__(self):
+        self.chunk_size = 1500
+        self.overlap_size = 200
+        self.context_window = []
+    def chunk_text_with_context(self, text: str) -> List[Dict]:
+        """Split text into chunks while maintaining context"""
+        words = text.split()
+        chunks = []
+        current_chunk = []
+        current_length = 0
+        for i, word in enumerate(words):
+            current_chunk.append(word)
+            current_length += len(word) + 1
+            # Check if chunk size is reached
+            if current_length >= self.chunk_size:
+                # Add overlap from next words if available
+                overlap_words = words[i+1:i+1+self.overlap_size] if i+1 < len(words) else []
+                chunks.append({
+                    'main_text': ' '.join(current_chunk),
+                    'overlap_text': ' '.join(overlap_words),
+                    'position': len(chunks)
+                })
+                # Start new chunk with some overlap
+                current_chunk = words[max(0, i-50):i+1]
+                current_length = sum(len(w) + 1 for w in current_chunk)
+        # Add remaining text as last chunk
+        if current_chunk:
+            chunks.append({
+                'main_text': ' '.join(current_chunk),
+                'overlap_text': '',
+                'position': len(chunks)
+            })
+        return chunks
+    def create_translation_prompt(self, chunk: Dict, mode: str, domain: str = None) -> str:
+        """Create appropriate prompt based on translation mode"""
+        if mode == "normal":
+            prompt = f"""Translate the following English text to Tamil.
+            Provide only the Tamil translation without any other text.
+            English text: {chunk['main_text']}"""
+        else:  # contextual
+            context = f"Domain: {domain}\n" if domain else ""
+            previous_context = self.context_window[-1] if self.context_window else ""
+            prompt = f"""Perform a contextual translation from English to Tamil.
+            Consider the following aspects:
+            {context}
+            Previous context: {previous_context}
+            Maintain the following in your translation:
+            - Preserve domain-specific terminology
+            - Maintain consistent style and tone
+            - Ensure contextual coherence with previous translations
+            - Adapt idiomatic expressions appropriately
+            Text to translate: {chunk['main_text']}
+            Overlap context: {chunk['overlap_text']}
+            Provide only the Tamil translation without any explanations."""
+        return prompt
+    def translate_chunk(self, chunk: Dict, mode: str, domain: str = None) -> str:
+        """Translate a single chunk of text"""
+        prompt = self.create_translation_prompt(chunk, mode, domain)
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                completion = client.chat.completions.create(
+                    model="Gemma2-9b-It",
+                    messages=[
+                        {
+                            "role": "user",
+                            "content": prompt
+                        }
+                    ],
+                    temperature=0.3 if mode == "normal" else 0.4,
+                    max_tokens=2048,
+                    top_p=1,
+                    stream=True,
+                    stop=None,
+                )
+                translation = ""
+                for chunk_response in completion:
+                    translation += chunk_response.choices[0].delta.content or ""
+                # Update context window for contextual translation
+                if mode == "contextual":
+                    self.context_window.append(translation)
+                    if len(self.context_window) > 3:
+                        self.context_window.pop(0)
+                return translation
+            except Exception as e:
+                if attempt == max_retries - 1:
+                    raise e
+                time.sleep(2)  # Wait before retry
+        return ""
+def main():
+    st.set_page_config(page_title="Advanced Tamil Translator", layout="wide")
+    # Initialize translation manager
+    if 'translation_manager' not in st.session_state:
+        st.session_state.translation_manager = TranslationManager()
+    if 'translation_history' not in st.session_state:
+        st.session_state.translation_history = []
+    st.title("Advanced English to Tamil Translator")
+    # Translation settings
+    with st.expander("Translation Settings", expanded=True):
+        col1, col2 = st.columns(2)
+        with col1:
+            translation_mode = st.radio(
+                "Translation Mode",
+                ["Normal", "Contextual"],
+                help="Normal: Direct translation\nContextual: Context-aware translation with domain specificity"
+            )
+        with col2:
+            if translation_mode == "Contextual":
+                domain = st.selectbox(
+                    "Select Domain",
+                    ["General", "Technical", "Medical", "Legal", "Literary", "Business", "Academic"],
+                    help="Select the domain to improve translation accuracy"
+                )
+    # Input area
+    st.subheader("Enter Text")
+    english_input = st.text_area("Enter English text of any length:", height=200)
+    # Translation button
+    if st.button("Translate"):
+        if not english_input:
+            st.error("Please enter some text to translate.")
+            return
+        try:
+            # Initialize progress tracking
+            progress_bar = st.progress(0)
+            status_text = st.empty()
+            # Reset context window for new translation
+            st.session_state.translation_manager.context_window = []
+            # Chunk the input text
+            chunks = st.session_state.translation_manager.chunk_text_with_context(english_input)
+            translated_chunks = []
+            # Translate each chunk
+            for i, chunk in enumerate(chunks):
+                status_text.text(f"Translating part {i+1} of {len(chunks)}...")
+                translation = st.session_state.translation_manager.translate_chunk(
+                    chunk,
+                    mode=translation_mode.lower(),
+                    domain=domain if translation_mode == "Contextual" else None
+                )
+                translated_chunks.append(translation)
+                progress_bar.progress((i + 1) / len(chunks))
+            # Combine translations
+            final_translation = ' '.join(translated_chunks)
+            # Display results
+            col1, col2 = st.columns(2)
+            with col1:
+                st.subheader("Original Text")
+                st.write(english_input)
+                st.info(f"Word count: {len(english_input.split())}")
+            with col2:
+                st.subheader("Tamil Translation")
+                st.write(final_translation)
+            # Add to history
+            st.session_state.translation_history.append({
+                'english': english_input,
+                'tamil': final_translation,
+                'mode': translation_mode,
+                'domain': domain if translation_mode == "Contextual" else "N/A",
+                'timestamp': time.strftime("%Y-%m-%d %H:%M:%S")
+            })
+            # Download options
+            col1, col2 = st.columns(2)
+            with col1:
+                st.download_button(
+                    "Download Translation",
+                    final_translation,
+                    file_name=f"tamil_translation_{translation_mode.lower()}.txt",
+                    mime="text/plain"
+                )
+            with col2:
+                # Export translation with metadata
+                export_data = {
+                    'original': english_input,
+                    'translation': final_translation,
+                    'mode': translation_mode,
+                    'domain': domain if translation_mode == "Contextual" else "N/A",
+                    'timestamp': time.strftime("%Y-%m-%d %H:%M:%S")
+                }
+                st.download_button(
+                    "Export with Metadata",
+                    json.dumps(export_data, indent=2),
+                    file_name="translation_with_metadata.json",
+                    mime="application/json"
+                )
+        except Exception as e:
+            st.error(f"An error occurred: {str(e)}")
+        finally:
+            progress_bar.empty()
+            status_text.empty()
+    # Translation History
+    if st.session_state.translation_history:
+        with st.expander("Translation History"):
+            for i, entry in enumerate(reversed(st.session_state.translation_history[-5:])):
+                st.write(f"Translation {len(st.session_state.translation_history)-i}")
+                st.write(f"Mode: {entry['mode']}")
+                if entry['domain'] != "N/A":
+                    st.write(f"Domain: {entry['domain']}")
+                st.write(f"Timestamp: {entry['timestamp']}")
+                st.write("English:", entry['english'][:100] + "..." if len(entry['english']) > 100 else entry['english'])
+                st.write("Tamil:", entry['tamil'][:100] + "..." if len(entry['tamil']) > 100 else entry['tamil'])
+                st.markdown("---")
+if __name__ == "__main__":
+    main()