# Spaces: arsath-sm/ENG-TAM_with_Gemma_model — Running — File size: 10,400 Bytes — dd17995

import os
import streamlit as st
from dotenv import load_dotenv
from groq import Groq
import json
from typing import List, Dict
import time

# Populate the process environment from a local .env file, if one exists
load_dotenv()

# Shared Groq API client, keyed by the GROQ_API_KEY environment variable
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

class TranslationManager:
    """Split English text into chunks and translate each chunk to Tamil.

    Translation requests are sent through the module-level Groq ``client``.
    In "contextual" mode the most recent translations are remembered in
    ``context_window`` and fed back into the next prompt.
    """

    def __init__(self):
        # Approximate chunk budget in characters (each word counts as
        # len(word) + 1 to account for the separating space).
        self.chunk_size = 1500
        # Number of look-ahead WORDS exposed to the prompt as 'overlap_text'.
        self.overlap_size = 200
        # Most recent translations (at most 3), oldest first.
        self.context_window = []

    def chunk_text_with_context(self, text: str) -> List[Dict]:
        """Split text into consecutive, non-overlapping chunks.

        Words are accumulated until the running character count reaches
        ``chunk_size``; the chunk is then closed and a fresh one is started.

        Each chunk is a dict with:
          - 'main_text': the words to translate, joined by single spaces
          - 'overlap_text': up to ``overlap_size`` following words, supplied
            as look-ahead context only (empty for the last chunk)
          - 'position': zero-based index of the chunk

        ``main_text`` values never share words: callers join the per-chunk
        translations, so any word repeated across chunks would show up twice
        in the final output. Joining every 'main_text' with a space yields
        the whitespace-normalised input.

        Returns an empty list for empty or whitespace-only text.
        """
        words = text.split()
        chunks = []
        current_chunk = []
        current_length = 0

        for i, word in enumerate(words):
            current_chunk.append(word)
            current_length += len(word) + 1

            if current_length >= self.chunk_size:
                # Slicing past the end simply yields fewer (or no) words,
                # so no explicit bounds check is needed.
                overlap_words = words[i + 1:i + 1 + self.overlap_size]

                chunks.append({
                    'main_text': ' '.join(current_chunk),
                    'overlap_text': ' '.join(overlap_words),
                    'position': len(chunks)
                })

                # Start the next chunk empty so no word is translated twice
                # and no duplicate-only trailing chunk is produced.
                current_chunk = []
                current_length = 0

        # Whatever is left over becomes the final (possibly short) chunk
        if current_chunk:
            chunks.append({
                'main_text': ' '.join(current_chunk),
                'overlap_text': '',
                'position': len(chunks)
            })

        return chunks

    def create_translation_prompt(self, chunk: Dict, mode: str, domain: str = None) -> str:
        """Build the LLM prompt for one chunk.

        Args:
            chunk: dict with 'main_text' and (for contextual mode)
                'overlap_text', as produced by ``chunk_text_with_context``.
            mode: "normal" for a plain translation prompt; any other value
                selects the contextual prompt.
            domain: optional domain label included in the contextual prompt.

        Returns:
            The prompt string. The contextual prompt also embeds the latest
            entry of ``context_window`` (or an empty string if there is none).
        """
        if mode == "normal":
            prompt = f"""Translate the following English text to Tamil.

            Provide only the Tamil translation without any other text.

            

            English text: {chunk['main_text']}"""
        else:  # contextual
            context = f"Domain: {domain}\n" if domain else ""
            previous_context = self.context_window[-1] if self.context_window else ""
            
            prompt = f"""Perform a contextual translation from English to Tamil.

            Consider the following aspects:

            {context}

            Previous context: {previous_context}

            

            Maintain the following in your translation:

            - Preserve domain-specific terminology

            - Maintain consistent style and tone

            - Ensure contextual coherence with previous translations

            - Adapt idiomatic expressions appropriately

            

            Text to translate: {chunk['main_text']}

            

            Overlap context: {chunk['overlap_text']}

            

            Provide only the Tamil translation without any explanations."""
        
        return prompt

    def translate_chunk(self, chunk: Dict, mode: str, domain: str = None) -> str:
        """Translate a single chunk, retrying on failure.

        The request is streamed and the streamed pieces are concatenated
        into the returned translation. Up to three attempts are made with a
        two-second pause between them.

        Args:
            chunk: chunk dict as produced by ``chunk_text_with_context``.
            mode: "normal" or "contextual"; in contextual mode the result is
                appended to ``context_window`` (trimmed to the last 3).
            domain: optional domain label forwarded to the prompt builder.

        Returns:
            The translated text.

        Raises:
            Exception: whatever the final failed attempt raised.
        """
        prompt = self.create_translation_prompt(chunk, mode, domain)

        max_retries = 3
        for attempt in range(max_retries):
            try:
                completion = client.chat.completions.create(
                    model="Gemma2-9b-It",
                    messages=[
                        {
                            "role": "user",
                            "content": prompt
                        }
                    ],
                    temperature=0.3 if mode == "normal" else 0.4,
                    max_tokens=2048,
                    top_p=1,
                    stream=True,
                    stop=None,
                )

                # Streamed deltas may carry no content; treat those as "".
                translation = "".join(
                    part.choices[0].delta.content or "" for part in completion
                )
            except Exception:
                if attempt == max_retries - 1:
                    # Out of attempts: propagate with the original traceback
                    raise
                time.sleep(2)  # Wait before retry
                continue

            # Remember the result so the next contextual prompt can refer to it
            if mode == "contextual":
                self.context_window.append(translation)
                del self.context_window[:-3]  # keep only the 3 most recent

            return translation

        # Only reachable if max_retries is ever set to 0
        return ""

def _render_translation_history(history: List[Dict]) -> None:
    """Show the five most recent history entries, newest first.

    Entries are numbered by their position in the full history, and the
    English/Tamil texts are truncated to 100 characters for the preview.
    Nothing is rendered when the history is empty.
    """
    if not history:
        return

    total = len(history)
    with st.expander("Translation History"):
        for i, entry in enumerate(reversed(history[-5:])):
            st.write(f"Translation {total - i}")
            st.write(f"Mode: {entry['mode']}")
            if entry['domain'] != "N/A":
                st.write(f"Domain: {entry['domain']}")
            st.write(f"Timestamp: {entry['timestamp']}")
            for label, key in (("English:", 'english'), ("Tamil:", 'tamil')):
                text = entry[key]
                preview = text[:100] + "..." if len(text) > 100 else text
                st.write(label, preview)
            st.markdown("---")


def _translate_and_display(english_input: str, translation_mode: str, domain) -> None:
    """Translate the input chunk by chunk, show the result and record it.

    Any exception raised while translating or rendering is reported through
    ``st.error``; the progress widgets are always cleared afterwards.
    """
    manager = st.session_state.translation_manager
    domain_label = domain if translation_mode == "Contextual" else "N/A"

    # Created before the try block so the finally clause can always clear them
    progress_bar = st.progress(0)
    status_text = st.empty()

    try:
        # Reset context window for new translation
        manager.context_window = []

        chunks = manager.chunk_text_with_context(english_input)
        translated_chunks = []

        for i, chunk in enumerate(chunks):
            status_text.text(f"Translating part {i+1} of {len(chunks)}...")

            translation = manager.translate_chunk(
                chunk,
                mode=translation_mode.lower(),
                domain=domain if translation_mode == "Contextual" else None
            )

            translated_chunks.append(translation)
            progress_bar.progress((i + 1) / len(chunks))

        final_translation = ' '.join(translated_chunks)

        # Side-by-side display of source and result
        col1, col2 = st.columns(2)

        with col1:
            st.subheader("Original Text")
            st.write(english_input)
            st.info(f"Word count: {len(english_input.split())}")

        with col2:
            st.subheader("Tamil Translation")
            st.write(final_translation)

        # One timestamp shared by the history entry and the export so the
        # two records cannot disagree.
        timestamp = time.strftime("%Y-%m-%d %H:%M:%S")

        st.session_state.translation_history.append({
            'english': english_input,
            'tamil': final_translation,
            'mode': translation_mode,
            'domain': domain_label,
            'timestamp': timestamp
        })

        # Download options
        col1, col2 = st.columns(2)
        with col1:
            st.download_button(
                "Download Translation",
                final_translation,
                file_name=f"tamil_translation_{translation_mode.lower()}.txt",
                mime="text/plain"
            )

        with col2:
            export_data = {
                'original': english_input,
                'translation': final_translation,
                'mode': translation_mode,
                'domain': domain_label,
                'timestamp': timestamp
            }
            st.download_button(
                "Export with Metadata",
                # ensure_ascii=False keeps the Tamil text readable in the
                # exported file instead of \uXXXX escape sequences.
                json.dumps(export_data, indent=2, ensure_ascii=False),
                file_name="translation_with_metadata.json",
                mime="application/json"
            )

    except Exception as e:
        # UI boundary: surface the failure to the user instead of crashing
        st.error(f"An error occurred: {str(e)}")

    finally:
        progress_bar.empty()
        status_text.empty()


def main():
    """Streamlit entry point: settings, input area, translation and history.

    The TranslationManager and the translation history are kept in
    ``st.session_state`` so they persist across reruns of the script.
    """
    st.set_page_config(page_title="Advanced Tamil Translator", layout="wide")

    # Initialize per-session state
    if 'translation_manager' not in st.session_state:
        st.session_state.translation_manager = TranslationManager()

    if 'translation_history' not in st.session_state:
        st.session_state.translation_history = []

    st.title("Advanced English to Tamil Translator")

    # Bound up front so it is defined even when the selector is not shown
    domain = None

    # Translation settings
    with st.expander("Translation Settings", expanded=True):
        col1, col2 = st.columns(2)
        with col1:
            translation_mode = st.radio(
                "Translation Mode",
                ["Normal", "Contextual"],
                help="Normal: Direct translation\nContextual: Context-aware translation with domain specificity"
            )

        with col2:
            if translation_mode == "Contextual":
                domain = st.selectbox(
                    "Select Domain",
                    ["General", "Technical", "Medical", "Legal", "Literary", "Business", "Academic"],
                    help="Select the domain to improve translation accuracy"
                )

    # Input area
    st.subheader("Enter Text")
    english_input = st.text_area("Enter English text of any length:", height=200)

    # Translation button
    if st.button("Translate"):
        # Whitespace-only input has nothing to translate either
        if not english_input.strip():
            st.error("Please enter some text to translate.")
        else:
            _translate_and_display(english_input, translation_mode, domain)

    # Always rendered, including after an input error
    _render_translation_history(st.session_state.translation_history)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()