Spaces:

RAHULJUNEJA33
/

LexiGen-Userstory_Generator

Running

App Files Files Community

RAHULJUNEJA33 committed on 4 days ago

Commit

8efe5a0

verified ·

1 Parent(s): a28d499

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -52

app.py CHANGED Viewed

@@ -1,91 +1,172 @@
-import numpy as np
 import streamlit as st
 from PyPDF2 import PdfReader
 from PIL import Image
 import pytesseract
-import faiss
 from transformers import pipeline
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 from langchain.llms import OpenAI
-from openai import OpenAI
-# Initialize OpenAI client with Streamlit secrets
-openai_api_key = st.secrets["OPENAI_API_KEY"]
-client = OpenAI(api_key=openai_api_key)  # Correct client initialization
 # Configuration
 pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
 classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-# Initialize FAISS
-dim = 1536
 index = faiss.IndexFlatL2(dim)
 def extract_text(uploaded_file):
     """Extract text from PDF, TXT, or image files"""
     try:
-        text = ""
         if uploaded_file.type == "application/pdf":
             reader = PdfReader(uploaded_file)
             for page in reader.pages:
                 if page.extract_text():
                     text += page.extract_text() + "\n"
         elif uploaded_file.type == "text/plain":
             text = uploaded_file.read().decode("utf-8")
         elif uploaded_file.type.startswith('image'):
             image = Image.open(uploaded_file)
             text = pytesseract.image_to_string(image)
-        return text.strip()
     except Exception as e:
         st.error(f"Text extraction failed: {str(e)}")
-        return ""
-# Rest of the functions remain the same as previous working version
-# ... [Keep all other functions identical to the last working code] ...
-# Streamlit UI
-st.set_page_config(page_title="SpecAnalyzer", layout="wide")
-st.title("📋 Functional Specification Analyzer")
-uploaded_file = st.file_uploader("Upload document (PDF/TXT/Image)", type=["pdf", "txt", "jpg", "jpeg"])
-if uploaded_file:
-    with st.spinner("📄 Extracting text..."):
-        text = extract_text(uploaded_file)
-    if text:
-        with st.spinner("🔍 Analyzing document..."):
-            analysis = process_document(text)
-        if analysis:
-            display_results(analysis)
-            # User story selection
-            user_stories = [
-                line.split(":", 1)[1].strip()
-                for line in analysis.split('\n')
-                if line.startswith("#### User Story:")
-            ]
-            selected_story = st.selectbox("📌 Select a User Story to expand:", user_stories)
-            if st.button("✨ Generate Detailed Breakdown"):
-                with st.spinner("⚙️ Generating details..."):
-                    details = generate_user_story_details(selected_story)
-                    st.subheader("📝 Detailed Specifications")
-                    st.markdown(details)
-                    st.download_button(
-                        label="💾 Download as Markdown",
-                        data=details,
-                        file_name="user_story_details.md",
-                        mime="text/markdown"
-                    )
-    else:
-        st.error("Failed to extract text from document")

 import streamlit as st
+import numpy as np
 from PyPDF2 import PdfReader
 from PIL import Image
 import pytesseract
+import openai
 from transformers import pipeline
 from langchain.prompts import PromptTemplate
 from langchain.chains import LLMChain
 from langchain.llms import OpenAI
+import faiss
# OpenAI credentials are read from Streamlit secrets so that no key is ever
# stored in the repository.
# SECURITY: a literal secret key was previously committed on this line. Treat
# that key as compromised: revoke it in the OpenAI dashboard and issue a new
# one, because it remains readable in the commit history.
OPENAI_API_KEY = st.secrets["OPENAI_API_KEY"]

# Configuration
# Location of the Tesseract executable used by pytesseract for image OCR.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
# Zero-shot classification pipeline, constructed once when the module loads.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
dim = 1536  # Embedding dimension
# FAISS flat L2 index holding `dim`-dimensional vectors.
index = faiss.IndexFlatL2(dim)
def create_embedding(text):
    """Generate an embedding for *text* using OpenAI.

    Args:
        text: The content to embed.

    Returns:
        The ``embedding`` field of the first item in the API response, or
        ``None`` when *text* is empty/blank or the request fails. Request
        failures are reported to the user through ``st.error``.
    """
    # Nothing to embed: return early instead of spending a network call that
    # can only fail or produce a meaningless vector.
    if not text or (isinstance(text, str) and not text.strip()):
        return None

    try:
        # NOTE(review): `openai.Embedding` looks like the pre-1.0 interface of
        # the openai package - confirm the version pinned for this app.
        response = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=text,
            api_key=OPENAI_API_KEY,
        )
        return response['data'][0]['embedding']
    except Exception as e:
        # Best-effort by design: surface the problem in the UI, do not crash.
        st.error(f"Error creating embedding: {str(e)}")
        return None
 def extract_text(uploaded_file):
     """Extract text from PDF, TXT, or image files"""
+    text = ""
     try:
         if uploaded_file.type == "application/pdf":
             reader = PdfReader(uploaded_file)
             for page in reader.pages:
                 if page.extract_text():
                     text += page.extract_text() + "\n"
         elif uploaded_file.type == "text/plain":
             text = uploaded_file.read().decode("utf-8")
         elif uploaded_file.type.startswith('image'):
             image = Image.open(uploaded_file)
             text = pytesseract.image_to_string(image)
     except Exception as e:
         st.error(f"Text extraction failed: {str(e)}")
+    return text.strip()
def chunk_text(text, max_tokens=1000):
    """Split *text* into chunks of at most *max_tokens* words.

    Despite the parameter name, the limit counts whitespace-separated words,
    not model tokens; it is only a rough proxy for a token budget.

    Args:
        text: The text to split. Runs of whitespace are collapsed, because
            chunks are rebuilt by joining words with single spaces.
        max_tokens: Maximum number of words per chunk (integer). Values below
            1 are treated as 1, i.e. one word per chunk.

    Returns:
        A list of chunk strings in document order; empty when *text* contains
        no words.
    """
    words = text.split()
    # Clamp so that a zero or negative limit still makes progress.
    size = max(1, max_tokens)
    return [" ".join(words[start:start + size]) for start in range(0, len(words), size)]
def _run_prompt_over_chunks(text, template, failure_label):
    """Apply one prompt template to every chunk of *text* and join the results.

    Args:
        text: Full document text; it is split with ``chunk_text``.
        template: Prompt template containing a ``{document_text}`` placeholder.
        failure_label: Prefix used in the error shown when a call fails.

    Returns:
        The per-chunk completions joined with newlines and stripped, or ``""``
        when there is nothing to process or any call raises.
    """
    chunks = chunk_text(text)
    if not chunks:
        # No content: avoid building an LLM client that would go unused.
        return ""
    try:
        # The prompt, LLM and chain do not depend on the chunk, so build them
        # once rather than once per iteration.
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=template, input_variables=["document_text"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        parts = [llm_chain.run(document_text=chunk) for chunk in chunks]
        return "\n".join(parts).strip()
    except Exception as e:
        # Best-effort by design: report in the UI and return an empty result.
        st.error(f"{failure_label} failed: {str(e)}")
        return ""


def extract_summary(text):
    """Extract a high-level summary, processing the document chunk by chunk.

    Args:
        text: Full document text.

    Returns:
        The concatenated per-chunk summaries, or ``""`` on failure or when
        *text* has no content.
    """
    prompt = """
    Extract a concise summary of the following categories:
    - Business Requirements
    - Functional Requirements
    - Use Cases
    - Technical Constraints
    Document:
    {document_text}
    """
    return _run_prompt_over_chunks(text, prompt, "Summary extraction")


def extract_agile_elements(text):
    """Extract Epics, Features, and User Stories, chunk by chunk.

    Args:
        text: Full document text.

    Returns:
        The concatenated per-chunk structured output, or ``""`` on failure or
        when *text* has no content.
    """
    prompt = """
    Identify and structure these elements from the document:
    ## 🎯 Epic: [High-level objective]
    ### Feature: [Key capability]
    #### User Story: As a [persona], I want to [goal], so that [reason]
    Document:
    {document_text}
    """
    return _run_prompt_over_chunks(text, prompt, "Agile extraction")
def generate_detailed_user_story(user_story):
    """Generate a detailed user story including acceptance criteria.

    Args:
        user_story: The user story text to refine.

    Returns:
        The model's refined story, or ``""`` when *user_story* is blank or the
        request fails (failures are reported through ``st.error``).
    """
    # A blank story gives the model nothing to refine; skip the paid call.
    if not user_story or not user_story.strip():
        return ""

    prompt = """
    Refine the user story into the following structure:
    #### User Story: As a [persona], I want to [goal], so that [reason]
    **Acceptance Criteria:**
    - [List of testable criteria]
    User Story:
    {user_story}
    """
    try:
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["user_story"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        return llm_chain.run(user_story=user_story)
    except Exception as e:
        # Best-effort by design: report in the UI and return an empty result.
        st.error(f"Detailed user story generation failed: {str(e)}")
        return ""
def main():
    """Render the Streamlit page: upload a spec, analyse it, refine a story.

    Streamlit re-executes the script on every widget interaction, so the
    extracted text and both LLM analyses are kept in ``st.session_state`` and
    recomputed only when a different file is uploaded.
    """
    st.title("📑 GenAI Functional Spec Processor")
    uploaded_file = st.file_uploader("Upload a functional specification document (PDF, TXT, Image)", type=["pdf", "txt", "png", "jpg", "jpeg"])
    if not uploaded_file:
        return

    # Identify the upload by name and size; a change means a new document.
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("analysis_key") != upload_key:
        text = extract_text(uploaded_file)
        if not text:
            # Do not cache a failed extraction, so the next rerun retries it.
            return
        st.session_state["analysis"] = {
            "text": text,
            "summary": extract_summary(text),
            "structured_output": extract_agile_elements(text),
        }
        st.session_state["analysis_key"] = upload_key

    analysis = st.session_state["analysis"]
    text = analysis["text"]

    # Show preview only; add the ellipsis only if something was cut off.
    preview = text[:1000] + ("..." if len(text) > 1000 else "")
    st.text_area("Extracted Text", value=preview, height=200)

    with st.expander("📋 Extracted Summary", expanded=False):
        st.info(analysis["summary"])
    st.subheader("📌 Agile Breakdown")
    st.text_area("Agile Output", value=analysis["structured_output"], height=300)

    user_story = st.text_area("Paste a User Story to Generate Detailed Version")
    if st.button("Generate Detailed User Story"):
        if not user_story.strip():
            st.warning("Please paste a user story first.")
            return
        detailed_story = generate_detailed_user_story(user_story)
        st.subheader("Detailed User Story")
        st.write(detailed_story)


if __name__ == "__main__":
    main()