import os

import faiss
import numpy as np
import openai
import pytesseract
import streamlit as st
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from PIL import Image
from PyPDF2 import PdfReader
from transformers import pipeline
# SECURITY: credentials must never be committed to source control. The key is
# read from the environment instead (None when the variable is unset). Any key
# that was ever hard-coded in this file should be treated as leaked and rotated.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Path of the Tesseract executable that pytesseract invokes for OCR.
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

# Zero-shot classification pipeline, constructed at import time (i.e. each time
# this script is executed). Not referenced by the functions visible in this file.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Vector size for the FAISS index; presumably chosen to match the embeddings
# returned by create_embedding (text-embedding-ada-002) - confirm if the
# embedding model changes.
dim = 1536

# Flat (exhaustive) L2-distance index over `dim`-dimensional vectors.
index = faiss.IndexFlatL2(dim)
def create_embedding(text):
    """Return the OpenAI embedding vector for *text*.

    Calls ``openai.Embedding.create`` with the ``text-embedding-ada-002``
    model and the module-level ``OPENAI_API_KEY``.

    Returns:
        The ``embedding`` entry of the first item in the response's ``data``
        list, or ``None`` if anything fails (the error is shown in the UI via
        ``st.error``).
    """
    request = {
        "model": "text-embedding-ada-002",
        "input": text,
        "api_key": OPENAI_API_KEY,
    }
    try:
        result = openai.Embedding.create(**request)
        first_item = result['data'][0]
        return first_item['embedding']
    except Exception as exc:
        # Report in the UI instead of propagating; callers get None.
        st.error(f"Error creating embedding: {exc!s}")
        return None
def extract_text(uploaded_file):
    """Extract plain text from an uploaded PDF, TXT, or image file.

    Dispatches on ``uploaded_file.type`` (a MIME type string):

    * ``application/pdf`` - each page is read with ``PdfReader`` and pages
      that yield text are concatenated, one newline after each.
    * ``text/plain`` - the raw bytes are decoded as UTF-8.
    * any type starting with ``image`` - the image is run through Tesseract.

    Any other type produces an empty string.

    Args:
        uploaded_file: File-like object exposing a ``type`` attribute and,
            for plain-text files, a ``read()`` method returning bytes.

    Returns:
        The extracted text with leading/trailing whitespace removed. If an
        exception occurs it is reported with ``st.error`` and whatever text
        was collected before the failure is returned (possibly ``""``).
    """
    parts = []
    try:
        mime_type = uploaded_file.type
        if mime_type == "application/pdf":
            for page in PdfReader(uploaded_file).pages:
                # Run extraction once per page and reuse the result rather
                # than extracting a second time after the emptiness check.
                page_text = page.extract_text()
                if page_text:
                    parts.append(page_text + "\n")
        elif mime_type == "text/plain":
            parts.append(uploaded_file.read().decode("utf-8"))
        elif mime_type.startswith('image'):
            parts.append(pytesseract.image_to_string(Image.open(uploaded_file)))
    except Exception as e:
        # Best-effort: surface the problem in the UI and fall through so any
        # text gathered so far is still returned.
        st.error(f"Text extraction failed: {str(e)}")
    return "".join(parts).strip()
def chunk_text(text, max_tokens=1000):
    """Split *text* into chunks of at most ``max_tokens`` words each.

    Despite the parameter name, the limit is counted in whitespace-separated
    words (as produced by ``str.split()``), not model tokens. Words inside a
    chunk are re-joined with single spaces, so original whitespace is not
    preserved.

    Args:
        text: The text to split.
        max_tokens: Number of words after which a chunk is closed.

    Returns:
        A list of chunk strings in document order; empty for blank text. The
        final chunk may hold fewer than ``max_tokens`` words.
    """
    pieces = []
    pending = []
    for token in text.split():
        pending.append(token)
        # Every item from str.split() is exactly one word, so the length of
        # the pending list is the running word count for the current chunk.
        if len(pending) >= max_tokens:
            pieces.append(" ".join(pending))
            pending = []
    # Flush the trailing partial chunk, if any.
    if pending:
        pieces.append(" ".join(pending))
    return pieces
def extract_summary(text):
    """Summarise a document chunk by chunk with an OpenAI completion model.

    The text is split with ``chunk_text``; each chunk is sent through the
    same prompt, which asks for business requirements, functional
    requirements, use cases and technical constraints. The per-chunk answers
    are joined with newlines.

    Args:
        text: Full document text.

    Returns:
        The combined summary with surrounding whitespace stripped, or ``""``
        when there is nothing to summarise or any call fails (the failure is
        reported with ``st.error``; partial results are discarded).
    """
    chunks = chunk_text(text)
    if not chunks:
        # Nothing to send; avoid constructing an LLM client for no work.
        return ""

    prompt = (
        "\n"
        "Extract a concise summary of the following categories:\n"
        "- Business Requirements\n"
        "- Functional Requirements\n"
        "- Use Cases\n"
        "- Technical Constraints\n"
        "Document:\n"
        "{document_text}\n"
    )
    try:
        # The template and model settings are the same for every chunk, so
        # the chain is built once and reused across the loop.
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["document_text"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        summary_parts = [llm_chain.run(document_text=chunk) for chunk in chunks]
        return "\n".join(summary_parts).strip()
    except Exception as e:
        st.error(f"Summary extraction failed: {str(e)}")
        return ""
def extract_agile_elements(text):
    """Extract Epics, Features and User Stories from a document via an LLM.

    The text is split with ``chunk_text``; each chunk is sent through the
    same prompt, which asks the model to answer in a Markdown outline
    (Epic / Feature / User Story headings). The per-chunk answers are joined
    with newlines.

    Args:
        text: Full document text.

    Returns:
        The combined structured output with surrounding whitespace stripped,
        or ``""`` when there is nothing to process or any call fails (the
        failure is reported with ``st.error``; partial results are discarded).
    """
    chunks = chunk_text(text)
    if not chunks:
        # Nothing to send; avoid constructing an LLM client for no work.
        return ""

    # The Epic heading's emoji had been corrupted into the two characters
    # "π―" (UTF-8 bytes of U+1F3AF read through a Greek code page); it is
    # written here as the intended code point.
    prompt = (
        "\n"
        "Identify and structure these elements from the document:\n"
        "## \U0001F3AF Epic: [High-level objective]\n"
        "### Feature: [Key capability]\n"
        "#### User Story: As a [persona], I want to [goal], so that [reason]\n"
        "Document:\n"
        "{document_text}\n"
    )
    try:
        # The template and model settings are the same for every chunk, so
        # the chain is built once and reused across the loop.
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["document_text"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        structured_output_parts = [
            llm_chain.run(document_text=chunk) for chunk in chunks
        ]
        return "\n".join(structured_output_parts).strip()
    except Exception as e:
        st.error(f"Agile extraction failed: {str(e)}")
        return ""
def generate_detailed_user_story(user_story):
    """Expand a user story into a detailed version with acceptance criteria.

    Sends the story through a prompt that asks the model to restate it in
    "As a ..., I want to ..., so that ..." form followed by a list of
    testable acceptance criteria.

    Args:
        user_story: The user story text to refine.

    Returns:
        The model's response, or ``""`` when the input is blank/``None`` or
        the call fails (failures are reported with ``st.error``).
    """
    if not user_story or not user_story.strip():
        # Nothing to refine: skip the API call, which could only produce
        # invented content for an empty story.
        return ""

    prompt = (
        "\n"
        "Refine the user story into the following structure:\n"
        "#### User Story: As a [persona], I want to [goal], so that [reason]\n"
        "**Acceptance Criteria:**\n"
        "- [List of testable criteria]\n"
        "User Story:\n"
        "{user_story}\n"
    )
    try:
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["user_story"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        return llm_chain.run(user_story=user_story)
    except Exception as e:
        st.error(f"Detailed user story generation failed: {str(e)}")
        return ""
def main():
    """Render the Streamlit page for the functional-spec processor.

    Flow: the user uploads a document; a preview of the extracted text is
    shown; the LLM-generated summary and Agile breakdown are displayed; the
    user can then paste a user story and request a detailed version of it.
    Nothing below the uploader is rendered until a file has been uploaded
    and text has been extracted from it.
    """
    # NOTE(review): the leading "π" in several labels below looks like a
    # mis-decoded emoji; confirm the intended glyphs before changing them.
    st.title("π GenAI Functional Spec Processor")
    uploaded_file = st.file_uploader(
        "Upload a functional specification document (PDF, TXT, Image)",
        type=["pdf", "txt", "png", "jpg", "jpeg"],
    )
    if not uploaded_file:
        return

    text = extract_text(uploaded_file)
    if not text:
        return

    # Append the ellipsis only when the preview is actually truncated.
    preview = text[:1000] + "..." if len(text) > 1000 else text
    st.text_area("Extracted Text", value=preview, height=200)

    # Streamlit re-executes this script on every widget interaction (typing
    # in a text area, pressing a button). Keep the LLM results in session
    # state, keyed by the extracted text, so reruns for the same document do
    # not repeat the API calls.
    cached = st.session_state.get("spec_results")
    if not cached or cached["text"] != text:
        cached = {
            "text": text,
            "summary": extract_summary(text),
            "agile": extract_agile_elements(text),
        }
        if cached["summary"] and cached["agile"]:
            st.session_state["spec_results"] = cached
        elif "spec_results" in st.session_state:
            # One of the helpers returned "" (it reports its own error);
            # drop any stale entry so the next rerun tries again.
            del st.session_state["spec_results"]

    with st.expander("π Extracted Summary", expanded=False):
        st.info(cached["summary"])
    st.subheader("π Agile Breakdown")
    st.text_area("Agile Output", value=cached["agile"], height=300)

    user_story = st.text_area("Paste a User Story to Generate Detailed Version")
    if st.button("Generate Detailed User Story"):
        if not user_story.strip():
            # Avoid sending an empty story to the model.
            st.warning("Please paste a user story first.")
        else:
            detailed_story = generate_detailed_user_story(user_story)
            st.subheader("Detailed User Story")
            st.write(detailed_story)


if __name__ == "__main__":
    main()