import streamlit as st
import numpy as np
from PyPDF2 import PdfReader
from PIL import Image
import pytesseract
import openai
from transformers import pipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import OpenAI
import faiss

# API configuration: the legacy openai SDK (<1.0) reads the key from this
# module-level attribute, which openai.Embedding.create below relies on.
openai_api_key = st.secrets["OPENAI_API_KEY"]
openai.api_key = openai_api_key

# Point pytesseract at the tesseract binary (default path on most Linux installs).
pytesseract.pytesseract.tesseract_cmd = "/usr/bin/tesseract"

# Zero-shot classifier for tagging text against arbitrary candidate labels.
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
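
# Sketch of how the classifier above could be used. classify_chunk() and its
# candidate labels are illustrative assumptions and are not called in this app.
def classify_chunk(chunk):
    """Return the most likely requirement category for a text chunk."""
    labels = ["business requirement", "functional requirement",
              "use case", "technical constraint"]
    result = classifier(chunk, candidate_labels=labels)
    return result["labels"][0]  # labels come back sorted by score, best first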

# In-memory FAISS index over L2 distance; 1536 is the dimensionality of
# OpenAI's text-embedding-ada-002 vectors.
dim = 1536
index = faiss.IndexFlatL2(dim)
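
# Sketch of how the index could be populated and queried with create_embedding()
# below. index_chunks() and search_similar() are illustrative helpers and are
# not wired into main().
def index_chunks(chunks):
    """Embed each chunk and add the vectors to the global FAISS index."""
    vectors = [create_embedding(c) for c in chunks]
    vectors = [v for v in vectors if v is not None]
    if vectors:
        index.add(np.array(vectors, dtype="float32"))  # FAISS expects float32

def search_similar(query, k=3):
    """Return positions of the k indexed chunks nearest to the query."""
    vec = create_embedding(query)
    if vec is None:
        return []
    _, ids = index.search(np.array([vec], dtype="float32"), k)
    return ids[0].tolist()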

def create_embedding(text):
    """Generate an embedding for `text` with OpenAI's text-embedding-ada-002."""
    try:
        response = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=text,
        )
        return response["data"][0]["embedding"]
    except Exception as e:
        st.error(f"Error creating embedding: {str(e)}")
        return None

def extract_text(uploaded_file):
    """Extract text from PDF, TXT, or image files."""
    text = ""
    try:
        if uploaded_file.type == "application/pdf":
            reader = PdfReader(uploaded_file)
            for page in reader.pages:
                page_text = page.extract_text()  # extract once, reuse below
                if page_text:
                    text += page_text + "\n"
        elif uploaded_file.type == "text/plain":
            text = uploaded_file.read().decode("utf-8")
        elif uploaded_file.type.startswith("image"):
            image = Image.open(uploaded_file)
            text = pytesseract.image_to_string(image)
    except Exception as e:
        st.error(f"Text extraction failed: {str(e)}")
    return text.strip()

def chunk_text(text, max_tokens=1000):
    """Split text into chunks of at most max_tokens words (a rough proxy for tokens)."""
    words = text.split()
    chunks = []
    current_chunk = []
    count = 0

    for word in words:
        current_chunk.append(word)
        count += 1
        if count >= max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            count = 0

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks
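
# Example (illustrative): 2,500 words with the default max_tokens=1000 yield
# three chunks of roughly 1000, 1000, and 500 words.
# chunks = chunk_text("word " * 2500)  # -> 3 chunks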

def extract_summary(text):
    """Extract a high-level summary, processing the document chunk by chunk."""
    chunks = chunk_text(text)
    summary_parts = []
    prompt = """
    Extract a concise summary of the following categories:
    - Business Requirements
    - Functional Requirements
    - Use Cases
    - Technical Constraints
    Document:
    {document_text}
    """

    try:
        # Build the chain once and reuse it for every chunk.
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["document_text"]),
            llm=OpenAI(openai_api_key=openai_api_key, temperature=0.3, max_tokens=300),
        )
        for chunk in chunks:
            summary_parts.append(llm_chain.run(document_text=chunk))
        return "\n".join(summary_parts).strip()
    except Exception as e:
        st.error(f"Summary extraction failed: {str(e)}")
        return ""

def extract_agile_elements(text):
    """Extract EPICs, Features, and User Stories."""
    chunks = chunk_text(text)
    structured_output_parts = []
    prompt = """
    Identify and structure these elements from the document:
    ## 🎯 Epic: [High-level objective]
    ### Feature: [Key capability]
    #### User Story: As a [persona], I want to [goal], so that [reason]

    Document:
    {document_text}
    """
    try:
        # Build the chain once and reuse it for every chunk.
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["document_text"]),
            llm=OpenAI(openai_api_key=openai_api_key, temperature=0.3, max_tokens=300),
        )
        for chunk in chunks:
            structured_output_parts.append(llm_chain.run(document_text=chunk))
        return "\n".join(structured_output_parts).strip()
    except Exception as e:
        st.error(f"Agile extraction failed: {str(e)}")
        return ""

def generate_detailed_user_story(user_story):
    """Generate a detailed user story including acceptance criteria."""
    prompt = """
    Refine the user story into the following structure:

    #### User Story: As a [persona], I want to [goal], so that [reason]

    **Acceptance Criteria:**
    - [List of testable criteria]

    User Story:
    {user_story}
    """
    try:
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["user_story"]),
            llm=OpenAI(openai_api_key=openai_api_key, temperature=0.3, max_tokens=300),
        )
        return llm_chain.run(user_story=user_story)
    except Exception as e:
        st.error(f"Detailed user story generation failed: {str(e)}")
        return ""

def main():
    st.title("📄 GenAI Functional Spec Processor")
    uploaded_file = st.file_uploader(
        "Upload a functional specification document (PDF, TXT, Image)",
        type=["pdf", "txt", "png", "jpg", "jpeg"],
    )

    if uploaded_file:
        text = extract_text(uploaded_file)
        if text:
            # Preview only the first 1,000 characters of the extracted text.
            preview = text[:1000] + ("..." if len(text) > 1000 else "")
            st.text_area("Extracted Text", value=preview, height=200)
            summary = extract_summary(text)
            structured_output = extract_agile_elements(text)

            with st.expander("📝 Extracted Summary", expanded=False):
                st.info(summary)

            st.subheader("📋 Agile Breakdown")
            st.text_area("Agile Output", value=structured_output, height=300)

            user_story = st.text_area("Paste a User Story to Generate Detailed Version")
            if st.button("Generate Detailed User Story"):
                detailed_story = generate_detailed_user_story(user_story)
                st.subheader("Detailed User Story")
                st.write(detailed_story)

if __name__ == "__main__":
    main()
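
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py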