RAHULJUNEJA33 committed on
Commit
8efe5a0
Β·
verified Β·
1 Parent(s): a28d499

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +133 -52
app.py CHANGED
@@ -1,91 +1,172 @@
1
- import numpy as np
2
  import streamlit as st
 
3
  from PyPDF2 import PdfReader
4
  from PIL import Image
5
  import pytesseract
6
- import faiss
7
  from transformers import pipeline
8
  from langchain.prompts import PromptTemplate
9
  from langchain.chains import LLMChain
10
  from langchain.llms import OpenAI
11
- from openai import OpenAI
12
 
13
- # Initialize OpenAI client with Streamlit secrets
14
- openai_api_key = st.secrets["OPENAI_API_KEY"]
15
- client = OpenAI(api_key=openai_api_key) # Correct client initialization
16
 
17
  # Configuration
18
  pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
19
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
20
 
21
- # Initialize FAISS
22
- dim = 1536
23
  index = faiss.IndexFlatL2(dim)
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def extract_text(uploaded_file):
26
  """Extract text from PDF, TXT, or image files"""
 
27
  try:
28
- text = ""
29
-
30
  if uploaded_file.type == "application/pdf":
31
  reader = PdfReader(uploaded_file)
32
  for page in reader.pages:
33
  if page.extract_text():
34
  text += page.extract_text() + "\n"
35
-
36
  elif uploaded_file.type == "text/plain":
37
  text = uploaded_file.read().decode("utf-8")
38
-
39
  elif uploaded_file.type.startswith('image'):
40
  image = Image.open(uploaded_file)
41
  text = pytesseract.image_to_string(image)
42
-
43
- return text.strip()
44
-
45
  except Exception as e:
46
  st.error(f"Text extraction failed: {str(e)}")
47
- return ""
48
 
49
- # Rest of the functions remain the same as previous working version
50
- # ... [Keep all other functions identical to the last working code] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
- # Streamlit UI
53
- st.set_page_config(page_title="SpecAnalyzer", layout="wide")
54
- st.title("πŸ“‹ Functional Specification Analyzer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
- uploaded_file = st.file_uploader("Upload document (PDF/TXT/Image)", type=["pdf", "txt", "jpg", "jpeg"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- if uploaded_file:
59
- with st.spinner("πŸ“„ Extracting text..."):
60
- text = extract_text(uploaded_file)
61
 
62
- if text:
63
- with st.spinner("πŸ” Analyzing document..."):
64
- analysis = process_document(text)
65
-
66
- if analysis:
67
- display_results(analysis)
68
 
69
- # User story selection
70
- user_stories = [
71
- line.split(":", 1)[1].strip()
72
- for line in analysis.split('\n')
73
- if line.startswith("#### User Story:")
74
- ]
75
 
76
- selected_story = st.selectbox("πŸ“Œ Select a User Story to expand:", user_stories)
 
77
 
78
- if st.button("✨ Generate Detailed Breakdown"):
79
- with st.spinner("βš™οΈ Generating details..."):
80
- details = generate_user_story_details(selected_story)
81
- st.subheader("πŸ“ Detailed Specifications")
82
- st.markdown(details)
83
-
84
- st.download_button(
85
- label="πŸ’Ύ Download as Markdown",
86
- data=details,
87
- file_name="user_story_details.md",
88
- mime="text/markdown"
89
- )
90
- else:
91
- st.error("Failed to extract text from document")
 
 
import os

import faiss
import numpy as np
import openai
import pytesseract
import streamlit as st
from PIL import Image
from PyPDF2 import PdfReader
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from transformers import pipeline
12
 
13
# OpenAI API key.
# SECURITY: never commit a literal API key to source control. The key that was
# previously hard-coded here is compromised and must be revoked in the OpenAI
# dashboard. Read the key from Streamlit secrets, falling back to the
# environment, as the earlier revision of this file did.
OPENAI_API_KEY = st.secrets.get("OPENAI_API_KEY", os.environ.get("OPENAI_API_KEY", ""))

# Configuration
pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

dim = 1536  # Embedding dimension of OpenAI text-embedding-ada-002
index = faiss.IndexFlatL2(dim)
22
 
23
def create_embedding(text):
    """Return the ada-002 embedding vector for *text*, or None on failure.

    Errors are reported to the Streamlit UI rather than raised.
    """
    try:
        # NOTE: uses the pre-1.0 openai client interface (openai.Embedding),
        # matching the module-level `import openai` in this file.
        result = openai.Embedding.create(
            model="text-embedding-ada-002",
            input=text,
            api_key=OPENAI_API_KEY,
        )
        return result['data'][0]['embedding']
    except Exception as exc:
        st.error(f"Error creating embedding: {str(exc)}")
        return None
35
+
36
def extract_text(uploaded_file):
    """Extract plain text from an uploaded PDF, TXT, or image file.

    Parameters
    ----------
    uploaded_file : file-like object exposing ``.type`` (MIME string) and
        ``.read()`` — e.g. a Streamlit ``UploadedFile``.

    Returns
    -------
    str
        The extracted text, stripped of surrounding whitespace. Returns ""
        for unsupported MIME types or when extraction raises.
    """
    text = ""
    try:
        if uploaded_file.type == "application/pdf":
            reader = PdfReader(uploaded_file)
            for page in reader.pages:
                # page.extract_text() re-parses the page; call it once and
                # reuse the result instead of calling it twice per page.
                page_text = page.extract_text()
                if page_text:
                    text += page_text + "\n"
        elif uploaded_file.type == "text/plain":
            text = uploaded_file.read().decode("utf-8")
        elif uploaded_file.type.startswith('image'):
            image = Image.open(uploaded_file)
            text = pytesseract.image_to_string(image)
    except Exception as e:
        # Best-effort: surface the failure in the UI and fall through to "".
        st.error(f"Text extraction failed: {str(e)}")
    return text.strip()
53
 
54
def chunk_text(text, max_tokens=1000):
    """Split *text* into chunks of at most *max_tokens* whitespace-separated words.

    NOTE: despite the parameter name, this counts *words*, which only
    approximates LLM tokens; it is used as a conservative context-size guard.

    Parameters
    ----------
    text : str
        Document text to split.
    max_tokens : int
        Maximum number of words per chunk.

    Returns
    -------
    list[str]
        Chunk strings; empty input yields an empty list.
    """
    words = text.split()
    chunks = []
    current_chunk = []
    for word in words:
        current_chunk.append(word)
        # The original incremented by len(word.split()), which is always 1
        # for a whitespace-split word — count words directly instead.
        if len(current_chunk) >= max_tokens:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
73
+
74
def extract_summary(text):
    """Produce a concise requirements summary of *text* via an OpenAI LLM.

    The document is split into word-limited chunks (see ``chunk_text``),
    the same summarization prompt is run over each chunk, and the partial
    summaries are joined with newlines.

    Returns
    -------
    str
        Combined summary, or "" if any LLM call fails.
    """
    chunks = chunk_text(text)
    summary_parts = []
    prompt = """
    Extract a concise summary of the following categories:
    - Business Requirements
    - Functional Requirements
    - Use Cases
    - Technical Constraints
    Document:
    {document_text}
    """

    try:
        # The chain is identical for every chunk — build it once instead of
        # re-instantiating LLMChain/PromptTemplate/OpenAI on every iteration.
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["document_text"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        for chunk in chunks:
            summary_parts.append(llm_chain.run(document_text=chunk))
        return "\n".join(summary_parts).strip()
    except Exception as e:
        st.error(f"Summary extraction failed: {str(e)}")
        return ""
99
 
100
def extract_agile_elements(text):
    """Extract EPICs, Features, and User Stories from *text* via an OpenAI LLM.

    Mirrors ``extract_summary``: the document is chunked, each chunk is run
    through the same structuring prompt, and the outputs are joined.

    Returns
    -------
    str
        Structured agile breakdown, or "" if any LLM call fails.
    """
    chunks = chunk_text(text)
    structured_output_parts = []
    prompt = """
    Identify and structure these elements from the document:
    ## 🎯 Epic: [High-level objective]
    ### Feature: [Key capability]
    #### User Story: As a [persona], I want to [goal], so that [reason]

    Document:
    {document_text}
    """
    try:
        # Loop-invariant: construct the chain once, not once per chunk.
        llm_chain = LLMChain(
            prompt=PromptTemplate(template=prompt, input_variables=["document_text"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        for chunk in chunks:
            structured_output_parts.append(llm_chain.run(document_text=chunk))
        return "\n".join(structured_output_parts).strip()
    except Exception as e:
        st.error(f"Agile extraction failed: {str(e)}")
        return ""
124
 
125
def generate_detailed_user_story(user_story):
    """Expand a user story into a structured form with acceptance criteria.

    Returns the LLM output string, or "" when generation fails (the error
    is shown in the Streamlit UI).
    """
    template = """
    Refine the user story into the following structure:

    #### User Story: As a [persona], I want to [goal], so that [reason]

    **Acceptance Criteria:**
    - [List of testable criteria]

    User Story:
    {user_story}
    """
    try:
        chain = LLMChain(
            prompt=PromptTemplate(template=template, input_variables=["user_story"]),
            llm=OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0.3, max_tokens=300),
        )
        return chain.run(user_story=user_story)
    except Exception as err:
        st.error(f"Detailed user story generation failed: {str(err)}")
        return ""
147
 
148
def main():
    """Streamlit entry point: upload a spec document and drive the analysis UI."""
    st.title("πŸ“‘ GenAI Functional Spec Processor")
    uploaded_file = st.file_uploader(
        "Upload a functional specification document (PDF, TXT, Image)",
        type=["pdf", "txt", "png", "jpg", "jpeg"],
    )

    # Guard clauses: nothing to do until a file is uploaded and text extracted.
    if not uploaded_file:
        return
    text = extract_text(uploaded_file)
    if not text:
        return

    # Show preview only — the full document may be very large.
    st.text_area("Extracted Text", value=text[:1000] + "...", height=200)
    summary = extract_summary(text)
    structured_output = extract_agile_elements(text)

    with st.expander("πŸ“‹ Extracted Summary", expanded=False):
        st.info(summary)

    st.subheader("πŸ“Œ Agile Breakdown")
    st.text_area("Agile Output", value=structured_output, height=300)

    user_story = st.text_area("Paste a User Story to Generate Detailed Version")
    if st.button("Generate Detailed User Story"):
        detailed_story = generate_detailed_user_story(user_story)
        st.subheader("Detailed User Story")
        st.write(detailed_story)

if __name__ == "__main__":
    main()