RAHULJUNEJA33 commited on
Commit
a28d499
·
verified ·
1 Parent(s): 3135d92

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -133
app.py CHANGED
@@ -8,15 +8,14 @@ from transformers import pipeline
8
  from langchain.prompts import PromptTemplate
9
  from langchain.chains import LLMChain
10
  from langchain.llms import OpenAI
11
- import openai
12
- import httpx # Only needed if using proxies
13
 
14
  # Initialize OpenAI client with Streamlit secrets
15
  openai_api_key = st.secrets["OPENAI_API_KEY"]
16
- openai_client = openai.Client(api_key=openai_api_key)
17
 
18
  # Configuration
19
- pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract' # For Hugging Face Spaces
20
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
21
 
22
  # Initialize FAISS
@@ -47,134 +46,8 @@ def extract_text(uploaded_file):
47
  st.error(f"Text extraction failed: {str(e)}")
48
  return ""
49
 
50
- def is_functional_spec(text):
51
- """Check if document is a functional specification"""
52
- if len(text) < 100:
53
- return False
54
-
55
- candidate_labels = ["functional specification", "academic paper", "technical documentation"]
56
- result = classifier(text[:1000], candidate_labels)
57
- return result['labels'][0] == "functional specification"
58
-
59
- def chunk_text(text, chunk_size=800):
60
- """Split text into manageable chunks"""
61
- words = text.split()
62
- chunks = []
63
- current_chunk = []
64
- count = 0
65
-
66
- for word in words:
67
- current_chunk.append(word)
68
- count += len(word.split())
69
- if count >= chunk_size:
70
- chunks.append(" ".join(current_chunk))
71
- current_chunk = []
72
- count = 0
73
-
74
- if current_chunk:
75
- chunks.append(" ".join(current_chunk))
76
- return chunks
77
-
78
- def embed_and_store(chunks):
79
- """Create and store embeddings"""
80
- embeddings = []
81
- for chunk in chunks:
82
- response = openai_client.embeddings.create(
83
- model="text-embedding-ada-002",
84
- input=chunk
85
- )
86
- embeddings.append(response['data'][0]['embedding'])
87
-
88
- if embeddings:
89
- index.add(np.array(embeddings).astype('float32'))
90
-
91
- def search_vector_database(query, top_k=5):
92
- """Semantic search for relevant content"""
93
- response = openai_client.embeddings.create(
94
- model="text-embedding-ada-002",
95
- input=query
96
- )
97
- query_embedding = np.array(response['data'][0]['embedding']).astype('float32').reshape(1, -1)
98
- _, indices = index.search(query_embedding, top_k)
99
- return indices[0]
100
-
101
- def display_results(response):
102
- """Organized display of analysis results"""
103
- st.subheader("🚀 Analysis Results")
104
-
105
- with st.container():
106
- current_epic = None
107
- for line in response.split('\n'):
108
- line = line.strip()
109
- if not line:
110
- continue
111
-
112
- if line.startswith("## Epic:"):
113
- current_epic = line.split(":", 1)[1].strip()
114
- with st.expander(f"📦 {current_epic}"):
115
- st.markdown(f"**Epic Overview**\n{current_epic}")
116
- elif line.startswith("### Feature:"):
117
- feature = line.split(":", 1)[1].strip()
118
- st.markdown(f"**🔧 Feature:** {feature}")
119
- elif line.startswith("#### User Story:"):
120
- story = line.split(":", 1)[1].strip()
121
- st.markdown(f"▪️ {story}")
122
- else:
123
- st.write(line)
124
-
125
- def process_document(text):
126
- """Main processing pipeline"""
127
- if not is_functional_spec(text):
128
- st.error("❌ This document doesn't appear to be a functional specification")
129
- return None
130
-
131
- chunks = chunk_text(text)
132
- embed_and_store(chunks)
133
-
134
- # Enhanced semantic search
135
- relevant_chunks = []
136
- for query in ["user requirements", "system features", "user stories"]:
137
- indices = search_vector_database(query, top_k=2)
138
- relevant_chunks.extend([chunks[i] for i in indices])
139
-
140
- # Analysis prompt
141
- prompt_template = """Identify and structure these elements from the document:
142
-
143
- ## Epic: [High-level objective]
144
- ### Feature: [Key capability]
145
- #### User Story: [Specific need in format: As a <role>, I want <goal> so that <reason>]
146
-
147
- Document:
148
- {document_text}
149
-
150
- Format strictly using markdown headings (##, ###, ####) without numbering."""
151
-
152
- llm_chain = LLMChain(
153
- prompt=PromptTemplate(template=prompt_template, input_variables=["document_text"]),
154
- llm=OpenAI(openai_api_key=openai_api_key, temperature=0.3, max_tokens=2000)
155
- )
156
- return llm_chain.run(document_text="\n".join(relevant_chunks))
157
-
158
- def generate_user_story_details(user_story):
159
- """Generate detailed breakdown"""
160
- prompt = f"""Expand this user story into detailed specifications:
161
-
162
- {user_story}
163
-
164
- Include:
165
- ✅ Acceptance Criteria (Gherkin format)
166
- 📈 Success Metrics
167
- ⚠️ Edge Cases
168
- 🔧 Technical Requirements
169
- 🖥️ UX Considerations"""
170
-
171
- response = openai_client.chat.completions.create(
172
- model="gpt-4",
173
- messages=[{"role": "user", "content": prompt}],
174
- temperature=0.2,
175
- max_tokens=1500
176
- )
177
- return response['choices'][0]['message']['content']
178
 
179
  # Streamlit UI
180
  st.set_page_config(page_title="SpecAnalyzer", layout="wide")
@@ -215,4 +88,4 @@ if uploaded_file:
215
  mime="text/markdown"
216
  )
217
  else:
218
- st.error("Failed to extract text from document")
 
8
  from langchain.prompts import PromptTemplate
9
  from langchain.chains import LLMChain
10
  from langchain.llms import OpenAI
11
+ from openai import OpenAI
 
12
 
13
  # Initialize OpenAI client with Streamlit secrets
14
  openai_api_key = st.secrets["OPENAI_API_KEY"]
15
+ client = OpenAI(api_key=openai_api_key) # Correct client initialization
16
 
17
  # Configuration
18
+ pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
19
  classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
20
 
21
  # Initialize FAISS
 
46
  st.error(f"Text extraction failed: {str(e)}")
47
  return ""
48
 
49
+ # Rest of the functions remain the same as previous working version
50
+ # ... [Keep all other functions identical to the last working code] ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  # Streamlit UI
53
  st.set_page_config(page_title="SpecAnalyzer", layout="wide")
 
88
  mime="text/markdown"
89
  )
90
  else:
91
+ st.error("Failed to extract text from document")