Raghav001 committed on
Commit
e3981d9
·
1 Parent(s): 7a4c21b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -41
app.py CHANGED
@@ -8,7 +8,7 @@ import langchain
8
  import time
9
  from cnocr import CnOcr
10
  import pinecone
11
-
12
  from langchain.vectorstores import Pinecone
13
  from langchain.embeddings.openai import OpenAIEmbeddings
14
  from langchain.text_splitter import CharacterTextSplitter
@@ -39,45 +39,7 @@ all_max_len = 3000
39
 
40
  # Initialize Pinecone client and create an index
41
  pinecone.init(api_key="ffb1f594-0915-4ebf-835f-c1eaa62fdcdc",environment = "us-west4-gcp-free")
42
- index = pinecone.Index(index_name="test")
43
-
44
-
45
- def pine(data):
46
- char_text_spliter = CharacterTextSplitter(chunk_size = 1000, chunk_overlap=0)
47
- # doc_text = char_text_spliter.split_documents(data)
48
- doc_spilt = []
49
- data = data.split(" ")
50
- # print(len(data))
51
-
52
- c = 0
53
- check = 0
54
- for i in data:
55
- # print(i)
56
- if c == 350:
57
- text = " ".join(data[check: check + c])
58
- print(text)
59
- print(check)
60
- doc_spilt.append(text)
61
- check = check + c
62
- c = 0
63
- else:
64
- c = c+1
65
-
66
-
67
- Embedding_model = "text-embedding-ada-002"
68
- embeddings = OpenAIEmbeddings(openai_api_key="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")
69
-
70
- pinecone.init(api_key = "ffb1f594-0915-4ebf-835f-c1eaa62fdcdc",
71
- environment = "us-west4-gcp-free"
72
- )
73
-
74
- index_name = "test"
75
- docstore = Pinecone.from_texts([d for d in doc_spilt],embeddings,index_name = index_name,namespace='a1')
76
-
77
- return ''
78
-
79
-
80
-
81
 
82
 
83
  def get_emb(text):
@@ -193,7 +155,7 @@ def up_file(fls):
193
 
194
  #Pdf Extracting
195
  for idx, file in enumerate(pdf):
196
- print("11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
197
  #print(file.name)
198
  with pdfplumber.open(file) as pdf:
199
  for i in range(len(pdf.pages)):
@@ -270,6 +232,39 @@ def up_file(fls):
270
  value="Processing")
271
 
272
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
 
274
 
275
 
 
8
  import time
9
  from cnocr import CnOcr
10
  import pinecone
11
+ import openai
12
  from langchain.vectorstores import Pinecone
13
  from langchain.embeddings.openai import OpenAIEmbeddings
14
  from langchain.text_splitter import CharacterTextSplitter
 
39
 
40
  # Initialize Pinecone client and create an index
41
  pinecone.init(api_key="ffb1f594-0915-4ebf-835f-c1eaa62fdcdc",environment = "us-west4-gcp-free")
42
+ index = pinecone.Index(index_name="test")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
 
45
  def get_emb(text):
 
155
 
156
  #Pdf Extracting
157
  for idx, file in enumerate(pdf):
158
+ print("11111")
159
  #print(file.name)
160
  with pdfplumber.open(file) as pdf:
161
  for i in range(len(pdf.pages)):
 
232
  value="Processing")
233
 
234
 
235
def pine(data, chunk_words=350):
    """Split ``data`` into word chunks and upload them to the Pinecone index.

    The text is split on single spaces and grouped into consecutive chunks of
    at most ``chunk_words`` words. The chunks are embedded with
    ``OpenAIEmbeddings`` and written through ``Pinecone.from_texts`` to the
    index named ``test`` under the namespace ``a1``.

    Args:
        data: Text to index.
        chunk_words: Maximum number of space-separated words per chunk.

    Returns:
        Always the empty string.

    Raises:
        ValueError: If ``chunk_words`` is smaller than 1.
    """
    if chunk_words < 1:
        raise ValueError("chunk_words must be at least 1")

    words = data.split(" ")

    # Walk the word list in fixed strides so every word lands in exactly one
    # chunk, including the final chunk that is shorter than ``chunk_words``.
    chunks = [
        " ".join(words[start:start + chunk_words])
        for start in range(0, len(words), chunk_words)
    ]

    # A chunk made only of whitespace has nothing to embed; skip it, and skip
    # the remote calls entirely when no usable text is left.
    chunks = [chunk for chunk in chunks if chunk.strip()]
    if not chunks:
        return ''

    # NOTE(review): the OpenAI and Pinecone credentials below are hard-coded
    # in source control. They should be rotated and loaded from configuration
    # (for example environment variables or Space secrets) instead.
    embeddings = OpenAIEmbeddings(openai_api_key="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")

    pinecone.init(
        api_key="ffb1f594-0915-4ebf-835f-c1eaa62fdcdc",
        environment="us-west4-gcp-free",
    )

    index_name = "test"
    Pinecone.from_texts(chunks, embeddings, index_name=index_name, namespace='a1')

    return ''
268
 
269
 
270