yyuri committed
Commit 4848895 · verified · 1 Parent(s): 00a81a6

Upload 4 files
src/group_classification.py CHANGED
@@ -7,6 +7,7 @@ from langchain_community.vectorstores import AzureSearch
 from langchain_community.document_loaders import PyMuPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from openai import AzureOpenAI
+from openai import OpenAIError
 from azure.search.documents.indexes import SearchIndexClient
 from azure.core.credentials import AzureKeyCredential
 import time
@@ -86,26 +87,39 @@ def main(PDF):
     message = [
         {"role": "system",
          "content":
-            """You are an assistant designed to analyze provided text and answer questions regarding CO2 emissions and reduction targets.
-            When given a prompt, you should extract the relevant information from the provided text and answer in the form (排出量, 削減目標).
-            - 排出量 (CO2 Emission): Answer 1 if there is any mention of CO2 emissions, otherwise answer 0.
-            - 削減目標 (Reduction Target): Answer 1 if there is any mention of a CO2 emission reduction target, otherwise answer 0.
-            Example:
-            - If the text mentions CO2 emissions but does not mention any reduction targets, your response should be (1, 0).
-            - If the text mentions neither CO2 emissions nor reduction targets, your response should be (0, 0)."""
-         },
+            """You are an assistant designed to analyze provided text and answer questions regarding CO2 emissions and reduction targets.
+            When given a prompt, you should extract the relevant information from the provided text and answer in the form (排出量, 削減目標).
+            - 排出量 (CO2 Emission): Answer 1 if there is any mention of CO2 emissions, otherwise answer 0.
+            - 削減目標 (Reduction Target): Answer 1 if there is any mention of a CO2 emission reduction target, otherwise answer 0.
+            Example:
+            - If the text mentions CO2 emissions but does not mention any reduction targets, your response should be (1, 0).
+            - If the text mentions neither CO2 emissions nor reduction targets, your response should be (0, 0)."""
+         },
         {"role": "user",
-         "content": f"""Using the provided information: {texts}, please answer the following question in the given form (排出量, 削減目標): {query}."""}
+         "content": f"""Using the provided information: {texts}, please answer the following question in the given form (排出量, 削減目標): {query}."""}
     ]
-
-    response = client.chat.completions.create(
-        model=api_type,
-        messages=message,
-        temperature=0
-    )
-    # print(response.choices[0].message.content)
-    time.sleep(1)
-    return response.choices[0].message.content
+    retries = 0
+    max_retries = 100
+    delay = 5
+    while retries < max_retries:
+        try:
+            response = client.chat.completions.create(
+                model=api_type,
+                messages=message,
+                temperature=0
+            )
+            # print(response.choices[0].message.content)
+            time.sleep(1)
+            return response.choices[0].message.content
+        except OpenAIError as e:
+            print(f"Error occurred: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+        except Exception as e:
+            print(f"Unexpected error: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+    raise RuntimeError("Maximum retries exceeded. Could not get a valid response.")
 
 
 if __name__ == '__main__':
src/research_html_scoring.py CHANGED
@@ -13,6 +13,7 @@ import chromadb
 from rank_bm25 import BM25Okapi
 from janome.tokenizer import Tokenizer
 from openai import OpenAI
+from openai import OpenAIError
 from src.myLogger import set_logger
 import time
 
@@ -50,31 +51,42 @@ def normalize_text(s, sep_token = " \n "):
     return s
 
 
-def generate_answer_(reference, system_prompt, json_schema):
+def generate_answer_(reference, system_prompt, json_schema, max_retries=100, delay=5):
     api_key = os.getenv("OPENAI_API_KEY")
     client = OpenAI(
         api_key=api_key,
     )
-    response = client.chat.completions.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {
-                "role": "system",
-                "content": system_prompt,
-            },
-            {
-                "role": "user",
-                "content": reference,
-            },
-        ],
-        functions=[{"name": "generate_queries", "parameters": json_schema}],
-        function_call={"name": "generate_queries"},
-        temperature=0.0,
-        top_p=0.0,
-    )
-    output = response.choices[0].message.function_call.arguments
-    time.sleep(1)
-    return output
+    retries = 0
+    while retries < max_retries:
+        try:
+            response = client.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": system_prompt,
+                    },
+                    {
+                        "role": "user",
+                        "content": reference,
+                    },
+                ],
+                functions=[{"name": "generate_queries", "parameters": json_schema}],
+                function_call={"name": "generate_queries"},
+                temperature=0.0,
+                top_p=0.0,
+            )
+            output = response.choices[0].message.function_call.arguments
+            return output  # Return successfully if no exception occurs
+        except OpenAIError as e:
+            print(f"Error occurred: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+        except Exception as e:
+            print(f"Unexpected error: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+    raise RuntimeError("Maximum retries exceeded. Could not get a valid response.")
 
 
 def find_context(pdf_url):
src/research_pdf_scoring.py CHANGED
@@ -4,6 +4,7 @@ from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.vectorstores import Chroma
 from langchain_openai import OpenAIEmbeddings
+from openai import OpenAIError
 import os
 import re
 import json
@@ -42,33 +43,44 @@ def normalize_text(s, sep_token = " \n "):
     return s
 
 
-def generate_answer_(reference, system_prompt, json_schema):
+def generate_answer_(reference, system_prompt, json_schema, max_retries=100, delay=5):
     api_key = os.getenv("OPENAI_API_KEY")
-    print("completion start")
     client = OpenAI(
         api_key=api_key,
     )
-    response = client.chat.completions.create(
-        model="gpt-3.5-turbo",
-        messages=[
-            {
-                "role": "system",
-                "content": system_prompt,
-            },
-            {
-                "role": "user",
-                "content": reference,
-            },
-        ],
-        functions=[{"name": "generate_queries", "parameters": json_schema}],
-        function_call={"name": "generate_queries"},
-        temperature=0.0,
-        top_p=0.0,
-    )
-    print("completion end")
-    output = response.choices[0].message.function_call.arguments
-    time.sleep(1)
-    return output
+    retries = 0
+    while retries < max_retries:
+        try:
+            response = client.chat.completions.create(
+                model="gpt-3.5-turbo",
+                messages=[
+                    {
+                        "role": "system",
+                        "content": system_prompt,
+                    },
+                    {
+                        "role": "user",
+                        "content": reference,
+                    },
+                ],
+                functions=[{"name": "generate_queries", "parameters": json_schema}],
+                function_call={"name": "generate_queries"},
+                temperature=0.0,
+                top_p=0.0,
+            )
+            print("completion end")
+            output = response.choices[0].message.function_call.arguments
+            time.sleep(1)
+            return output
+        except OpenAIError as e:
+            print(f"Error occurred: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+        except Exception as e:
+            print(f"Unexpected error: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+    raise RuntimeError("Maximum retries exceeded. Could not get a valid response.")
 
 
 def find_context(pdf_url):
src/scoring_utils.py CHANGED
@@ -1,6 +1,7 @@
 import json
 import os
 import pandas as pd
+import time
 import gspread
 from google.oauth2.service_account import Credentials
 from src.research_html_scoring import research_html_hybrid, group1_html
@@ -8,6 +9,7 @@ from src.research_pdf_scoring import research_pdf_hybrid, group1_pdf
 from utils_groupclassification.check_openai import co
 from src.myLogger import set_logger
 from openai import AzureOpenAI
+from openai import OpenAIError
 from dotenv import load_dotenv
 
 logger = set_logger("my_app", level="INFO")
@@ -61,12 +63,28 @@ def summarize(sentence, user_prompt):
         {"role": "user",
          "content": user_prompt}
     ]
-    response = client.chat.completions.create(
-        messages=messages,
-        model=api_type,
-        temperature=0,
-    )
-    return response.choices[0].message.content
+
+    retries = 0
+    max_retries = 100
+    delay = 5
+
+    while retries < max_retries:
+        try:
+            response = client.chat.completions.create(
+                messages=messages,
+                model=api_type,
+                temperature=0,
+            )
+            return response.choices[0].message.content
+        except OpenAIError as e:
+            print(f"Error occurred: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+        except Exception as e:
+            print(f"Unexpected error: {e}. Retrying in {delay} seconds...")
+            retries += 1
+            time.sleep(delay)
+    raise RuntimeError("Maximum retries exceeded. Could not get a valid response.")
 
 
 def summarize5(sentence):
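
The same retry-on-OpenAIError loop is added to all four files. As a minimal sketch only (not part of this commit), the pattern could be expressed once as a shared decorator; the name with_retries and the usage shown are assumptions for illustration, with the same 100-attempt / 5-second behavior as the loops above.

import time
from functools import wraps

from openai import OpenAIError


def with_retries(max_retries=100, delay=5):
    """Retry the wrapped call on OpenAIError (or any other exception),
    sleeping `delay` seconds between attempts, mirroring the loops above."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            retries = 0
            while retries < max_retries:
                try:
                    return func(*args, **kwargs)
                except OpenAIError as e:
                    print(f"Error occurred: {e}. Retrying in {delay} seconds...")
                except Exception as e:
                    print(f"Unexpected error: {e}. Retrying in {delay} seconds...")
                retries += 1
                time.sleep(delay)
            raise RuntimeError("Maximum retries exceeded. Could not get a valid response.")
        return wrapper
    return decorator


# Hypothetical usage: the decorated function keeps only the API call itself.
@with_retries(max_retries=100, delay=5)
def generate_answer_(reference, system_prompt, json_schema):
    ...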