Spaces:
Running
Running
Commit
·
cc75837
1
Parent(s):
bf58fe8
file upload gallery
Browse files
app.py
CHANGED
|
@@ -681,6 +681,9 @@ def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version
|
|
| 681 |
def load_prompt_yaml(filename):
|
| 682 |
with open(filename, 'r') as file:
|
| 683 |
st.session_state['prompt_info'] = yaml.safe_load(file)
|
|
|
|
|
|
|
|
|
|
| 684 |
st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
|
| 685 |
st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'] )
|
| 686 |
st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})
|
|
@@ -692,11 +695,14 @@ def load_prompt_yaml(filename):
|
|
| 692 |
|
| 693 |
def save_prompt_yaml(filename, col_right_save):
|
| 694 |
yaml_content = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 695 |
'instructions': st.session_state['instructions'],
|
| 696 |
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
| 697 |
'rules': st.session_state['rules'],
|
| 698 |
'mapping': st.session_state['mapping'],
|
| 699 |
-
'LLM': st.session_state['LLM']
|
| 700 |
}
|
| 701 |
|
| 702 |
dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
|
|
@@ -716,28 +722,30 @@ def save_prompt_yaml(filename, col_right_save):
|
|
| 716 |
def upload_to_drive(filepath, filename):
|
| 717 |
# Parse the service account info from the environment variable
|
| 718 |
creds_info = json.loads(os.environ.get('GDRIVE_API'))
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
|
|
|
| 723 |
|
| 724 |
-
|
| 725 |
-
|
| 726 |
-
|
| 727 |
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
|
|
|
| 733 |
|
| 734 |
-
|
| 735 |
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
|
| 742 |
def check_unique_mapping_assignments():
|
| 743 |
if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):
|
|
@@ -776,6 +784,9 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
|
|
| 776 |
elif not selected_yaml_file:
|
| 777 |
# Directly assigning default values since no file is selected
|
| 778 |
st.session_state['prompt_info'] = {}
|
|
|
|
|
|
|
|
|
|
| 779 |
st.session_state['instructions'] = st.session_state['default_instructions']
|
| 780 |
st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
|
| 781 |
st.session_state['rules'] = {}
|
|
@@ -784,6 +795,9 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
|
|
| 784 |
st.session_state['assigned_columns'] = []
|
| 785 |
|
| 786 |
st.session_state['prompt_info'] = {
|
|
|
|
|
|
|
|
|
|
| 787 |
'instructions': st.session_state['instructions'],
|
| 788 |
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
| 789 |
'rules': st.session_state['rules'],
|
|
@@ -791,6 +805,8 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
|
|
| 791 |
'LLM': st.session_state['LLM']
|
| 792 |
}
|
| 793 |
|
|
|
|
|
|
|
| 794 |
def upload_local_prompt_to_server(dir_prompt):
|
| 795 |
uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
| 796 |
if uploaded_file is not None:
|
|
@@ -818,6 +834,9 @@ def create_download_button(file_path, selected_yaml_file):
|
|
| 818 |
|
| 819 |
def build_LLM_prompt_config():
|
| 820 |
st.session_state['assigned_columns'] = []
|
|
|
|
|
|
|
|
|
|
| 821 |
st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
|
| 822 |
2. You should map the unstructured OCR text to the appropriate JSON key and then populate the field based on its rules.
|
| 823 |
3. Some JSON key fields are permitted to remain empty if the corresponding information is not found in the unstructured OCR text.
|
|
@@ -885,15 +904,30 @@ The desired null value is also given. Populate the field with the null value of
|
|
| 885 |
# Create the download button
|
| 886 |
st.write('##')
|
| 887 |
create_download_button(download_file_path, st.session_state['selected_yaml_file'] )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 888 |
|
|
|
|
|
|
|
| 889 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 890 |
# Define the options for the dropdown
|
| 891 |
llm_options = ['gpt', 'palm']
|
| 892 |
# Create the dropdown and set the value to session_state['LLM']
|
| 893 |
-
st.
|
| 894 |
-
|
|
|
|
| 895 |
|
| 896 |
-
|
| 897 |
# Instructions Section
|
| 898 |
st.header("Instructions")
|
| 899 |
st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")
|
|
@@ -1114,11 +1148,14 @@ The desired null value is also given. Populate the field with the null value of
|
|
| 1114 |
with col_prompt_main_right:
|
| 1115 |
st.subheader('All Prompt Components')
|
| 1116 |
st.session_state['prompt_info'] = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1117 |
'instructions': st.session_state['instructions'],
|
| 1118 |
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
| 1119 |
'rules': st.session_state['rules'],
|
| 1120 |
'mapping': st.session_state['mapping'],
|
| 1121 |
-
'LLM': st.session_state['LLM']
|
| 1122 |
}
|
| 1123 |
st.json(st.session_state['prompt_info'])
|
| 1124 |
|
|
|
|
| 681 |
def load_prompt_yaml(filename):
|
| 682 |
with open(filename, 'r') as file:
|
| 683 |
st.session_state['prompt_info'] = yaml.safe_load(file)
|
| 684 |
+
st.session_state['prompt_author'] = st.session_state['prompt_info'].get('prompt_author', st.session_state['default_prompt_author'])
|
| 685 |
+
st.session_state['prompt_author_institution'] = st.session_state['prompt_info'].get('prompt_author_institution', st.session_state['default_prompt_author_institution'])
|
| 686 |
+
st.session_state['prompt_description'] = st.session_state['prompt_info'].get('prompt_description', st.session_state['default_prompt_description'])
|
| 687 |
st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
|
| 688 |
st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'] )
|
| 689 |
st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})
|
|
|
|
| 695 |
|
| 696 |
def save_prompt_yaml(filename, col_right_save):
|
| 697 |
yaml_content = {
|
| 698 |
+
'prompt_author': st.session_state['prompt_author'],
|
| 699 |
+
'prompt_author_institution': st.session_state['prompt_author_institution'],
|
| 700 |
+
'prompt_description': st.session_state['prompt_description'],
|
| 701 |
+
'LLM': st.session_state['LLM'],
|
| 702 |
'instructions': st.session_state['instructions'],
|
| 703 |
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
| 704 |
'rules': st.session_state['rules'],
|
| 705 |
'mapping': st.session_state['mapping'],
|
|
|
|
| 706 |
}
|
| 707 |
|
| 708 |
dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
|
|
|
|
| 722 |
def upload_to_drive(filepath, filename):
    """Upload a local YAML file to the configured Google Drive folder.

    Configuration is read from two environment variables:

    * ``GDRIVE_API`` -- JSON-encoded service account info.
    * ``GDRIVE`` -- ID of the destination Drive folder.

    The upload is best-effort: when ``GDRIVE_API`` is unset/empty (or decodes
    to an empty value), or when ``GDRIVE`` is unset/empty, the function
    returns without uploading anything.

    Args:
        filepath: Path of the local file to upload.
        filename: Name given to the uploaded file in Drive.

    Returns:
        None.

    Raises:
        json.JSONDecodeError: If ``GDRIVE_API`` is non-empty but not valid JSON.
    """
    # Read the raw value first: json.loads(None) raises TypeError, which would
    # crash the caller whenever the service account is simply not configured.
    raw_creds = os.environ.get('GDRIVE_API')
    if not raw_creds:
        return

    # Parse the service account info from the environment variable
    creds_info = json.loads(raw_creds)
    if not creds_info:
        return

    creds = service_account.Credentials.from_service_account_info(
        creds_info, scopes=["https://www.googleapis.com/auth/drive"]
    )
    service = build('drive', 'v3', credentials=creds)

    # Get the folder ID from the environment variable
    folder_id = os.environ.get('GDRIVE')
    if not folder_id:
        return

    file_metadata = {
        'name': filename,
        'parents': [folder_id],
    }
    media = MediaFileUpload(filepath, mimetype='application/x-yaml')

    # Only the new file's id is requested; the response is not used.
    service.files().create(
        body=file_metadata,
        media_body=media,
        fields='id',
    ).execute()
|
| 749 |
|
| 750 |
def check_unique_mapping_assignments():
|
| 751 |
if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):
|
|
|
|
| 784 |
elif not selected_yaml_file:
|
| 785 |
# Directly assigning default values since no file is selected
|
| 786 |
st.session_state['prompt_info'] = {}
|
| 787 |
+
st.session_state['prompt_author'] = st.session_state['default_prompt_author']
|
| 788 |
+
st.session_state['prompt_author_institution'] = st.session_state['default_prompt_author_institution']
|
| 789 |
+
st.session_state['prompt_description'] = st.session_state['default_prompt_description']
|
| 790 |
st.session_state['instructions'] = st.session_state['default_instructions']
|
| 791 |
st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
|
| 792 |
st.session_state['rules'] = {}
|
|
|
|
| 795 |
st.session_state['assigned_columns'] = []
|
| 796 |
|
| 797 |
st.session_state['prompt_info'] = {
|
| 798 |
+
'prompt_author': st.session_state['prompt_author'],
|
| 799 |
+
'prompt_author_institution': st.session_state['prompt_author_institution'],
|
| 800 |
+
'prompt_description': st.session_state['prompt_description'],
|
| 801 |
'instructions': st.session_state['instructions'],
|
| 802 |
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
| 803 |
'rules': st.session_state['rules'],
|
|
|
|
| 805 |
'LLM': st.session_state['LLM']
|
| 806 |
}
|
| 807 |
|
| 808 |
+
|
| 809 |
+
|
| 810 |
def upload_local_prompt_to_server(dir_prompt):
|
| 811 |
uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
| 812 |
if uploaded_file is not None:
|
|
|
|
| 834 |
|
| 835 |
def build_LLM_prompt_config():
|
| 836 |
st.session_state['assigned_columns'] = []
|
| 837 |
+
st.session_state['default_prompt_author'] = 'unknown'
|
| 838 |
+
st.session_state['default_prompt_author_institution'] = 'unknown'
|
| 839 |
+
st.session_state['default_prompt_description'] = 'unknown'
|
| 840 |
st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
|
| 841 |
2. You should map the unstructured OCR text to the appropriate JSON key and then populate the field based on its rules.
|
| 842 |
3. Some JSON key fields are permitted to remain empty if the corresponding information is not found in the unstructured OCR text.
|
|
|
|
| 904 |
# Create the download button
|
| 905 |
st.write('##')
|
| 906 |
create_download_button(download_file_path, st.session_state['selected_yaml_file'] )
|
| 907 |
+
|
| 908 |
+
|
| 909 |
+
# Prompt Author Information
|
| 910 |
+
st.header("Prompt Author Information")
|
| 911 |
+
st.write("We value community contributions! Please provide your name(s) (or pseudonym if you prefer) for credit. If you leave this field blank, it will say 'unknown'.")
|
| 912 |
+
st.session_state['prompt_author'] = st.text_input("Enter names of prompt author(s)", value=st.session_state['default_prompt_author'])
|
| 913 |
|
| 914 |
+
st.write("Please provide your institution name. If you leave this field blank, it will say 'unknown'.")
|
| 915 |
+
st.session_state['prompt_author_institution'] = st.text_input("Enter name of institution", value=st.session_state['default_prompt_author_institution'])
|
| 916 |
|
| 917 |
+
st.write("Please provide a description of your prompt and its intended task. Is it designed for a specific collection? Taxa? Database structure?")
|
| 918 |
+
st.session_state['prompt_description'] = st.text_input("Enter description of prompt", value=st.session_state['default_prompt_description'])
|
| 919 |
+
|
| 920 |
+
|
| 921 |
+
st.write('---')
|
| 922 |
+
st.header("Set LLM Model Type")
|
| 923 |
# Define the options for the dropdown
|
| 924 |
llm_options = ['gpt', 'palm']
|
| 925 |
# Create the dropdown and set the value to session_state['LLM']
|
| 926 |
+
st.write("Which LLM is this prompt designed for? This will not restrict its use to a specific LLM, but some prompts will behave in different ways across models.")
|
| 927 |
+
st.write("For example, VoucherVision will automatically add multiple JSON formatting blocks to all PaLM 2 prompts to coax PaLM 2 to return a valid JSON object.")
|
| 928 |
+
st.session_state['LLM'] = st.selectbox('Set LLM', llm_options, index=llm_options.index(st.session_state.get('LLM', 'gpt')))
|
| 929 |
|
| 930 |
+
st.write('---')
|
| 931 |
# Instructions Section
|
| 932 |
st.header("Instructions")
|
| 933 |
st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")
|
|
|
|
| 1148 |
with col_prompt_main_right:
|
| 1149 |
st.subheader('All Prompt Components')
|
| 1150 |
st.session_state['prompt_info'] = {
|
| 1151 |
+
'prompt_author': st.session_state['prompt_author'],
|
| 1152 |
+
'prompt_author_institution': st.session_state['prompt_author_institution'],
|
| 1153 |
+
'prompt_description': st.session_state['prompt_description'],
|
| 1154 |
+
'LLM': st.session_state['LLM'],
|
| 1155 |
'instructions': st.session_state['instructions'],
|
| 1156 |
'json_formatting_instructions': st.session_state['json_formatting_instructions'],
|
| 1157 |
'rules': st.session_state['rules'],
|
| 1158 |
'mapping': st.session_state['mapping'],
|
|
|
|
| 1159 |
}
|
| 1160 |
st.json(st.session_state['prompt_info'])
|
| 1161 |
|