Spaces:

phyloforfun
/

VoucherVision

Running

App Files Community

phyloforfun committed on Nov 8, 2023

Commit

cc75837

1 Parent(s): bf58fe8

file upload gallery

Browse files

Files changed (1) hide show

app.py +60 -23

app.py CHANGED Viewed

@@ -681,6 +681,9 @@ def save_changes_to_API_keys(cfg_private,openai_api_key,azure_openai_api_version
 def load_prompt_yaml(filename):
     with open(filename, 'r') as file:
         st.session_state['prompt_info'] = yaml.safe_load(file)
         st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
         st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'] )
         st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})
@@ -692,11 +695,14 @@ def load_prompt_yaml(filename):
 def save_prompt_yaml(filename, col_right_save):
     yaml_content = {
         'instructions': st.session_state['instructions'],
         'json_formatting_instructions': st.session_state['json_formatting_instructions'],
         'rules': st.session_state['rules'],
         'mapping': st.session_state['mapping'],
-        'LLM': st.session_state['LLM']
     }
     dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
@@ -716,28 +722,30 @@ def save_prompt_yaml(filename, col_right_save):
 def upload_to_drive(filepath, filename):
     # Parse the service account info from the environment variable
     creds_info = json.loads(os.environ.get('GDRIVE_API'))
-    creds = service_account.Credentials.from_service_account_info(
-        creds_info, scopes=["https://www.googleapis.com/auth/drive"]
-    )
-    service = build('drive', 'v3', credentials=creds)
-    # Get the folder ID from the environment variable
-    folder_id = os.environ.get('GDRIVE')
-    st.info(f"{folder_id}")
-    file_metadata = {
-        'name': filename,
-        'parents': [folder_id]
-    }
-    st.info(f"{file_metadata}")
-    media = MediaFileUpload(filepath, mimetype='application/x-yaml')
-    service.files().create(
-        body=file_metadata,
-        media_body=media,
-        fields='id'
-    ).execute()
 def check_unique_mapping_assignments():
     if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):
@@ -776,6 +784,9 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
     elif not selected_yaml_file:
         # Directly assigning default values since no file is selected
         st.session_state['prompt_info'] = {}
         st.session_state['instructions'] = st.session_state['default_instructions']
         st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
         st.session_state['rules'] = {}
@@ -784,6 +795,9 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
         st.session_state['assigned_columns'] = []
         st.session_state['prompt_info'] = {
             'instructions': st.session_state['instructions'],
             'json_formatting_instructions': st.session_state['json_formatting_instructions'],
             'rules': st.session_state['rules'],
@@ -791,6 +805,8 @@ def btn_load_prompt(selected_yaml_file, dir_prompt):
             'LLM': st.session_state['LLM']
         }
 def upload_local_prompt_to_server(dir_prompt):
     uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
     if uploaded_file is not None:
@@ -818,6 +834,9 @@ def create_download_button(file_path, selected_yaml_file):
 def build_LLM_prompt_config():
     st.session_state['assigned_columns'] = []
     st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
 2. You should map the unstructured OCR text to the appropriate JSON key and then populate the field based on its rules.
 3. Some JSON key fields are permitted to remain empty if the corresponding information is not found in the unstructured OCR text.
@@ -885,15 +904,30 @@ The desired null value is also given. Populate the field with the null value of
                 # Create the download button
                 st.write('##')
                 create_download_button(download_file_path, st.session_state['selected_yaml_file'] )
         # Define the options for the dropdown
         llm_options = ['gpt', 'palm']
         # Create the dropdown and set the value to session_state['LLM']
-        st.session_state['LLM'] = st.selectbox('Set LLM type:', llm_options, index=llm_options.index(st.session_state.get('LLM', 'gpt')))
         # Instructions Section
         st.header("Instructions")
         st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")
@@ -1114,11 +1148,14 @@ The desired null value is also given. Populate the field with the null value of
     with col_prompt_main_right:
         st.subheader('All Prompt Components')
         st.session_state['prompt_info'] = {
             'instructions': st.session_state['instructions'],
             'json_formatting_instructions': st.session_state['json_formatting_instructions'],
             'rules': st.session_state['rules'],
             'mapping': st.session_state['mapping'],
-            'LLM': st.session_state['LLM']
         }
         st.json(st.session_state['prompt_info'])

 def load_prompt_yaml(filename):
     with open(filename, 'r') as file:
         st.session_state['prompt_info'] = yaml.safe_load(file)
+        st.session_state['prompt_author'] = st.session_state['prompt_info'].get('prompt_author', st.session_state['default_prompt_author'])
+        st.session_state['prompt_author_institution'] = st.session_state['prompt_info'].get('prompt_author_institution', st.session_state['default_prompt_author_institution'])
+        st.session_state['prompt_description'] = st.session_state['prompt_info'].get('prompt_description', st.session_state['default_prompt_description'])
         st.session_state['instructions'] = st.session_state['prompt_info'].get('instructions', st.session_state['default_instructions'])
         st.session_state['json_formatting_instructions'] = st.session_state['prompt_info'].get('json_formatting_instructions', st.session_state['default_json_formatting_instructions'] )
         st.session_state['rules'] = st.session_state['prompt_info'].get('rules', {})
 def save_prompt_yaml(filename, col_right_save):
     yaml_content = {
+        'prompt_author': st.session_state['prompt_author'],
+        'prompt_author_institution': st.session_state['prompt_author_institution'],
+        'prompt_description': st.session_state['prompt_description'],
+        'LLM': st.session_state['LLM'],
         'instructions': st.session_state['instructions'],
         'json_formatting_instructions': st.session_state['json_formatting_instructions'],
         'rules': st.session_state['rules'],
         'mapping': st.session_state['mapping'],
     }
     dir_prompt = os.path.join(st.session_state.dir_home, 'custom_prompts')
 def upload_to_drive(filepath, filename):
     """Upload a local file to the configured Google Drive folder.

     The upload is best-effort and is skipped (returning ``None``) when the
     ``GDRIVE_API`` environment variable (service-account JSON) or the
     ``GDRIVE`` environment variable (destination folder ID) is missing or
     empty. Malformed JSON in ``GDRIVE_API`` still raises, so a broken
     configuration is not silently hidden.

     Args:
         filepath: Path of the local file to upload.
         filename: Name given to the file in Drive.
     """
     # Check the raw value before parsing: json.loads(None) raises TypeError,
     # so an unset variable must be handled here rather than after parsing.
     raw_creds = os.environ.get('GDRIVE_API')
     if not raw_creds:
         return
     # Parse the service account info from the environment variable
     creds_info = json.loads(raw_creds)
     if not creds_info:
         return
     # Get the folder ID from the environment variable; without a destination
     # there is nothing to upload, so skip before building the Drive client.
     folder_id = os.environ.get('GDRIVE')
     if not folder_id:
         return
     creds = service_account.Credentials.from_service_account_info(
         creds_info, scopes=["https://www.googleapis.com/auth/drive"]
     )
     service = build('drive', 'v3', credentials=creds)
     file_metadata = {
         'name': filename,
         'parents': [folder_id]
     }
     media = MediaFileUpload(filepath, mimetype='application/x-yaml')
     # Only the new file's id is requested; the response is not used further.
     service.files().create(
         body=file_metadata,
         media_body=media,
         fields='id'
     ).execute()
 def check_unique_mapping_assignments():
     if len(st.session_state['assigned_columns']) != len(set(st.session_state['assigned_columns'])):
     elif not selected_yaml_file:
         # Directly assigning default values since no file is selected
         st.session_state['prompt_info'] = {}
+        st.session_state['prompt_author'] = st.session_state['default_prompt_author']
+        st.session_state['prompt_author_institution'] = st.session_state['default_prompt_author_institution']
+        st.session_state['prompt_description'] = st.session_state['default_prompt_description']
         st.session_state['instructions'] = st.session_state['default_instructions']
         st.session_state['json_formatting_instructions'] = st.session_state['default_json_formatting_instructions']
         st.session_state['rules'] = {}
         st.session_state['assigned_columns'] = []
         st.session_state['prompt_info'] = {
+            'prompt_author': st.session_state['prompt_author'],
+            'prompt_author_institution': st.session_state['prompt_author_institution'],
+            'prompt_description': st.session_state['prompt_description'],
             'instructions': st.session_state['instructions'],
             'json_formatting_instructions': st.session_state['json_formatting_instructions'],
             'rules': st.session_state['rules'],
             'LLM': st.session_state['LLM']
         }
 def upload_local_prompt_to_server(dir_prompt):
     uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
     if uploaded_file is not None:
 def build_LLM_prompt_config():
     st.session_state['assigned_columns'] = []
+    st.session_state['default_prompt_author'] = 'unknown'
+    st.session_state['default_prompt_author_institution'] = 'unknown'
+    st.session_state['default_prompt_description'] = 'unknown'
     st.session_state['default_instructions'] = """1. Refactor the unstructured OCR text into a dictionary based on the JSON structure outlined below.
 2. You should map the unstructured OCR text to the appropriate JSON key and then populate the field based on its rules.
 3. Some JSON key fields are permitted to remain empty if the corresponding information is not found in the unstructured OCR text.
                 # Create the download button
                 st.write('##')
                 create_download_button(download_file_path, st.session_state['selected_yaml_file'] )
+        # Prompt Author Information
+        st.header("Prompt Author Information")
+        st.write("We value community contributions! Please provide your name(s) (or pseudonym if you prefer) for credit. If you leave this field blank, it will say 'unknown'.")
+        st.session_state['prompt_author'] = st.text_input("Enter names of prompt author(s)", value=st.session_state['default_prompt_author'])
+        st.write("Please provide your institution name. If you leave this field blank, it will say 'unknown'.")
+        st.session_state['prompt_author_institution'] = st.text_input("Enter name of institution", value=st.session_state['default_prompt_author_institution'])
+        st.write("Please provide a description of your prompt and its intended task. Is it designed for a specific collection? Taxa? Database structure?")
+        st.session_state['prompt_description'] = st.text_input("Enter description of prompt", value=st.session_state['default_prompt_description'])
+        st.write('---')
+        st.header("Set LLM Model Type")
         # Define the options for the dropdown
         llm_options = ['gpt', 'palm']
         # Create the dropdown and set the value to session_state['LLM']
+        st.write("Which LLM is this prompt designed for? This will not restrict its use to a specific LLM, but some prompts will behave in different ways across models.")
+        st.write("For example, VoucherVision will automatically add multiple JSON formatting blocks to all PaLM 2 prompts to coax PaLM 2 to return a valid JSON object.")
+        st.session_state['LLM'] = st.selectbox('Set LLM', llm_options, index=llm_options.index(st.session_state.get('LLM', 'gpt')))
+        st.write('---')
         # Instructions Section
         st.header("Instructions")
         st.write("These are the general instructions that guide the LLM through the transcription task. We recommend using the default instructions unless you have a specific reason to change them.")
     with col_prompt_main_right:
         st.subheader('All Prompt Components')
         st.session_state['prompt_info'] = {
+            'prompt_author': st.session_state['prompt_author'],
+            'prompt_author_institution': st.session_state['prompt_author_institution'],
+            'prompt_description': st.session_state['prompt_description'],
+            'LLM': st.session_state['LLM'],
             'instructions': st.session_state['instructions'],
             'json_formatting_instructions': st.session_state['json_formatting_instructions'],
             'rules': st.session_state['rules'],
             'mapping': st.session_state['mapping'],
         }
         st.json(st.session_state['prompt_info'])