semsearch / utils.py
initial google ai
77b927d
raw
history blame
7.91 kB
import json
import pandas as pd
import os
# from google.cloud import secretmanager
import openai
import streamlit as st
def get_variable(name):
    """Look up a configuration value by name.

    The process environment is consulted first. When the variable is unset
    or empty there, ``st.secrets[name]`` is tried instead. Any failure of
    that second lookup is ignored on purpose, in which case whatever the
    environment produced (``None`` or an empty string) is returned.
    """
    value = os.getenv(name)
    if value:
        return value
    try:
        return st.secrets[name]
    except Exception:
        # Best-effort fallback: a failing secrets lookup is not treated as an
        # error, the environment result is handed back unchanged.
        return value
# OpenAI credentials, resolved once at import time through get_variable().
# NOTE(review): the earlier trailing comment here pointed at app.pinecone.io,
# which does not look related to an OpenAI key — presumably a copy/paste
# leftover; confirm before relying on it.
OPENAI_API_KEY = get_variable("OPENAI_API_KEY")
OPENAI_ORGANIZATION_ID = get_variable("OPENAI_ORGANIZATION_ID")
def check_password():
    """Gate the app behind a shared password.

    Returns:
        ``True`` when the ``SKIP_PASSWORD`` setting equals ``"True"`` or the
        correct password has already been entered in this session;
        ``False`` otherwise, after rendering the password input (plus an
        error message when a previous attempt was wrong).
    """
    import hmac  # function-scope import: the module's import block is untouched

    if get_variable("SKIP_PASSWORD") == "True":
        return True

    def password_entered():
        """Check the submitted password and record the outcome in session state."""
        submitted = st.session_state["password"]
        expected = get_variable("password")
        # Constant-time comparison so response timing does not reveal how much
        # of the password matched. Both sides are encoded to bytes because
        # compare_digest() rejects non-ASCII str values. A missing or
        # non-string configured password simply never matches.
        matches = (
            isinstance(submitted, str)
            and isinstance(expected, str)
            and hmac.compare_digest(
                submitted.encode("utf-8"), expected.encode("utf-8")
            )
        )
        if matches:
            st.session_state["password_correct"] = True
            del st.session_state["password"]  # don't store password
        else:
            st.session_state["password_correct"] = False

    first_attempt = "password_correct" not in st.session_state
    if not first_attempt and st.session_state["password_correct"]:
        # Password correct.
        return True

    # Either the first run or a failed attempt: show the input again.
    st.text_input(
        "Password", type="password", on_change=password_entered, key="password"
    )
    if not first_attempt:
        # A previous attempt was recorded as incorrect.
        st.error("😕 Password incorrect")
    return False
def get_prompt(title):
    """Placeholder prompt lookup.

    The ``title`` argument is accepted but not used; the function always
    yields an empty string.
    """
    no_prompt = ""
    return no_prompt
# Instruction text for an assistant that presents a (for now made-up) list of
# startups, asks the user to pick entries by number, suggests additional
# founder/team criteria for filtering or ranking, and is told to output a JSON
# description of the filter criteria.
assistant_instructions = """Start like this:
Please find here a list of startups that match the criteria you gave me (right now make a list up, later we will retrieve the list in a step before this).
I like you to present a list view with the option to open up a more detailed view per startup including the location of the startup, the founders and the founding year.
Ask the user to select startups that are of interest for them (just indicate the numbers).
Also invite users to think of other criteria that could help them qualify the startups further such as
1) founder and team characteristics:
- serial entrepreneurs in the team
- strong tech capabilities in the team
- female founders or younger / older founders in the team
- founders who graduated from top 100 universities
ask the user if they would like to use those criteria for filtering (with the downside of seeing potentially very few startups) or rather apply it for ranking the companies (with the downside that there will be a lot of companies at the bottom of the list that are not a match at all).
Invite users to name other criteria even if we are currently not able to provide such features. Ideally, they are possible to extract from a company's website or public founder profiles on social media.
Output a json that specifies the filter criteria important to a user with the output variable.
Also name the ranking criteria and suggest how to combine them to best meet the user's preferences.
"""
# Default report prompt: asks for query-improvement advice first, then a
# comparison report restricted to the companies supplied with the query.
# NOTE(review): the text names the separator token '-----' (five dashes) but
# the string itself ends in '----' (four dashes) — confirm which one any
# downstream splitting relies on. Left untouched because it is program data.
default_prompt = """
You are an assistant and your job is to help the user discover and analyze startups companies. You need to create a report with an analysis of companies relevant to the user's query.
Use only information from the explicit list of companies provided! Don't teach the user about investments and don't provide general information.
Below is the user query followed by a list of company descriptions that match the user query.
First advise the user on ways to improve the query followed by the token '-----'
If the user provide instructions, follow them to create a response. If not, create the following report:
The report should mention the most important companies and how they compare to each other and contain the following sections
- summarize what those companies they are doing (up to 20 words per company)
- name customers and technology if they are mentioned
- compare the companies to each other and point out what they do differently or what is their unique selling proposition
----"""
# Prompt asking the model to turn a user query into a list of retrieval
# keywords, falling back to the original query when unsure.
# NOTE(review): the text contains the typo "retreive"; it is left unchanged
# here because the string is program data, not a comment.
query_finetune_prompt = """
You are an assistant and your job is to help the user discover and analyze startups companies.
Below is a User Query. The user is looking for companies that match. You first need to understand what type of startups the user is looking for based on that query.
Respond with a list of query keywords that will be used to retreive companies that are relevant to the user. If you are not sure, just respond with the user's original query.
"""
# Prompt asking for a summary report over the supplied company descriptions:
# what the companies do, their customers/technology, and how they differ.
# It also tells the model to suggest query improvements when the query is
# too thin and to avoid general investment advice.
summarization_prompt = """
Below is the user query followed by a list of company descriptions that match the user query. Your job is to create a summary report that will help the user find relevant companies.
Use only information from the explicit list of companies provided!
If you don't have enough information in the user query, advise the user on how to improve the query.
Don't teach the user about investments and don't provide general information.
The report should mention the most important companies and how they compare to each other and contain the following sections
- summarize what those companies they are doing
- name customers and technology if they are mentioned
- compare the companies to each other and point out what they do differently or what is their unique selling proposition
"""
# Prompt asking the model to group the retrieved companies into at most five
# clusters and to produce a document with a recap, the clusters, and search
# refinement ideas.
# NOTE(review): the outline at the top uses H2 for sections and H3 for
# cluster names, while the detailed instructions below use H1 and H2 for the
# same parts; the text also contains a duplicated "summary summary". Both are
# left unchanged because the string is program data — confirm the intended
# heading levels before editing.
clustering_prompt = """Please create a document with the following headings:
H2: Recap of your question
H2: Clusters of relevant companies
H3: Name of Cluster 1
* List of the companies
H3: Name of Cluster 2
* List of the companies
H3: Name of Cluster x
* List of the companies
H2: How you could improve your search
As an input you will get
- a list of 20 startup companies
- the original user query that was used to retrieve those companies via semantic search out of our database.
Detailed instructions for creating the chapters:
H1: Recap of your question
“I understand that you wanted to find companies in the area of (rephrase the “user query”). I researched our startup database to identify matching startup companies. Here is what I found. Happy to help you refine the search - see some suggestions at the end of the document.”
H1: Clusters of relevant companies
Create clusters of the companies presented by grouping companies together using three main criteria:
- Prio 1: solve the same problem
- Prio 2: target similar customers
- Prio 3: have the same business model (B2B, B2C, eCommerce & Marketplace, Manufacturing, SaaS, Advertising, Commission, Subscription)
The output should be no more than 5 clusters with the following conditions:
- All companies should be assigned to a cluster.
- Each company should only be part of one cluster and not show up in multiple clusters.
The format of the output should be:
H2: Name of Cluster in bold
One sentence that summarizes what the cluster is about.
List with all the companies in this cluster. Each list item should be structured like this:
* name of the company in bold (URL of the company, country location of the company): short summary summary of what the company does (max 30 tokens)
H1: How you could improve your search
“I hope you have already found some interesting matches. I am happy to let you refine your search. Here are some ideas on how to find matches in relation to your original question around (“user query”):”
* List of ideas on how to refine and improve the search"""
def on_prompt_selected():
    """Load the prompt for the currently selected title into session state.

    Reads the title from ``st.session_state.advanced_prompts_select`` and
    looks it up with ``get_prompt``. When a non-empty prompt comes back, the
    title is stored in ``st.session_state.prompt_title_editable`` and the
    prompt text in ``st.session_state.advanced_prompt_content``. Otherwise
    session state is left untouched and a message is printed.

    NOTE(review): presumably registered as a widget ``on_change`` callback —
    confirm against the caller.
    """
    title = st.session_state.advanced_prompts_select
    found = get_prompt(title)

    # get_prompt() may hand back the prompt text itself or a sequence whose
    # first item is the text. Indexing a plain string with [0] would keep
    # only its first character, so the two shapes are handled separately.
    if isinstance(found, str):
        content = found
    elif found:
        content = found[0]
    else:
        content = ""

    if content:
        print(f"Got a prompt for title {title}\n {content}")
        st.session_state.prompt_title_editable = title
        st.session_state.advanced_prompt_content = content
    else:
        print(f"No results for title {title}")