 # Alternative Embeddings
 
 This notebook demonstrates how to use alternative embedding functions.
 

In [1]:
import os

import chromadb

In [2]:
# Instantiate the Chroma client with no arguments (library defaults).
# Every collection in this notebook is created through this client.
client = chromadb.Client()

In [3]:
from chromadb.utils import embedding_functions

In [4]:
# Using OpenAI Embeddings. This assumes you have the openai package installed.
#
# The key is read from the OPENAI_API_KEY environment variable so a real secret
# never has to be pasted into (and saved with) the notebook. The original
# placeholder string is kept as the fallback, so the cell behaves as before
# when the variable is not set.
openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_key=os.environ.get("OPENAI_API_KEY", "OPENAI_KEY"),
    model_name="text-embedding-ada-002",
)

In [5]:
# Fetch the "openai_embeddings" collection, creating it if it does not exist
# yet, and attach the OpenAI embedding function to it.
openai_collection = client.get_or_create_collection(
    embedding_function=openai_ef,
    name="openai_embeddings",
)

In [6]:
# Store two sample documents with their metadata under explicit ids.
# Only raw text is passed here (no precomputed embeddings).
openai_collection.add(
    ids=["id1", "id2"],
    documents=[
        "This is a document",
        "This is another document",
    ],
    metadatas=[
        {"source": "my_source"},
        {"source": "my_source"},
    ],
)

In [7]:
# Ask the OpenAI-backed collection for the two results closest to the query text.
results = openai_collection.query(
    n_results=2,
    query_texts=["This is a query document"],
)
results  # last expression of the cell, so the result dict is displayed

{'ids': [['id1', 'id2']],
 'distances': [[0.1385088860988617, 0.2017185091972351]],
 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],
 'embeddings': None,
 'documents': [['This is a document', 'This is another document']]}

In [9]:
# Using Cohere Embeddings. This assumes you have the cohere package installed.
#
# The key is read from the COHERE_API_KEY environment variable so a real secret
# never has to be pasted into (and saved with) the notebook. The original
# placeholder string is kept as the fallback, so the cell behaves as before
# when the variable is not set.
cohere_ef = embedding_functions.CohereEmbeddingFunction(
    api_key=os.environ.get("COHERE_API_KEY", "COHERE_API_KEY"),
    model_name="large",
)

In [10]:
# Create (or reuse) the Cohere-backed chroma collection.
# get_or_create_collection is used, as for the OpenAI collection, so that
# re-running this cell against the same client does not fail because
# "cohere_embeddings" already exists.
cohere_collection = client.get_or_create_collection(
    name="cohere_embeddings",
    embedding_function=cohere_ef,
)

In [11]:
# Store the same two sample documents in the Cohere-backed collection.
# Only raw text is passed here (no precomputed embeddings).
cohere_collection.add(
    ids=["id1", "id2"],
    documents=[
        "This is a document",
        "This is another document",
    ],
    metadatas=[
        {"source": "my_source"},
        {"source": "my_source"},
    ],
)

In [12]:
# Ask the Cohere-backed collection for the two results closest to the query text.
results = cohere_collection.query(
    n_results=2,
    query_texts=["This is a query document"],
)
results  # last expression of the cell, so the result dict is displayed

{'ids': [['id1', 'id2']],
 'embeddings': None,
 'documents': [['This is a document', 'This is another document']],
 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],
 'distances': [[4343.1328125, 5653.28759765625]]}

In [None]:
# Using Instructor models. The embedding function requires the InstructorEmbedding package.
# To install it, run: pip install InstructorEmbedding


# Default construction (no arguments): uses the base model on the CPU.
instructor_ef = embedding_functions.InstructorEmbeddingFunction() 

# Alternative: for task-specific embeddings, pass an instruction.
# instructor_ef = embedding_functions.InstructorEmbeddingFunction(
#     instruction="Represent the Wikipedia document for retrieval: "
# )

# Alternative: use the hkunlp/instructor-xl model on a GPU.
# instructor_ef = embedding_functions.InstructorEmbeddingFunction(model_name="hkunlp/instructor-xl", device="cuda")

In [None]:
# Create (or reuse) a collection with the instructor embedding function.
# get_or_create_collection is used, as for the OpenAI collection, so that
# re-running this cell against the same client does not fail because
# "instructor_embeddings" already exists.
instructor_collection = client.get_or_create_collection(
    name="instructor_embeddings",
    embedding_function=instructor_ef,
)

In [None]:
# Store the two sample documents in the Instructor-backed collection.
# Only raw text is passed here (no precomputed embeddings).
instructor_collection.add(
    ids=["id1", "id2"],
    documents=[
        "This is a document",
        "This is another document",
    ],
    metadatas=[
        {"source": "my_source"},
        {"source": "my_source"},
    ],
)

# Alternative: adding documents with an instruction.
# NOTE(review): a collection named "instructor_embeddings" has already been
# created earlier in this notebook, so the create_collection call below would
# presumably fail if uncommented as-is - confirm, and use a different name or
# get_or_create_collection if so.
# instructor_ef = embedding_functions.InstructorEmbeddingFunction(
#     instruction="Represent the Science sentence: "
# )
# instructor_collection = client.create_collection(name="instructor_embeddings", embedding_function=instructor_ef)
# instructor_collection.add(documents=["Parton energy loss in QCD matter"], ids=["id1"])

In [None]:
# Ask the Instructor-backed collection for the two results closest to the query text.
results = instructor_collection.query(
    n_results=2,
    query_texts=["This is a query document"],
)
results  # last expression of the cell, so the result dict is displayed

# Alternative: querying with an instruction.
# instructor_ef = embedding_functions.InstructorEmbeddingFunction(instruction="Represent the Wikipedia question for retrieving supporting documents: ")
# instructor_collection = client.get_collection(name="instructor_embeddings", embedding_function=instructor_ef)
# results = instructor_collection.query(query_texts=["where is the food stored in a yam plant"])

In [5]:
# Using HuggingFace models. The embedding function requires a HuggingFace API key.
#
# The key is read from the HUGGINGFACE_API_KEY environment variable so a real
# secret never has to be pasted into (and saved with) the notebook. The original
# placeholder string is kept as the fallback, so the cell behaves as before
# when the variable is not set.
huggingface_ef = embedding_functions.HuggingFaceEmbeddingFunction(
    api_key=os.environ.get("HUGGINGFACE_API_KEY", "HUGGINGFACE_API_KEY"),
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

In [6]:
# Create (or reuse) the HuggingFace-backed collection.
# get_or_create_collection is used, as for the OpenAI collection, so that
# re-running this cell against the same client does not fail because
# "huggingface_embeddings" already exists.
huggingface_collection = client.get_or_create_collection(
    name="huggingface_embeddings",
    embedding_function=huggingface_ef,
)

In [7]:
# Store the two sample documents in the HuggingFace-backed collection.
# Only raw text is passed here (no precomputed embeddings).
huggingface_collection.add(
    ids=["id1", "id2"],
    documents=[
        "This is a document",
        "This is another document",
    ],
    metadatas=[
        {"source": "my_source"},
        {"source": "my_source"},
    ],
)

In [8]:
# Ask the HuggingFace-backed collection for the two results closest to the query text.
results = huggingface_collection.query(
    n_results=2,
    query_texts=["This is a query document"],
)
results  # last expression of the cell, so the result dict is displayed

{'ids': [['id1', 'id2']],
 'embeddings': None,
 'documents': [['This is a document', 'This is another document']],
 'metadatas': [[{'source': 'my_source'}, {'source': 'my_source'}]],
 'distances': [[0.7111215591430664, 1.010978102684021]]}