File size: 4,995 Bytes
287a0bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cohere\n",
    "\n",
    "This notebook demonstrates how to use Cohere Embeddings with Chroma.\n",
    "\n",
    "If you have not already, [create a Cohere account](https://dashboard.cohere.ai/welcome/register) and get your API Key.\n",
    "\n",
    "First, a basic example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n",
      "\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.1.2\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49m/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# Install the two packages this notebook needs.\n",
    "# The %pip magic (rather than `! pip`) installs into the environment of the\n",
    "# running kernel instead of whichever pip happens to be first on the shell PATH.\n",
    "%pip install chromadb --quiet\n",
    "%pip install cohere --quiet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "import getpass\n",
    "import os\n",
    "\n",
    "# Reuse a key that is already set in the environment and only prompt when it is\n",
    "# missing or empty, so the notebook can also be run without an interactive prompt.\n",
    "# getpass does not echo the typed key, so it never ends up in the cell output.\n",
    "if not os.environ.get(\"COHERE_API_KEY\"):\n",
    "    os.environ[\"COHERE_API_KEY\"] = getpass.getpass(\"Cohere API Key:\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'ids': [['3']], 'embeddings': None, 'documents': [['I like oranges']], 'metadatas': [[{'fruit': 'orange'}]], 'distances': [[6729.3291015625]]}\n"
     ]
    }
   ],
   "source": [
    "import chromadb\n",
    "from chromadb.utils import embedding_functions\n",
    "\n",
    "# Embedding function backed by Cohere's \"large\" model, using the key set above.\n",
    "cohere_ef = embedding_functions.CohereEmbeddingFunction(\n",
    "    api_key=os.environ[\"COHERE_API_KEY\"],\n",
    "    model_name=\"large\",\n",
    ")\n",
    "\n",
    "client = chromadb.Client()\n",
    "\n",
    "# get_or_create_collection + upsert keep this cell safe to re-run:\n",
    "# create_collection fails when the name is already taken, whereas these calls\n",
    "# reuse the existing collection and overwrite the records with the same ids.\n",
    "collection = client.get_or_create_collection(\"cohere_python\", embedding_function=cohere_ef)\n",
    "\n",
    "collection.upsert(\n",
    "    ids=[\"1\", \"2\", \"3\"],\n",
    "    documents=[\"I like apples\", \"I like bananas\", \"I like oranges\"],\n",
    "    metadatas=[{\"fruit\": \"apple\"}, {\"fruit\": \"banana\"}, {\"fruit\": \"orange\"}],\n",
    ")\n",
    "\n",
    "# Ask for the single document closest to the query text.\n",
    "print(collection.query(query_texts=[\"citrus\"], n_results=1))\n"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Multilingual Example\n",
    "\n",
    "Cohere can support many languages! In this example we store text in many languages, and then query in English."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'ids': [['9']], 'embeddings': None, 'documents': [['나는 오렌지를 좋아한다']], 'metadatas': [[None]], 'distances': [[30.728900909423828]]}\n"
     ]
    }
   ],
   "source": [
    "# Embedding function backed by Cohere's multilingual model.\n",
    "cohere_multilingual = embedding_functions.CohereEmbeddingFunction(\n",
    "    api_key=os.environ[\"COHERE_API_KEY\"],\n",
    "    model_name=\"multilingual-22-12\",\n",
    ")\n",
    "\n",
    "# 나는 오렌지를 좋아한다 is \"I like oranges\" in Korean\n",
    "multilingual_texts = [ 'Hello from Cohere!', 'مرحبًا من كوهير!', \n",
    "        'Hallo von Cohere!', 'Bonjour de Cohere!', \n",
    "        '¡Hola desde Cohere!', 'Olá do Cohere!', \n",
    "        'Ciao da Cohere!', '您好,来自 Cohere!',\n",
    "        'कोहेरे से नमस्ते!', '나는 오렌지를 좋아한다'  ]\n",
    "\n",
    "# get_or_create_collection + upsert keep this cell safe to re-run on the same\n",
    "# client: create_collection fails when the name is already taken, whereas these\n",
    "# calls reuse the existing collection and overwrite the records with the same ids.\n",
    "collection = client.get_or_create_collection(\n",
    "    \"cohere_multilingual\", embedding_function=cohere_multilingual\n",
    ")\n",
    "\n",
    "# Ids are the list positions as strings (\"0\", \"1\", ...).\n",
    "collection.upsert(\n",
    "    ids=[str(i) for i in range(len(multilingual_texts))],\n",
    "    documents=multilingual_texts\n",
    ")\n",
    "\n",
    "# Query in English; the closest stored text may be in any language.\n",
    "print(collection.query(query_texts=[\"citrus\"], n_results=1))\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}