Spaces:

jdelavande
/

chat-ui-energy

Running on CPU Upgrade

App Files Files Community

Luke

nsarrazin committed on Apr 23, 2024

Commit

15042e9

unverified ·

1 Parent(s): 0f622fd

Support Gemini 1.5 Pro from Vertex AI (#1041)

Browse files

* fix support for gemini on vertex ai

- use native messages api
- provide full content
- hide "Continue" button
- support "safety settings"
- fix confusion in readme

* respect new lines in model description

* ignore google service accounts matching `gcp-*.json`

* type checks

* copy service account, if exists, to container

* narrow response type

* fix streaming generation

---------

Co-authored-by: Nathan Sarrazin <[email protected]>

Files changed (7) hide show

.gitignore +2 -1
Dockerfile +1 -0
README.md +12 -6
package-lock.json +5 -4
package.json +1 -1
src/lib/server/endpoints/google/endpointVertex.ts +108 -43
src/routes/models/+page.svelte +3 -1

.gitignore CHANGED Viewed

@@ -11,4 +11,5 @@ SECRET_CONFIG
 .idea
 !.env.ci
 !.env
-!.env.template

 .idea
 !.env.ci
 !.env
+!.env.template
+gcp-*.json

Dockerfile CHANGED Viewed

@@ -37,5 +37,6 @@ ENV HOME=/home/user \
 COPY --from=builder-production --chown=1000 /app/node_modules /app/node_modules
 COPY --link --chown=1000 package.json /app/package.json
 COPY --from=builder --chown=1000 /app/build /app/build
 CMD pm2 start /app/build/index.js -i $CPU_CORES --no-daemon

 COPY --from=builder-production --chown=1000 /app/node_modules /app/node_modules
 COPY --link --chown=1000 package.json /app/package.json
 COPY --from=builder --chown=1000 /app/build /app/build
+COPY --chown=1000 gcp-*.json /app/
 CMD pm2 start /app/build/index.js -i $CPU_CORES --no-daemon

README.md CHANGED Viewed

@@ -601,18 +601,24 @@ The service account credentials file can be imported as an environmental variabl
     GOOGLE_APPLICATION_CREDENTIALS = clientid.json
 ```
-Make sure docker has access to the file. Afterwards Google Vertex endpoints can be configured as following:
 ```
 MODELS=`[
 //...
     {
-       "name": "gemini-1.0-pro", //model-name
-       "displayName": "Vertex Gemini Pro 1.0",
-       "location": "europe-west3",
-       "apiEndpoint": "", //alternative api endpoint url
        "endpoints" : [{
-         "type": "vertex"
        }]
      },
 ]`

     GOOGLE_APPLICATION_CREDENTIALS = clientid.json
 ```
+Make sure your Docker container has access to the file and that the variable is set correctly.
+Afterwards, Google Vertex endpoints can be configured as follows:
 ```
 MODELS=`[
 //...
     {
+       "name": "gemini-1.5-pro",
+       "displayName": "Vertex Gemini Pro 1.5",
        "endpoints" : [{
+          "type": "vertex",
+          "project": "abc-xyz",
+          "location": "europe-west3",
+          "model": "gemini-1.5-pro-preview-0409", // model-name
+          // Optional
+          "safetyThreshold": "BLOCK_MEDIUM_AND_ABOVE",
+          "apiEndpoint": "", // alternative api endpoint url
        }]
      },
 ]`

package-lock.json CHANGED Viewed

@@ -8,6 +8,7 @@
 			"name": "chat-ui",
 			"version": "0.8.2",
 			"dependencies": {
 				"@huggingface/hub": "^0.5.1",
 				"@huggingface/inference": "^2.6.3",
 				"@iconify-json/bi": "^1.1.21",
@@ -72,7 +73,7 @@
 			},
 			"optionalDependencies": {
 				"@anthropic-ai/sdk": "^0.17.1",
-				"@google-cloud/vertexai": "^0.5.0",
 				"aws4fetch": "^1.0.17",
 				"cohere-ai": "^7.9.0",
 				"openai": "^4.14.2"
@@ -630,9 +631,9 @@
 			}
 		},
 		"node_modules/@google-cloud/vertexai": {
-			"version": "0.5.0",
-			"resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-0.5.0.tgz",
-			"integrity": "sha512-qIFHYTXA5UCLdm9JG+Xf1suomCXxRqa1PKdYjqXuhZsCm8mn37Rb0Tf8djlhDzuRVWyWoQTmsWpsk28ZTmbqJg==",
 			"optional": true,
 			"dependencies": {
 				"google-auth-library": "^9.1.0"

 			"name": "chat-ui",
 			"version": "0.8.2",
 			"dependencies": {
+				"@google-cloud/vertexai": "^1.1.0",
 				"@huggingface/hub": "^0.5.1",
 				"@huggingface/inference": "^2.6.3",
 				"@iconify-json/bi": "^1.1.21",
 			},
 			"optionalDependencies": {
 				"@anthropic-ai/sdk": "^0.17.1",
+				"@google-cloud/vertexai": "^1.1.0",
 				"aws4fetch": "^1.0.17",
 				"cohere-ai": "^7.9.0",
 				"openai": "^4.14.2"
 			}
 		},
 		"node_modules/@google-cloud/vertexai": {
+			"version": "1.1.0",
+			"resolved": "https://registry.npmjs.org/@google-cloud/vertexai/-/vertexai-1.1.0.tgz",
+			"integrity": "sha512-hfwfdlVpJ+kM6o2b5UFfPnweBcz8tgHAFRswnqUKYqLJsvKU0DDD0Z2/YKoHyAUoPJAv20qg6KlC3msNeUKUiw==",
 			"optional": true,
 			"dependencies": {
 				"google-auth-library": "^9.1.0"

package.json CHANGED Viewed

@@ -82,7 +82,7 @@
 	},
 	"optionalDependencies": {
 		"@anthropic-ai/sdk": "^0.17.1",
-		"@google-cloud/vertexai": "^0.5.0",
 		"aws4fetch": "^1.0.17",
 		"cohere-ai": "^7.9.0",
 		"openai": "^4.14.2"

 	},
 	"optionalDependencies": {
 		"@anthropic-ai/sdk": "^0.17.1",
+		"@google-cloud/vertexai": "^1.1.0",
 		"aws4fetch": "^1.0.17",
 		"cohere-ai": "^7.9.0",
 		"openai": "^4.14.2"

src/lib/server/endpoints/google/endpointVertex.ts CHANGED Viewed

@@ -1,8 +1,14 @@
-import { VertexAI, HarmCategory, HarmBlockThreshold } from "@google-cloud/vertexai";
-import { buildPrompt } from "$lib/buildPrompt";
-import type { TextGenerationStreamOutput } from "@huggingface/inference";
 import type { Endpoint } from "../endpoints";
 import { z } from "zod";
 export const endpointVertexParametersSchema = z.object({
 	weight: z.number().int().positive().default(1),
@@ -11,10 +17,20 @@ export const endpointVertexParametersSchema = z.object({
 	location: z.string().default("europe-west1"),
 	project: z.string(),
 	apiEndpoint: z.string().optional(),
 });
 export function endpointVertex(input: z.input<typeof endpointVertexParametersSchema>): Endpoint {
-	const { project, location, model, apiEndpoint } = endpointVertexParametersSchema.parse(input);
 	const vertex_ai = new VertexAI({
 		project,
@@ -22,55 +38,104 @@ export function endpointVertex(input: z.input<typeof endpointVertexParametersSch
 		apiEndpoint,
 	});
-	const generativeModel = vertex_ai.getGenerativeModel({
-		model: model.id ?? model.name,
-		safety_settings: [
-			{
-				category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
-				threshold: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
 			},
-		],
-		generation_config: {},
-	});
-	return async ({ messages, preprompt, continueMessage }) => {
-		const prompt = await buildPrompt({
-			messages,
-			continueMessage,
-			preprompt,
-			model,
 		});
-		const chat = generativeModel.startChat();
-		const result = await chat.sendMessageStream(prompt);
-		let tokenId = 0;
 		return (async function* () {
 			let generatedText = "";
 			for await (const data of result.stream) {
-				if (Array.isArray(data?.candidates) && data.candidates.length > 0) {
-					const firstPart = data.candidates[0].content.parts[0];
-					if ("text" in firstPart) {
-						const content = firstPart.text;
-						generatedText += content;
-						const output: TextGenerationStreamOutput = {
-							token: {
-								id: tokenId++,
-								text: content ?? "",
-								logprob: 0,
-								special: false,
-							},
-							generated_text: generatedText,
-							details: null,
-						};
-						yield output;
-					}
-					if (!data.candidates.slice(-1)[0].finishReason) break;
-				} else {
-					break;
-				}
 			}
 		})();
 	};

+import {
+	VertexAI,
+	HarmCategory,
+	HarmBlockThreshold,
+	type Content,
+	type TextPart,
+} from "@google-cloud/vertexai";
 import type { Endpoint } from "../endpoints";
 import { z } from "zod";
+import type { Message } from "$lib/types/Message";
+import type { TextGenerationStreamOutput } from "@huggingface/inference";
 export const endpointVertexParametersSchema = z.object({
 	weight: z.number().int().positive().default(1),
 	location: z.string().default("europe-west1"),
 	project: z.string(),
 	apiEndpoint: z.string().optional(),
+	safetyThreshold: z
+		.enum([
+			HarmBlockThreshold.HARM_BLOCK_THRESHOLD_UNSPECIFIED,
+			HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
+			HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
+			HarmBlockThreshold.BLOCK_NONE,
+			HarmBlockThreshold.BLOCK_ONLY_HIGH,
+		])
+		.optional(),
 });
 export function endpointVertex(input: z.input<typeof endpointVertexParametersSchema>): Endpoint {
+	const { project, location, model, apiEndpoint, safetyThreshold } =
+		endpointVertexParametersSchema.parse(input);
 	const vertex_ai = new VertexAI({
 		project,
 		apiEndpoint,
 	});
+	return async ({ messages, preprompt, generateSettings }) => {
+		const generativeModel = vertex_ai.getGenerativeModel({
+			model: model.id ?? model.name,
+			safetySettings: safetyThreshold
+				? [
+						{
+							category: HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+							threshold: safetyThreshold,
+						},
+						{
+							category: HarmCategory.HARM_CATEGORY_HARASSMENT,
+							threshold: safetyThreshold,
+						},
+						{
+							category: HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+							threshold: safetyThreshold,
+						},
+						{
+							category: HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+							threshold: safetyThreshold,
+						},
+						{
+							category: HarmCategory.HARM_CATEGORY_UNSPECIFIED,
+							threshold: safetyThreshold,
+						},
+				  ]
+				: undefined,
+			generationConfig: {
+				maxOutputTokens: generateSettings?.max_new_tokens ?? 4096,
+				stopSequences: generateSettings?.stop,
+				temperature: generateSettings?.temperature ?? 1,
 			},
+		});
+		// Preprompt is the same as the first system message.
+		let systemMessage = preprompt;
+		if (messages[0].from === "system") {
+			systemMessage = messages[0].content;
+			messages.shift();
+		}
+		const vertexMessages = messages.map(({ from, content }: Omit<Message, "id">): Content => {
+			return {
+				role: from === "user" ? "user" : "model",
+				parts: [
+					{
+						text: content,
+					},
+				],
+			};
 		});
+		const result = await generativeModel.generateContentStream({
+			contents: vertexMessages,
+			systemInstruction: systemMessage
+				? {
+						role: "system",
+						parts: [
+							{
+								text: systemMessage,
+							},
+						],
+				  }
+				: undefined,
+		});
+		let tokenId = 0;
 		return (async function* () {
 			let generatedText = "";
 			for await (const data of result.stream) {
+				if (!data?.candidates?.length) break; // Handle case where no candidates are present
+				const candidate = data.candidates[0];
+				if (!candidate.content?.parts?.length) continue; // Skip if no parts are present
+				const firstPart = candidate.content.parts.find((part) => "text" in part) as
+					| TextPart
+					| undefined;
+				if (!firstPart) continue; // Skip if no text part is found
+				const isLastChunk = !!candidate.finishReason;
+				const content = firstPart.text;
+				generatedText += content;
+				const output: TextGenerationStreamOutput = {
+					token: {
+						id: tokenId++,
+						text: content,
+						logprob: 0,
+						special: isLastChunk,
+					},
+					generated_text: isLastChunk ? generatedText : null,
+					details: null,
+				};
+				yield output;
+				if (isLastChunk) break;
 			}
 		})();
 	};

src/routes/models/+page.svelte CHANGED Viewed

@@ -64,7 +64,9 @@
 					<dt class="flex items-center gap-2 font-semibold">
 						{model.displayName}
 					</dt>
-					<dd class="text-sm text-gray-500 dark:text-gray-400">{model.description || "-"}</dd>
 				</a>
 			{/each}
 		</dl>

 					<dt class="flex items-center gap-2 font-semibold">
 						{model.displayName}
 					</dt>
+					<dd class="whitespace-pre-wrap text-sm text-gray-500 dark:text-gray-400">
+						{model.description || "-"}
+					</dd>
 				</a>
 			{/each}
 		</dl>