Spaces:

jdelavande
/

chat-ui-energy

Running on CPU Upgrade

App Files Community

nsarrazin committed on Mar 11

Commit

28b6d44

1 Parent(s): 1ce36bb

fix: improve tasks performance

Browse files

Files changed (5) hide show

chart/env/prod.yaml +5 -10
src/lib/server/textGeneration/reasoning.ts +1 -2
src/lib/server/textGeneration/title.ts +2 -28
src/lib/server/tools/getToolOutput.ts +7 -1
src/lib/server/websearch/search/generateQuery.ts +1 -1

chart/env/prod.yaml CHANGED Viewed

@@ -164,7 +164,6 @@ envVars:
       },
       {
         "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-        "tokenizer": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
         "modelUrl": "https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
         "websiteUrl": "https://www.nvidia.com/",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nvidia-logo.png",
@@ -297,7 +296,6 @@ envVars:
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
         "websiteUrl": "https://nousresearch.com/",
         "modelUrl": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
-        "tokenizer": "NousResearch/Hermes-3-Llama-3.1-8B",
         "promptExamples": [
           {
             "title": "Write an email from bullet list",
@@ -321,7 +319,6 @@ envVars:
       },
       {
         "name": "mistralai/Mistral-Nemo-Instruct-2407",
-        "tokenizer": "mistralai/Mistral-Nemo-Instruct-2407",
         "displayName": "mistralai/Mistral-Nemo-Instruct-2407",
         "description": "A small model with good capabilities in language understanding and commonsense reasoning.",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
@@ -351,7 +348,6 @@ envVars:
       },
       {
         "name": "microsoft/Phi-3.5-mini-instruct",
-        "tokenizer": "microsoft/Phi-3.5-mini-instruct",
         "description": "One of the best small models (3.8B parameters), super fast for simple tasks.",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
         "modelUrl": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
@@ -382,12 +378,11 @@ envVars:
         "name": "meta-llama/Llama-3.1-8B-Instruct",
         "tools": true,
         "unlisted": true,
-        "endpoints": [
-          {
-            "type": "openai",
-            "baseURL": "https://internal.api-inference.huggingface.co/models/meta-llama/Llama-3.1-8B-Instruct/v1"
-          }
-        ]
       }
     ]

       },
       {
         "name": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
         "modelUrl": "https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
         "websiteUrl": "https://www.nvidia.com/",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nvidia-logo.png",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/nous-logo.png",
         "websiteUrl": "https://nousresearch.com/",
         "modelUrl": "https://huggingface.co/NousResearch/Hermes-3-Llama-3.1-8B",
         "promptExamples": [
           {
             "title": "Write an email from bullet list",
       },
       {
         "name": "mistralai/Mistral-Nemo-Instruct-2407",
         "displayName": "mistralai/Mistral-Nemo-Instruct-2407",
         "description": "A small model with good capabilities in language understanding and commonsense reasoning.",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/mistral-logo.png",
       },
       {
         "name": "microsoft/Phi-3.5-mini-instruct",
         "description": "One of the best small models (3.8B parameters), super fast for simple tasks.",
         "logoUrl": "https://huggingface.co/datasets/huggingchat/models-logo/resolve/main/microsoft-logo.png",
         "modelUrl": "https://huggingface.co/microsoft/Phi-3.5-mini-instruct",
         "name": "meta-llama/Llama-3.1-8B-Instruct",
         "tools": true,
         "unlisted": true,
+        "parameters": {
+          "stop": ["<|eot_id|>", "<|im_end|>"],
+          "temperature": 0.1,
+          "max_new_tokens": 256
+        }
       }
     ]

src/lib/server/textGeneration/reasoning.ts CHANGED Viewed

@@ -15,8 +15,7 @@ export async function generateSummaryOfReasoning(buffer: string): Promise<string
 	];
 	const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
-The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points.
-Example: "Thinking about life...", "Summarizing the results...", "Processing the input...". `;
 	if (smallModel.tools) {
 		const summaryTool = {

 	];
 	const preprompt = `You are tasked with submitting a summary of the latest reasoning steps into a tool. Never describe results of the reasoning, only the process. Remain vague in your summary.
+The text might be incomplete, try your best to summarize it in one very short sentence, starting with a gerund and ending with three points. The reasoning follows: \n`;
 	if (smallModel.tools) {
 		const summaryTool = {

src/lib/server/textGeneration/title.ts CHANGED Viewed

@@ -1,6 +1,5 @@
 import { env } from "$env/dynamic/private";
 import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
-import type { EndpointMessage } from "../endpoints/endpoints";
 import { logger } from "$lib/server/logger";
 import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
 import type { Conversation } from "$lib/types/Conversation";
@@ -58,7 +57,7 @@ export async function generateTitle(prompt: string) {
 				},
 			],
 			preprompt:
-				"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence.",
 			tool: titleTool,
 			endpoint,
 		});
@@ -71,34 +70,9 @@ export async function generateTitle(prompt: string) {
 		}
 	}
-	const messages: Array<EndpointMessage> = [
-		{
-			from: "system",
-			content:
-				"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence of four words or less. Always start your answer with an emoji relevant to the summary",
-		},
-		{ from: "user", content: "Who is the president of Gabon?" },
-		{ from: "assistant", content: "🇬🇦 President of Gabon" },
-		{ from: "user", content: "Who is Julien Chaumond?" },
-		{ from: "assistant", content: "🧑 Julien Chaumond" },
-		{ from: "user", content: "what is 1 + 1?" },
-		{ from: "assistant", content: "🔢 Simple math operation" },
-		{ from: "user", content: "What are the latest news?" },
-		{ from: "assistant", content: "📰 Latest news" },
-		{ from: "user", content: "How to make a great cheesecake?" },
-		{ from: "assistant", content: "🍰 Cheesecake recipe" },
-		{ from: "user", content: "what is your favorite movie? do a short answer." },
-		{ from: "assistant", content: "🎥 Favorite movie" },
-		{ from: "user", content: "Explain the concept of artificial intelligence in one sentence" },
-		{ from: "assistant", content: "🤖 AI definition" },
-		{ from: "user", content: "Draw a cute cat" },
-		{ from: "assistant", content: "🐱 Cute cat drawing" },
-		{ from: "user", content: prompt },
-	];
 	return await getReturnFromGenerator(
 		generateFromDefaultEndpoint({
-			messages,
 			preprompt:
 				"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
 			generateSettings: {

 import { env } from "$env/dynamic/private";
 import { generateFromDefaultEndpoint } from "$lib/server/generateFromDefaultEndpoint";
 import { logger } from "$lib/server/logger";
 import { MessageUpdateType, type MessageUpdate } from "$lib/types/MessageUpdate";
 import type { Conversation } from "$lib/types/Conversation";
 				},
 			],
 			preprompt:
+				"You are a summarization AI. You'll never answer a user's question directly, but instead summarize the user's request into a single short sentence. Here is the user message: \n",
 			tool: titleTool,
 			endpoint,
 		});
 		}
 	}
 	return await getReturnFromGenerator(
 		generateFromDefaultEndpoint({
+			messages: [{ from: "user", content: prompt }],
 			preprompt:
 				"You are a summarization AI. Summarize the user's request into a single short sentence of four words or less. Do not try to answer it, only summarize the user's query. Always start your answer with an emoji relevant to the summary",
 			generateSettings: {

src/lib/server/tools/getToolOutput.ts CHANGED Viewed

@@ -42,6 +42,10 @@ export async function getToolOutput<T = string>({
 				);
 				calls.push(...extractedCalls);
 			}
 		}
 		if (calls.length > 0) {
@@ -52,7 +56,9 @@ export async function getToolOutput<T = string>({
 			if (toolCall?.parameters) {
 				// Get the first parameter value since most tools have a single main parameter
 				const firstParamValue = Object.values(toolCall.parameters)[0];
-				return firstParamValue as T;
 			}
 		}

 				);
 				calls.push(...extractedCalls);
 			}
+			if (calls.length > 0) {
+				break;
+			}
 		}
 		if (calls.length > 0) {
 			if (toolCall?.parameters) {
 				// Get the first parameter value since most tools have a single main parameter
 				const firstParamValue = Object.values(toolCall.parameters)[0];
+				if (typeof firstParamValue === "string") {
+					return firstParamValue as T;
+				}
 			}
 		}

src/lib/server/websearch/search/generateQuery.ts CHANGED Viewed

@@ -59,7 +59,7 @@ export async function generateQuery(messages: Message[]) {
 	const webQuery = await getReturnFromGenerator(
 		generateFromDefaultEndpoint({
 			messages: convQuery,
-			preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}`,
 			generateSettings: {
 				max_new_tokens: 30,
 			},

 	const webQuery = await getReturnFromGenerator(
 		generateFromDefaultEndpoint({
 			messages: convQuery,
+			preprompt: `The user wants you to search the web for information. Give a relevant google search query to answer the question. Answer with only the query. Today is ${currentDate}. The conversation follows: \n`,
 			generateSettings: {
 				max_new_tokens: 30,
 			},