jbilcke-hf HF Staff commited on
Commit
e90c5a9
·
1 Parent(s): cc9779d

updated the AI news bot

Browse files
.env CHANGED
@@ -1,4 +1,4 @@
1
 
2
  AI_NEWS_HUGGINGFACE_API_KEY=""
3
-
4
  AI_NEWS_NEWSDATA_API_KEY=""
 
1
 
2
  AI_NEWS_HUGGINGFACE_API_KEY=""
3
+ AI_NEWS_HUGGINGFACE_INFERENCE_API_MODEL="HuggingFaceH4/zephyr-7b-beta"
4
  AI_NEWS_NEWSDATA_API_KEY=""
src/config.mts CHANGED
@@ -19,4 +19,6 @@ try {
19
  }
20
 
21
  export const huggingfaceApiKey = `${process.env.AI_NEWS_HUGGINGFACE_API_KEY || ""}`
 
 
22
  export const newsdataApiKey = `${process.env.AI_NEWS_NEWSDATA_API_KEY || ""}`
 
19
  }
20
 
21
  export const huggingfaceApiKey = `${process.env.AI_NEWS_HUGGINGFACE_API_KEY || ""}`
22
+ export const hfInferenceApiModel = `${process.env.AI_NEWS_HUGGINGFACE_INFERENCE_API_MODEL || ""}`
23
+
24
  export const newsdataApiKey = `${process.env.AI_NEWS_NEWSDATA_API_KEY || ""}`
src/createZephyrPrompt.mts ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ interface Message {
3
+ role: "system" | "user" | "assistant";
4
+ content: string;
5
+ }
6
+
7
+ /**
8
+ * Formats the messages for the chat with the LLM model in the style of a pirate.
9
+ * @param messages - Array of message objects with role and content.
10
+ * @returns The formatted chat prompt.
11
+ */
12
+ export function createZephyrPrompt(messages: Message[]): string {
13
+ let prompt = ``;
14
+
15
+ // Iterate over messages and generate corresponding chat entries.
16
+ messages.forEach(message => {
17
+ prompt += `<|${message.role}|>\n${message.content.trim()}</s>`;
18
+ });
19
+
20
+ if (messages.at(-1)?.role === "user") {
21
+ // Append the assistant's tag for the next response but without a closing tag.
22
+ prompt += `<|assistant|>`;
23
+ }
24
+
25
+ return prompt;
26
+ }
src/getNews.mts CHANGED
@@ -4,22 +4,26 @@ import { ApiResponse } from "./types.mts"
4
  export async function getNews(): Promise<ApiResponse> {
5
 
6
  const q = [
7
- "ai",
8
- "artificial intelligence",
9
  "openai",
10
  "huggingface",
11
- "hugging face",
12
- "stable diffusion",
13
- "ai ethics",
14
- "tesla optimus",
15
- "Optimus Gen2",
 
 
 
 
16
  "sdxl",
17
- "gaussian splatting",
18
- "latent space",
19
- "gpu",
20
- "nvidia",
21
- "spatial computing",
22
- "apple vision pro"
23
  ].join(" OR ")
24
 
25
  const country = [
 
4
  export async function getNews(): Promise<ApiResponse> {
5
 
6
  const q = [
7
+ // "ai",
8
+ // "artificial intelligence",
9
  "openai",
10
  "huggingface",
11
+ // "hugging face",
12
+ // "stable diffusion",
13
+ // "ai ethics",
14
+ // "tesla optimus",
15
+ // "Optimus Gen2",
16
+ "LLM",
17
+ "chatgpt",
18
+ "runwayml",
19
+ "pikalabs",
20
  "sdxl",
21
+ // "gaussian splatting",
22
+ // "latent space",
23
+ // "gpu",
24
+ // "nvidia",
25
+ // "spatial computing",
26
+ // "apple vision pro"
27
  ].join(" OR ")
28
 
29
  const country = [
src/main.mts CHANGED
@@ -2,23 +2,29 @@ import YAML from "yaml"
2
 
3
  import { sampleDataFreePlan } from "./sampleDataFreePlan.mts"
4
  import { getNews } from "./getNews.mts"
 
 
5
 
6
  export const main = async () => {
 
 
 
7
 
8
- let delayInSeconds = 15 * 60 // let's check every 5 minutes
9
  console.log(`-------------- ${delayInSeconds} sec have elapsed --------------`)
10
  console.log("- checking NewsData.io API..")
11
 
12
 
13
- let newsItems: any[] = []
14
 
 
 
15
  // const data = await getNews()
16
  const data = sampleDataFreePlan
17
 
18
  const interestingNews = data.results.filter(news =>
19
  news.language === "english" &&
20
  news.country.includes("united states of america")
21
- )
22
 
23
  interestingNews.forEach(({
24
  article_id, // string
@@ -40,9 +46,7 @@ export const main = async () => {
40
  sentiment, // string
41
  sentiment_stats // SentimentStats
42
  }) => {
43
-
44
- console.log("language:", language)
45
- newsItems.push({
46
  title,
47
  data: pubDate,
48
  countries: country,
@@ -51,11 +55,59 @@ export const main = async () => {
51
  })
52
  })
53
 
54
- const newsPrompt = YAML.stringify(newsItems)
55
- console.log("news:", newsPrompt)
56
- console.log("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
- // loop every hour
59
  setTimeout(() => {
60
  main()
61
  }, delayInSeconds * 1000)
 
2
 
3
  import { sampleDataFreePlan } from "./sampleDataFreePlan.mts"
4
  import { getNews } from "./getNews.mts"
5
+ import { summarizeWithZephyr } from "./summarizeWithZephyr.mts"
6
+ import { sleep } from "./sleep.mts"
7
 
8
  export const main = async () => {
9
+ // console.log(JSON.stringify(await getNews(), null, 2))
10
+
11
+ let delayInSeconds = 15 * 60
12
 
 
13
  console.log(`-------------- ${delayInSeconds} sec have elapsed --------------`)
14
  console.log("- checking NewsData.io API..")
15
 
16
 
17
+ let news: any[] = []
18
 
19
+ let nbMaxNews = 5
20
+
21
  // const data = await getNews()
22
  const data = sampleDataFreePlan
23
 
24
  const interestingNews = data.results.filter(news =>
25
  news.language === "english" &&
26
  news.country.includes("united states of america")
27
+ ).slice(0, nbMaxNews)
28
 
29
  interestingNews.forEach(({
30
  article_id, // string
 
46
  sentiment, // string
47
  sentiment_stats // SentimentStats
48
  }) => {
49
+ news.push({
 
 
50
  title,
51
  data: pubDate,
52
  countries: country,
 
55
  })
56
  })
57
 
58
+ const summaries: string[] = []
59
+ for (const item of news) {
60
+ console.log("- generating summary..")
61
+
62
+ const summary = await summarizeWithZephyr({
63
+ news: YAML.stringify(item)
64
+ })
65
+ console.log(" -> summary: " + summary)
66
+ if (summary) {
67
+ summaries.push(summary)
68
+ await sleep(1000)
69
+ } else {
70
+ console.log("(!) got no data!")
71
+ await sleep(5000)
72
+ }
73
+ }
74
+
75
+ const today = new Date().toLocaleDateString('en-us', {
76
+ weekday: "long",
77
+ year: "numeric",
78
+ month: "short",
79
+ day: "numeric"
80
+ })
81
+ // "Friday, Jul 2, 2021"
82
+
83
+ const markdown = `
84
+ # Title
85
+
86
+ News Report ${today} 🔥
87
+
88
+ # Description
89
+
90
+ A summary of what happened today in the world of tech and AI
91
+
92
+ # Prompt
93
+
94
+ You are a TV news channel agent.
95
+ Your mission is to summarize news (which will be given to you as YAML) into a compact, dense news report suitable for a news anchor.
96
+ Please limit yourself to about 20 paragraphs.
97
+
98
+ Here is the summary of today's news:
99
+
100
+ article:
101
+ ${summaries.join("\n\narticle:")}
102
+ `
103
+
104
+ console.log("final markdown:", markdown)
105
+
106
+
107
+
108
+
109
+ // TODO: generate a markdown file and upload it to Hugging Face
110
 
 
111
  setTimeout(() => {
112
  main()
113
  }, delayInSeconds * 1000)
src/sampleDataFreePlan.mts CHANGED
The diff for this file is too large to render. See raw diff
 
src/sleep.mts ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ export const sleep = async (durationInMs: number) =>
2
+ new Promise((resolve) => {
3
+ setTimeout(() => {
4
+ resolve(true)
5
+ }, durationInMs)
6
+ })
src/summarizeWithZephyr.mts ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { HfInference } from "@huggingface/inference"
2
+
3
+ import { createZephyrPrompt } from "./createZephyrPrompt.mts"
4
+ import { hfInferenceApiModel, huggingfaceApiKey } from "./config.mts"
5
+
6
+
7
+ const hf = new HfInference(huggingfaceApiKey)
8
+
9
+ export async function summarizeWithZephyr({
10
+ news,
11
+ neverThrow
12
+ }: {
13
+ news: string
14
+ neverThrow?: boolean
15
+ }): Promise<string> {
16
+ try {
17
+ const inputs = createZephyrPrompt([
18
+ {
19
+ role: "system",
20
+ content: `Your must summarize the content into 2 or 3 sentence. DO NOT write more than than. Keep it dense and simple, and short.`,
21
+ },
22
+ {
23
+ role: "user",
24
+ content: news,
25
+ }
26
+ ]) //+ "\n["
27
+
28
+ const nbMaxNewTokens = 250
29
+
30
+ let rawBufferString = ""
31
+ try {
32
+ for await (const output of hf.textGenerationStream({
33
+ model: hfInferenceApiModel,
34
+ inputs,
35
+ parameters: {
36
+ do_sample: true,
37
+ max_new_tokens: nbMaxNewTokens,
38
+ return_full_text: false,
39
+ }
40
+ })) {
41
+ rawBufferString += output.token.text
42
+ // process.stdout.write(output.token.text)
43
+ if (
44
+ rawBufferString.includes("</s>") ||
45
+ rawBufferString.includes("<s>") ||
46
+ rawBufferString.includes("/s>") ||
47
+ rawBufferString.includes("[INST]") ||
48
+ rawBufferString.includes("[/INST]") ||
49
+ rawBufferString.includes("<SYS>") ||
50
+ rawBufferString.includes("<<SYS>>") ||
51
+ rawBufferString.includes("</SYS>") ||
52
+ rawBufferString.includes("<</SYS>>") ||
53
+ rawBufferString.includes("<|user|>") ||
54
+ rawBufferString.includes("<|end|>") ||
55
+ rawBufferString.includes("<|system|>") ||
56
+ rawBufferString.includes("<|assistant|>")
57
+ ) {
58
+ break
59
+ }
60
+ }
61
+ } catch (err) {
62
+ // console.error(`error during generation: ${err}`)
63
+
64
+ if (`${err}` === "Error: Model is overloaded") {
65
+ rawBufferString = ``
66
+ }
67
+ }
68
+
69
+ const tmpResult =
70
+ rawBufferString.replaceAll("</s>", "")
71
+ .replaceAll("<s>", "")
72
+ .replaceAll("/s>", "")
73
+ .replaceAll("[INST]", "")
74
+ .replaceAll("[/INST]", "")
75
+ .replaceAll("<SYS>", "")
76
+ .replaceAll("<<SYS>>", "")
77
+ .replaceAll("</SYS>", "")
78
+ .replaceAll("<</SYS>>", "")
79
+ .replaceAll("<|user|>", "")
80
+ .replaceAll("<|end|>", "")
81
+ .replaceAll("<|system|>", "")
82
+ .replaceAll("<|assistant|>", "")
83
+
84
+
85
+ return tmpResult
86
+ } catch (err) {
87
+ if (neverThrow) {
88
+ console.error(`summarizeWithZephyr():`, err)
89
+ return ""
90
+ } else {
91
+ throw err
92
+ }
93
+ }
94
+ }