File size: 3,523 Bytes
295e44e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import express from 'express';
import fs from 'fs';
import * as PlayHT from 'playht';
import Groq from 'groq-sdk';
import path from 'path';
import dotenv from 'dotenv';
import { fileURLToPath } from 'url';

// Get the current file URL and convert it to a file path
// (ES modules have no __filename/__dirname; reconstruct them from import.meta.url)
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Load .env into process.env. Must run BEFORE the PlayHT/Groq clients below
// read PLAYHT_USER_ID / PLAYHT_API_KEY / GROQ_API_KEY.
dotenv.config();

const app = express();
const port = 3000;

// Initialize PlayHT client (text-to-speech) with credentials from the environment
PlayHT.init({
  userId: process.env.PLAYHT_USER_ID,
  apiKey: process.env.PLAYHT_API_KEY,
});

// Initialize Groq client (speech-to-text transcription + chat completions)
const groq = new Groq({ apiKey: process.env.GROQ_API_KEY });

// Store user conversation history in memory
// NOTE(review): unbounded and per-process — histories grow forever and are
// lost on restart; fine for a demo, not for production.
const userHistories = {};

// Serve static files from ./public (index.html, client-side recorder, etc.)
app.use(express.static(path.join(__dirname, 'public')));
// Large JSON limit because audio arrives base64-encoded in the request body
app.use(express.json({ limit: '50mb' }));

// Route to handle audio input.
// Pipeline: base64 audio -> Groq Whisper transcription -> Groq chat completion
// (with per-user history) -> PlayHT TTS -> mp3 response.
// Responds 400 on missing userId/audioData, 500 on any pipeline failure.
app.post('/call-ai', async (req, res) => {
  const { audioData, userId } = req.body;

  if (!userId) {
    return res.status(400).send('User ID is required.');
  }
  if (!audioData) {
    return res.status(400).send('Audio data is required.');
  }

  // Initialize user history if it doesn't exist.
  // Fix: the priming prompt is a system message, not a user turn.
  if (!userHistories[userId]) {
    userHistories[userId] = [{ role: 'system', content: 'You are an helpful AI assistant, you always give short and to the point answers. You always answer in 1-2 lines.' }];
  }

  // Paths declared outside try so the catch block can clean them up.
  const audioFilePath = `./uploads/input_${Date.now()}.wav`;
  const audioFilePathOutput = `./outputs/output_${Date.now()}.mp3`;

  try {
    // Step 1: Convert audio to text.
    // Ensure working directories exist (first request on a fresh checkout).
    fs.mkdirSync('./uploads', { recursive: true });
    fs.mkdirSync('./outputs', { recursive: true });
    fs.writeFileSync(audioFilePath, Buffer.from(audioData, 'base64'));

    const transcription = await groq.audio.transcriptions.create({
      file: fs.createReadStream(audioFilePath),
      model: 'distil-whisper-large-v3-en',
      response_format: 'json',
      language: 'en',
    });

    const userInput = transcription.text;

    // Add user input to the conversation history
    userHistories[userId].push({ role: 'user', content: userInput });

    // Step 2: Pass the history (including the latest input) to the chat completion agent
    const completion = await groq.chat.completions.create({
      messages: userHistories[userId],
      model: 'llama3-8b-8192',
      max_tokens: 128
    });

    const botResponse = completion.choices[0]?.message?.content || 'I am sorry, I cannot respond right now.';

    // Add bot response to the conversation history
    userHistories[userId].push({ role: 'assistant', content: botResponse });

    // Step 3: Convert the bot response to audio.
    const stream = await PlayHT.stream(botResponse, { voiceEngine: 'PlayDialog' });

    // Pipe the TTS stream to disk and wait for the write side to FLUSH
    // ('finish'), not just for the read side to end — otherwise we could
    // serve a partially written file. Reject on errors from either side.
    await new Promise((resolve, reject) => {
      const writeStream = fs.createWriteStream(audioFilePathOutput);
      stream.on('error', reject);
      writeStream.on('error', reject);
      writeStream.on('finish', resolve);
      stream.pipe(writeStream);
    });

    res.sendFile(path.resolve(audioFilePathOutput), () => {
      // Clean up uploaded and generated files (best-effort, async)
      fs.unlink(audioFilePath, () => {});
      fs.unlink(audioFilePathOutput, () => {});
    });
  } catch (error) {
    console.error(error);
    // Best-effort cleanup of any temp files created before the failure
    for (const tempPath of [audioFilePath, audioFilePathOutput]) {
      try {
        fs.unlinkSync(tempPath);
      } catch {
        // file may never have been created — ignore
      }
    }
    if (!res.headersSent) {
      res.status(500).send('An error occurred while processing your request.');
    }
  }
});

// Serve the microphone-capture webpage at the root URL,
// then start accepting connections.
app.get('/', (req, res) => {
  const indexPage = path.join(__dirname, 'public', 'index.html');
  res.sendFile(indexPage);
});

app.listen(port, () => {
  console.log(`Server is running on http://localhost:${port}`);
});