openai/openai-realtime-api-beta

The socket server is not stable: the `client.on('conversation.updated', fn)` callback sometimes does not receive an item with the 'assistant' role.

Opened this issue · 0 comments

My instructions config file:

/**
 * Builds the prompt text for the translation agent.
 *
 * The target language is everything in `label` after its first
 * space-separated word. `text` is interpolated verbatim as the "dest" value
 * of the example reply format.
 *
 * @param label - Label whose words after the first one name the language.
 * @param text - Value placed after `"dest":` in the format example.
 * @returns The prompt, starting and ending with a newline.
 */
export const instructions = ({ label, text }) => {
  // Drop the first word of the label; the remainder is the language name.
  const [, ...languageWords] = label.split(' ');
  const targetLanguage = languageWords.join(' ');

  // One entry per output line; the empty first and last entries yield the
  // leading and trailing newline of the prompt.
  const promptLines = [
    '',
    'Instructions:',
    '- You are an artificial intelligence agent responsible for translating languages from audio to text',
    '- Please just repeat and translate what has been said and translate it',
    `- The conversations you hear will be in English and ${targetLanguage}`,
    '- When translating, make sure to translate the entire sentence, not just parts of it',
    '- If you cannot translate a word, leave it blank',
    `- So that all users can understand, respond in both English and ${targetLanguage}`,
    '- output everything said since the last translation',
    '',
    'Personality:',
    '- None',
    '',
    'Format:',
    '```',
    '{',
    '  "source": "translated text",',
    `  "dest": ${text}`,
    '}',
    '```',
    '',
  ];

  return promptLines.join('\n');
};

And here is the `client.on('conversation.updated', fn)` handler:

// Runs on every `conversation.updated` event, which also fires for streaming
// deltas: plays incoming audio, attaches a decoded WAV file to completed
// items, refreshes the rendered item list, and turns the assistant's JSON
// reply into a translation entry at most once per item id.
client.on('conversation.updated', async ({ item, delta }: any) => {
  const items = client.conversation.getItems();
  if (delta?.audio) {
    wavStreamPlayer.add16BitPCM(delta.audio, item.id);
  }
  if (item.status === 'completed' && item.formatted.audio?.length) {
    try {
      item.formatted.file = await WavRecorder.decode(
        item.formatted.audio,
        24000,
        24000
      );
    } catch (error) {
      // A rejected decode must not escape this async listener (nothing awaits
      // it, so it would be an unhandled rejection) nor skip the list update.
      console.error('Failed to decode item audio:', error);
    }
  }
  setItems(items);

  console.log(item); // the socket server sometimes didn't return item of 'assistant' role

  // Only assistant items that carry text and were not recorded already.
  if (item.role !== 'assistant' || !item.formatted.text || item.id === lastId) {
    return;
  }

  // Strip the markdown code fence and flatten newlines before parsing.
  // `String(...)` yields a primitive; `new String(...)` would create a boxed
  // wrapper object, which is not a `string` for `JSON.parse`.
  const text = String(item.formatted.text)
    .replaceAll('```json', '')
    .replaceAll('```', '')
    .replaceAll('\n', ' ');
  console.log({ text });

  try {
    const translationData = JSON.parse(text);

    // Mark the item as handled only after a successful parse, so partial
    // text from earlier deltas is retried on the next event.
    lastId = item.id;
    // Optional chaining: valid JSON may still be `null` or a primitive.
    if (translationData?.source && translationData?.dest) {
      setTranslations((prev) => [...prev, translationData]);
    }
  } catch (error) {
    // While the reply is still streaming, the text is an incomplete JSON
    // fragment and a parse failure is expected, so it is not reported.
    // NOTE(review): assumes streaming items report status 'in_progress' —
    // confirm against the client library.
    if (item.status !== 'in_progress') {
      console.error('Failed to parse translation data:', error);
    }
  }
});