为了实现转换语音成功后,直接返回给前端文件流,便于前端直接播放语音,下面是两个示例代码。
代码中部分配置项需要根据项目需要进行修改。
重要依赖:
const fs = require("fs");
const sdk = require("microsoft-cognitiveservices-speech-sdk");
/**
 * Synthesize `text` with the given Azure voice `model` and resolve with the
 * audio as a Node Buffer (RIFF/WAV) so the frontend can play it directly.
 *
 * @param {string} model - Azure voice name (e.g. "zh-CN-XiaoxiaoNeural").
 * @param {string} text  - Plain text to synthesize.
 * @returns {Promise|undefined} Resolves with success(audioBuffer, "success");
 *   rejects on synthesis failure. Returns undefined when input is missing.
 */
const play = (model, text) => {
  // Guard: nothing to synthesize without both a voice model and text.
  if (!text || !model) {
    return;
  }
  const config = getConfig();
  const speechConfig = sdk.SpeechConfig.fromSubscription(
    config.serviceConfig.azure.key,
    config.serviceConfig.azure.region
  );
  // Select the synthesis voice.
  speechConfig.speechSynthesisVoiceName = model;
  // Emit RIFF 16 kHz 16-bit mono PCM (.wav) so browsers can play it as-is.
  speechConfig.speechSynthesisOutputFormat =
    sdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm;
  const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
  // Adapt the SDK's callback API to a Promise.
  return new Promise((resolve, reject) => {
    speechSynthesizer.speakTextAsync(
      text,
      (result) => {
        if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
          // result.audioData is an ArrayBuffer; wrap it in a Buffer for the response.
          const audioData = Buffer.from(result.audioData);
          resolve(success(audioData, "success"));
        } else {
          console.error("Synthesis failed. Reason:", result.errorDetails);
          reject(error(result.errorDetails));
        }
        speechSynthesizer.close();
      },
      // FIX: parameter renamed from `error` — it shadowed the module-level
      // error() helper used in the success callback above.
      (err) => {
        console.error("Error:", err);
        reject(new Error(err));
        speechSynthesizer.close();
      }
    );
  });
};
/**
 * Synthesize an SSML fragment (wrapped in a zh-CN <speak> envelope) and
 * resolve with the audio as a Node Buffer (RIFF/WAV).
 *
 * @param {string} ssml - SSML body, e.g. <voice>/<mstts:express-as> elements.
 * @returns {Promise|undefined} Resolves with success(audioBuffer, "success");
 *   rejects on synthesis failure. Returns undefined when input is missing.
 */
const azurePlaySSML = (ssml) => {
  // Guard: nothing to synthesize without SSML content.
  if (!ssml) {
    return;
  }
  const config = getConfig();
  const speechConfig = sdk.SpeechConfig.fromSubscription(
    config.serviceConfig.azure.key,
    config.serviceConfig.azure.region
  );
  // Emit RIFF 16 kHz 16-bit mono PCM (.wav) so browsers can play it as-is.
  speechConfig.speechSynthesisOutputFormat =
    sdk.SpeechSynthesisOutputFormat.Riff16Khz16BitMonoPcm;
  const speechSynthesizer = new sdk.SpeechSynthesizer(speechConfig);
  // Adapt the SDK's callback API to a Promise.
  return new Promise((resolve, reject) => {
    speechSynthesizer.speakSsmlAsync(
      `<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"
    xmlns:mstts="https://www.w3.org/2001/mstts"
    xmlns:emo="http://www.w3.org/2009/10/emotionml"
    xml:lang="zh-CN">
    ${ssml}
  </speak>
  `,
      (result) => {
        if (result.reason === sdk.ResultReason.SynthesizingAudioCompleted) {
          // result.audioData is an ArrayBuffer; wrap it in a Buffer for the response.
          const audioData = Buffer.from(result.audioData);
          resolve(success(audioData, "success"));
        } else {
          console.error("Synthesis failed. Reason:", result.errorDetails);
          reject(error(result.errorDetails));
        }
        speechSynthesizer.close();
      },
      (err) => {
        console.error("Error:", err);
        // FIX: the original only logged here, so the Promise was never
        // settled on SDK errors and callers awaited forever.
        reject(new Error(err));
        speechSynthesizer.close();
      }
    );
  });
};
本文作者:DingDangDog
本文链接:
版权声明:本博客所有文章除特别声明外,均采用 BY-NC-SA 许可协议。转载请注明出处!