#教程# – Cloudflare Workers 搭建azure tts ,使用 AI 将文本转换为逼真的语音

前言

Azure TTS 是微软 Azure 提供的文本转语音服务,通过调用 Azure 的 API 接口即可将文本转换为语音。利用 Cloudflare Workers 搭建 Azure TTS 后,可以使用 AI 将文本转换为逼真的语音,方便在视频里插入合成的标准普通话语音,极大地方便了视频旁白的制作。

图片[1] - #教程# – Cloudflare Workers 搭建azure tts ,使用 AI 将文本转换为逼真的语音 - 云线路

部署

把下面的代码粘贴到 Worker 中即可。如需自定义页面,可以把代码中的“https://raw.githubusercontent.com/x-dr/cf_pages/main/tts.html”修改为自己的 HTML 文件地址,然后编辑该 HTML 即可。

// Register the Worker's fetch handler: each incoming request is passed to
// handleRequest and the value it returns is handed to respondWith.
addEventListener('fetch', function onFetch(fetchEvent) {
    const reply = handleRequest(fetchEvent.request);
    fetchEvent.respondWith(reply);
});
  
  
  /**
   * Build a random identifier in the RFC 4122 version-4 layout
   * `xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx` (lowercase hex).
   *
   * The version nibble is the literal `4` in the template; the `y` position
   * carries the variant bits and therefore always becomes 8, 9, a or b.
   * Randomness comes from Math.random(), so the value must not be treated as
   * a secret or as cryptographically unpredictable.
   *
   * @returns {string} A 36-character UUID string.
   */
  function generateUUID() {
    // The template must contain both placeholder kinds and the regex must
    // match both; with only `x` present the variant branch below is never
    // taken and the output is just 32 random hex digits with dashes.
    return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
        const r = Math.random() * 16 | 0;
        // `x` takes any nibble; `y` forces the two top bits to `10`.
        const v = c === 'x' ? r : (r & 0x3 | 0x8);
        return v.toString(16);
    });
  }
  
  // Endpoint that receives the synthesis POST issued by speechApi.
  const API_URL =
    "https://southeastasia.api.speech.microsoft.com/accfreetrial/texttospeech/acc/v3.0-beta1/vcg/speak";

  // Headers sent with every synthesis request. The user-agent and sec-ch-ua
  // values describe Chrome 111 on Windows, and the origin is
  // https://speech.microsoft.com.
  // generateUUID() runs once, when this literal is evaluated, so the same
  // customvoiceconnectionid is reused by every request that sends this object.
  const DEFAULT_HEADERS = {
    "authority": "southeastasia.api.speech.microsoft.com",
    "accept": "*/*",
    "accept-language": "zh-CN,zh;q=0.9",
    "customvoiceconnectionid": generateUUID(),
    "origin": "https://speech.microsoft.com",
    "sec-ch-ua": '"Google Chrome";v="111", "Not(A:Brand";v="8", "Chromium";v="111"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36",
    "content-type": "application/json",
  };
  
  /**
   * Send an SSML document to the speech endpoint and return the audio bytes.
   *
   * The request is a JSON POST to API_URL using DEFAULT_HEADERS; the payload
   * asks for the "audio-24khz-160kbitrate-mono-mp3" format.
   *
   * @param {string} ssml - Complete SSML document to synthesize.
   * @returns {Promise<ArrayBuffer>} The response body as an ArrayBuffer.
   * @throws {Error} If the endpoint answers with a non-2xx status, or if the
   *   request or the body download fails. Every failure is logged with
   *   console.error and then rethrown unchanged.
   */
  const speechApi = async (ssml) => {
    const payload = JSON.stringify({
        ssml,
        ttsAudioFormat: "audio-24khz-160kbitrate-mono-mp3",
        offsetInPlainText: 0,
        properties: {
            SpeakTriggerSource: "AccTuningPagePlayButton",
        },
    });

    try {
        // fetch() has no `responseType` option; the body type is selected by
        // the reader method called on the response (arrayBuffer() below).
        const response = await fetch(API_URL, {
            method: "POST",
            headers: DEFAULT_HEADERS,
            body: payload,
        });

        if (!response.ok) {
            throw new Error(`Request failed with status ${response.status}`);
        }

        // Await inside the try block so that a failure while reading the body
        // reaches the catch below instead of bypassing the log.
        return await response.arrayBuffer();
    } catch (error) {
        console.error("Error during API request:", error);
        throw error;
    }
  };
  
  /**
   * Replace the five XML metacharacters (& < > " ') with their predefined
   * entities so a caller-supplied value can be placed in SSML text or inside
   * a double-quoted attribute without altering the document structure.
   *
   * @param {*} value - Value to embed; it is converted with String().
   * @returns {string} The escaped text.
   */
  function escapeXml(value) {
    const entities = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&apos;" };
    return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
  }

  /**
   * Assemble the SSML document for one synthesis request. Every argument is
   * XML-escaped before it is inserted.
   *
   * @param {object} options
   * @param {string} options.text - Text to be spoken.
   * @param {string} options.voice - Value of the <voice name="..."> attribute.
   * @param {?string} options.voiceStyle - Style for <mstts:express-as>; when
   *   empty or null the express-as wrapper is left out.
   * @param {string} options.rate - Prosody rate; "%" is appended.
   * @param {string} options.pitch - Prosody pitch; "%" is appended.
   * @returns {string} The SSML document.
   */
  function buildSsml({ text, voice, voiceStyle, rate, pitch }) {
    const prosody =
      `<prosody rate="${escapeXml(rate)}%" pitch="${escapeXml(pitch)}%">${escapeXml(text)}</prosody>`;
    const styled = voiceStyle
      ? `<mstts:express-as style="${escapeXml(voiceStyle)}">${prosody}</mstts:express-as>`
      : prosody;
    return (
      '<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" ' +
      'xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">' +
      `<voice name="${escapeXml(voice)}">${styled}</voice></speak>`
    );
  }

  /**
   * Route one incoming request.
   *
   *   "/"       - fetches the front-end HTML from GitHub and serves it.
   *   "/audio"  - reads text, voice, voiceStyle, rate and pitch from the query
   *               string, synthesizes speech through speechApi and returns it
   *               as an MP3 attachment.
   *   otherwise - responds with the literal body "page".
   *
   * @param {Request} request - The incoming request.
   * @returns {Promise<Response>} 200 on success; 400 when `text` or `voice`
   *   is missing on "/audio"; 502 when the HTML page or the speech endpoint
   *   cannot be reached or answers with an error.
   */
  const handleRequest = async (request) => {
    const url = new URL(request.url);
    const clientIP = request.headers.get("CF-Connecting-IP");

    // Shared by the HTML page and the fallback response.
    const pageHeaders = {
        "content-type": "text/html;charset=UTF-8",
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Credentials": "true",
        "Access-Control-Allow-Headers": "*",
        "Access-Control-Allow-Methods": "*",
        "ip": `Access cloudflare's ip:${clientIP}`
    };
    const plainText = { "content-type": "text/plain;charset=UTF-8" };

    if (url.pathname === "/") {
        try {
            const upstream = await fetch("https://raw.githubusercontent.com/x-dr/cf_pages/main/tts.html");
            // Without this check an upstream error page (e.g. a 404 body)
            // would be served to the visitor as if it were the application.
            if (!upstream.ok) {
                throw new Error(`Page request failed with status ${upstream.status}`);
            }
            const page = await upstream.text();
            return new Response(page, { headers: pageHeaders });
        } catch (error) {
            console.error("Error loading the front-end page:", error);
            return new Response("Failed to load the page", { status: 502, headers: plainText });
        }
    }

    if (url.pathname === "/audio") {
        const params = url.searchParams;
        const text = params.get("text");
        const voice = params.get("voice");
        const voiceStyle = params.get("voiceStyle");
        // A missing or empty rate/pitch would otherwise be rendered as
        // "null%" / "%"; fall back to "0" (i.e. "0%").
        const rate = params.get("rate") || "0";
        const pitch = params.get("pitch") || "0";

        // A missing value would otherwise be interpolated as the literal
        // string "null" and sent upstream.
        if (!text || !voice) {
            return new Response("Missing required query parameter: text and voice are required", {
                status: 400,
                headers: plainText,
            });
        }

        const ssml = buildSsml({ text, voice, voiceStyle, rate, pitch });

        let audio;
        try {
            audio = await speechApi(ssml);
        } catch (error) {
            // speechApi logs the failure before rethrowing; translate it into
            // an HTTP error instead of letting the exception escape.
            return new Response("Speech synthesis failed", { status: 502, headers: plainText });
        }

        return new Response(audio, {
            headers: {
                "Content-Type": "audio/mpeg",
                "Content-Disposition": `attachment; filename=audio.mp3`,
            },
        });
    }

    // NOTE(review): unknown paths answer 200 with the literal body "page";
    // kept as-is for compatibility — confirm whether a 404 is intended.
    return new Response("page", { headers: pageHeaders });
  };

演示

结语

可选:修改代码中的 uuid,用于扶墙。单次转换的文字长度大约限制在 300 字。

© 本站文章随意转载,但请注明出处!
THE END
点赞11 分享
评论 抢沙发
头像
务必使用真实的邮箱地址评论,虚假邮箱的评论将不通过审核及无回复。
提交
头像

昵称

取消
昵称表情代码图片

    暂无评论内容