<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>录音转文字</title>
<style>
/* Global reset: remove default spacing and size every box by its border edge. */
* {
  margin: 0;
  padding: 0;
  box-sizing: border-box;
}

/* Page canvas: purple gradient with the card centred in the viewport. */
body {
  font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
  background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
  min-height: 100vh;
  display: flex;
  align-items: center;
  justify-content: center;
  padding: 20px;
}

/* Main card. The prefixed backdrop-filter is listed next to the standard one
   for WebKit builds that only implement the prefixed property. */
.container {
  background: rgba(255, 255, 255, 0.95);
  -webkit-backdrop-filter: blur(10px);
  backdrop-filter: blur(10px);
  border-radius: 20px;
  padding: 40px;
  box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
  max-width: 700px;
  width: 100%;
}

/* Gradient-filled title: the gradient background is clipped to the glyphs and
   the text fill is made transparent. The standard background-clip property is
   declared alongside the -webkit- one. */
h1 {
  text-align: center;
  color: #333;
  margin-bottom: 30px;
  font-size: 2.5em;
  background: linear-gradient(45deg, #667eea, #764ba2);
  -webkit-background-clip: text;
  background-clip: text;
  -webkit-text-fill-color: transparent;
}
/* Tinted panel that groups the API settings. */
.config-section {
  border-left: 4px solid #667eea;
  border-radius: 15px;
  background: rgba(102, 126, 234, 0.1);
  padding: 20px;
  margin-bottom: 30px;
}

.config-section h3 {
  margin-bottom: 15px;
  color: #333;
}

/* One labelled control per group. */
.input-group {
  margin-bottom: 15px;
}

label {
  display: block;
  margin-bottom: 5px;
  font-weight: 500;
  color: #555;
}

/* Text inputs and selects share one full-width look. */
input,
select {
  width: 100%;
  padding: 12px;
  font-size: 16px;
  border: 2px solid #ddd;
  border-radius: 10px;
  transition: all 0.3s ease;
}

/* The default outline is replaced by a coloured border plus a soft ring. */
input:focus,
select:focus {
  border-color: #667eea;
  box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1);
  outline: none;
}

/* Helper text shown under a control. */
small {
  display: block;
  margin-top: 5px;
  font-size: 12px;
  color: #666;
}
/* Centred row that holds the main action button. */
.record-section {
  margin: 30px 0;
  text-align: center;
}

/* Pill-shaped primary button. */
.record-btn {
  margin: 10px;
  padding: 20px 40px;
  border: none;
  border-radius: 50px;
  background: linear-gradient(45deg, #667eea, #764ba2);
  box-shadow: 0 10px 30px rgba(102, 126, 234, 0.3);
  color: white;
  font-size: 18px;
  font-weight: bold;
  cursor: pointer;
  transition: all 0.3s ease;
}

/* Lift on hover, settle back while pressed. */
.record-btn:hover {
  box-shadow: 0 15px 35px rgba(102, 126, 234, 0.4);
  transform: translateY(-3px);
}

.record-btn:active {
  transform: translateY(0);
}

/* Red, pulsing variant applied through the "recording" class. */
.record-btn.recording {
  animation: pulse 2s infinite;
  background: linear-gradient(45deg, #ff6b6b, #ee5a52);
}

/* Gentle grow-and-shrink cycle used by the recording state. */
@keyframes pulse {
  0%,
  100% {
    transform: scale(1);
  }
  50% {
    transform: scale(1.05);
  }
}
/* Status banner; the script appends one of the modifier classes below. */
.status {
  padding: 15px;
  margin: 20px 0;
  border-radius: 10px;
  font-weight: 500;
  text-align: center;
}

/* Blue: neutral progress information. */
.status.info {
  border: 1px solid rgba(52, 152, 219, 0.3);
  background: rgba(52, 152, 219, 0.1);
  color: #2980b9;
}

/* Green: the operation succeeded. */
.status.success {
  border: 1px solid rgba(46, 204, 113, 0.3);
  background: rgba(46, 204, 113, 0.1);
  color: #27ae60;
}

/* Red: the operation failed. */
.status.error {
  border: 1px solid rgba(231, 76, 60, 0.3);
  background: rgba(231, 76, 60, 0.1);
  color: #c0392b;
}
.result-section {
  margin-top: 30px;
}

/* Transcription output box. Line breaks in the text are kept (pre-wrap) and
   long unbroken strings wrap instead of overflowing. overflow-wrap is the
   standard property; word-wrap is kept as its legacy alias. */
.result-text {
  background: #f8f9fa;
  border: 2px solid #e9ecef;
  border-radius: 15px;
  padding: 20px;
  min-height: 120px;
  font-size: 16px;
  line-height: 1.6;
  white-space: pre-wrap;
  word-wrap: break-word;
  overflow-wrap: break-word;
}

/* Preview player for the selected audio file. */
.audio-player {
  margin: 20px 0;
  width: 100%;
}
/* File picker area: the native input is replaced visually by its label. */
.file-upload {
  margin: 20px 0;
  text-align: center;
}

/* Hide the native file input visually but keep it focusable. With
   display: none the control is removed from the tab order, so keyboard users
   could not open the file dialog at all. */
.file-upload input[type="file"] {
  position: absolute;
  width: 1px;
  height: 1px;
  padding: 0;
  margin: -1px;
  border: 0;
  overflow: hidden;
  clip: rect(0, 0, 0, 0);
  white-space: nowrap;
}

/* The label acts as the visible "choose file" button. */
.file-upload label {
  display: inline-block;
  padding: 15px 30px;
  background: linear-gradient(45deg, #28a745, #20c997);
  color: white;
  border-radius: 25px;
  cursor: pointer;
  transition: all 0.3s ease;
  font-weight: bold;
}

.file-upload label:hover {
  transform: translateY(-2px);
  box-shadow: 0 10px 25px rgba(40, 167, 69, 0.3);
}

/* Show a focus ring on the label while the hidden input has keyboard focus. */
.file-upload:focus-within label {
  outline: 3px solid #155724;
  outline-offset: 3px;
}
/* Container for a list of speaker segments. */
.speaker-timeline {
  padding: 15px;
  margin: 20px 0;
  border-left: 4px solid #667eea;
  border-radius: 10px;
  background: #f8f9fa;
}

/* One utterance; combined with a .speaker-N colour class. */
.speaker-segment {
  position: relative;
  padding: 10px;
  margin: 8px 0;
  border-radius: 8px;
}

/* Per-speaker colour coding (blue, red, green, yellow). */
.speaker-1 {
  border-left: 3px solid #667eea;
  background: rgba(102, 126, 234, 0.1);
}

.speaker-2 {
  border-left: 3px solid #ff6b6b;
  background: rgba(255, 107, 107, 0.1);
}

.speaker-3 {
  border-left: 3px solid #2ecc71;
  background: rgba(46, 204, 113, 0.1);
}

.speaker-4 {
  border-left: 3px solid #f1c40f;
  background: rgba(241, 196, 15, 0.1);
}

/* Speaker name line. */
.speaker-label {
  margin-bottom: 5px;
  font-size: 14px;
  font-weight: bold;
  color: #555;
}

/* Time range shown next to the speaker name. */
.speaker-time {
  margin-left: 10px;
  font-size: 12px;
  color: #888;
}

/* Transcribed text of the segment. */
.speaker-text {
  margin-top: 5px;
  line-height: 1.4;
}
/* Blue info panel that shows audio analysis details. */
.analysis-section {
  padding: 15px;
  margin: 20px 0;
  border-left: 4px solid #3498db;
  border-radius: 10px;
  background: rgba(52, 152, 219, 0.1);
}

.toggle-section {
  margin: 10px 0;
}

/* Small secondary button (also used by the export button). */
.toggle-btn {
  padding: 8px 16px;
  border: none;
  border-radius: 5px;
  background: #3498db;
  color: white;
  font-size: 14px;
  cursor: pointer;
}

.toggle-btn:hover {
  background: #2980b9;
}
</style>
</head>
<body>
<div class="container">
<h1>🎙️ 录音转文字</h1>
<!-- API配置区域 -->
<div class="config-section">
<h3>⚙️ API配置</h3>
<!-- Provider selector: the script reads this value to decide which configuration panel below is shown. -->
<div class="input-group">
<label for="apiProvider">API服务商:</label>
<select id="apiProvider">
<option value="siliconflow">SiliconFlow (纯转写)</option>
<option value="volcengine">火山引擎 (支持说话人分离)</option>
</select>
</div>
<!-- SiliconFlow settings: endpoint URL, API key and model name. Visible by default. -->
<div id="siliconflowConfig">
<div class="input-group">
<label for="apiUrl">API地址:</label>
<input type="text" id="apiUrl" value="https://api.siliconflow.cn/v1/audio/transcriptions">
</div>
<div class="input-group">
<label for="apiKey">API密钥:</label>
<input type="password" id="apiKey" placeholder="请输入您的SiliconFlow API密钥">
</div>
<div class="input-group">
<label for="model">模型:</label>
<input type="text" id="model" value="FunAudioLLM/SenseVoiceSmall">
</div>
</div>
<!-- Volcengine settings: hidden until the Volcengine provider is selected. -->
<div id="volcengineConfig" style="display: none;">
<div class="input-group">
<label for="volcSubmitUrl">提交任务API:</label>
<input type="text" id="volcSubmitUrl" value="https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit">
</div>
<div class="input-group">
<label for="volcQueryUrl">查询结果API:</label>
<input type="text" id="volcQueryUrl" value="https://openspeech.bytedance.com/api/v3/auc/bigmodel/query">
</div>
<div class="input-group">
<label for="volcAppKey">APP ID:</label>
<input type="text" id="volcAppKey" placeholder="火山引擎控制台获取的APP ID">
</div>
<div class="input-group">
<label for="volcAccessKey">Access Token:</label>
<input type="password" id="volcAccessKey" placeholder="火山引擎控制台获取的Access Token">
</div>
<!-- How the audio URL is obtained: typed in directly, or produced by uploading a file. -->
<div class="input-group">
<label for="audioUrlMethod">音频URL获取方式:</label>
<select id="audioUrlMethod">
<option value="direct">直接输入音频URL</option>
<option value="upload">通过上传服务获取URL</option>
</select>
</div>
<!-- Direct-URL option: URL field, a validate button, and guidance on what counts as a direct link. -->
<div id="directUrlConfig">
<div class="input-group" style="background: rgba(46, 204, 113, 0.1); padding: 15px; border-radius: 8px; border-left: 3px solid #2ecc71;">
<label for="directAudioUrl">音频文件URL:</label>
<input type="url" id="directAudioUrl" placeholder="https://example.com/your-audio-file.mp3">
<small>请输入音频文件的公网直链地址(可直接下载的URL)</small>
<button type="button" id="validateUrlBtn" style="margin-top: 8px; padding: 6px 12px; background: #3498db; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 12px;">🔍 验证链接</button>
<div id="urlValidationResult" style="margin-top: 8px; display: none;"></div>
<div style="margin-top: 10px; padding: 8px; background: rgba(255, 255, 255, 0.7); border-radius: 5px;">
<strong>✅ 有效的直链来源:</strong><br>
1. <strong>您自己的网站服务器</strong>:如 https://yoursite.com/audio/file.mp3<br>
2. <strong>GitHub Raw链接</strong>:上传到GitHub仓库,获取Raw文件链接<br>
3. <strong>专业CDN服务</strong>:如阿里云OSS、腾讯云COS等的公开链接<br>
4. <strong>免费直链托管</strong>:使用下方的"通过上传服务获取URL"<br>
<br>
<strong style="color: #e74c3c;">❌ 无效链接(不是直链):</strong><br>
• 百度网盘、OneDrive等云盘分享链接<br>
• 需要登录或验证的链接<br>
• 重定向链接<br>
<br>
<small style="color: #666;"><strong>测试方法:</strong>直接在浏览器中打开URL,应该直接下载/播放音频文件</small>
</div>
</div>
</div>
<!-- Upload-service option: notes on the free hosts plus an optional custom upload endpoint. Hidden by default. -->
<div id="uploadServiceConfig" style="display: none;">
<div class="input-group" style="background: rgba(40, 167, 69, 0.1); padding: 15px; border-radius: 8px; border-left: 3px solid #28a745;">
<label>✅ 免费直链托管服务:</label>
<p style="margin: 5px 0; color: #155724; font-size: 14px;">
<strong>自动上传并获取直链</strong><br>
• 支持 catbox.moe、file.io 等服务<br>
• 自动生成可用于火山引擎API的直链<br>
• 如果遇到网络问题,请尝试几次或使用自定义服务<br>
</p>
</div>
<div class="input-group">
<label for="customUploadUrl">自定义上传服务API (可选):</label>
<input type="text" id="customUploadUrl" placeholder="http://your-server.com/upload">
<small>如果您有自己的文件上传API,请填入。API应返回 JSON: {"url": "直链地址"}</small>
</div>
</div>
<!-- Speaker separation switch; checked by default. -->
<div class="input-group">
<label>
<input type="checkbox" id="enableSpeakerDetection" style="width: auto; margin-right: 10px;" checked>
启用说话人分离
</label>
</div>
</div>
</div>
<!-- File upload area: either the file picker or a direct-URL notice is shown; the script toggles the two inner sections. -->
<div class="file-upload">
<div id="fileUploadSection">
<label for="audioFile">📁 选择音频文件</label>
<input type="file" id="audioFile" accept="audio/*">
</div>
<div id="directUrlSection" style="display: none;">
<div style="text-align: center; padding: 20px; background: rgba(46, 204, 113, 0.1); border-radius: 15px; border: 2px dashed #2ecc71;">
<p style="margin: 0; color: #27ae60; font-weight: bold;">🔗 使用直接URL模式</p>
<p style="margin: 5px 0 0 0; color: #666; font-size: 14px;">请在上方火山引擎配置中输入音频文件URL</p>
</div>
</div>
</div>
<!-- Conversion trigger: hidden until the script decides there is something to convert. -->
<div class="record-section">
<button type="button" id="convertBtn" class="record-btn" style="display: none; background: linear-gradient(45deg, #28a745, #20c997);">🚀 开始转换</button>
</div>
<!-- Status banner: declared as a polite live region so progress and error messages written by the script are announced by screen readers. -->
<div id="status" class="status" role="status" aria-live="polite" style="display: none;"></div>
<!-- Preview player for the selected audio file; hidden until a file is chosen. -->
<audio id="audioPlayer" class="audio-player" controls style="display: none;"></audio>
<!-- Result area: heading with the export button, the transcription text, and the analysis panel. -->
<div class="result-section">
<div style="display: flex; justify-content: space-between; align-items: center; margin-bottom: 15px;">
<h3>📝 转换结果:</h3>
<button id="exportBtn" class="toggle-btn" style="display: none; background: #28a745;">📄 导出TXT</button>
</div>
<!-- Transcription text; starts with a placeholder message. -->
<div id="resultText" class="result-text">在这里将显示语音转文字的结果...</div>
<!-- Audio analysis details; hidden until the script fills #analysisInfo. -->
<div id="audioAnalysis" class="analysis-section" style="display: none;">
<h4>🔍 音频分析:</h4>
<div id="analysisInfo"></div>
</div>
</div>
</div>
<script>
// ---- Shared state ----
// The audio file chosen by the user. Starts as null so that "no file chosen
// yet" is an explicit value rather than undefined.
let currentAudioBlob = null;
let currentTranscription = ''; // Text of the latest transcription result
let currentSpeakerSegments = []; // Speaker-separated segments of the latest result
// ---- Cached DOM references used throughout the script ----
const convertBtn = document.getElementById('convertBtn');
const exportBtn = document.getElementById('exportBtn');
const status = document.getElementById('status');
const resultText = document.getElementById('resultText');
const audioPlayer = document.getElementById('audioPlayer');
const audioFileInput = document.getElementById('audioFile');
const audioAnalysis = document.getElementById('audioAnalysis');
const analysisInfo = document.getElementById('analysisInfo');
const apiProvider = document.getElementById('apiProvider');
const siliconflowConfig = document.getElementById('siliconflowConfig');
const volcengineConfig = document.getElementById('volcengineConfig');
const audioUrlMethod = document.getElementById('audioUrlMethod');
const directUrlConfig = document.getElementById('directUrlConfig');
const uploadServiceConfig = document.getElementById('uploadServiceConfig');
const directAudioUrlInput = document.getElementById('directAudioUrl');
const customUploadUrlInput = document.getElementById('customUploadUrl');
const fileUploadSection = document.getElementById('fileUploadSection');
const directUrlSection = document.getElementById('directUrlSection');
const validateUrlBtn = document.getElementById('validateUrlBtn');
const urlValidationResult = document.getElementById('urlValidationResult');
// Show the configuration panel that belongs to the selected API provider,
// reset the other provider's fields, then clear results and refresh the
// convert button.
function switchApiProvider() {
  switch (apiProvider.value) {
    case 'siliconflow':
      volcengineConfig.style.display = 'none';
      siliconflowConfig.style.display = 'block';
      // SiliconFlow only works with an uploaded file, so show the file picker.
      directUrlSection.style.display = 'none';
      fileUploadSection.style.display = 'block';
      // Put the Volcengine-specific controls back to their initial state.
      uploadServiceConfig.style.display = 'none';
      directUrlConfig.style.display = 'none';
      directAudioUrlInput.value = '';
      customUploadUrlInput.value = '';
      audioUrlMethod.value = 'direct';
      break;
    case 'volcengine': {
      volcengineConfig.style.display = 'block';
      siliconflowConfig.style.display = 'none';
      // Restore the SiliconFlow fields to their defaults and clear the key.
      const siliconflowDefaults = {
        apiUrl: 'https://api.siliconflow.cn/v1/audio/transcriptions',
        apiKey: '',
        model: 'FunAudioLLM/SenseVoiceSmall'
      };
      for (const [fieldId, fieldValue] of Object.entries(siliconflowDefaults)) {
        document.getElementById(fieldId).value = fieldValue;
      }
      // Which sub-panel is visible depends on the audio URL method.
      switchAudioUrlMethod();
      break;
    }
    default:
      break;
  }
  resetResults();
  updateConvertButtonState();
}
// Toggle between "direct URL" and "upload service" panels. Does nothing
// unless the Volcengine provider is selected.
function switchAudioUrlMethod() {
  if (apiProvider.value !== 'volcengine') {
    return;
  }
  const useDirectUrl = audioUrlMethod.value === 'direct';
  const shownWhenDirect = useDirectUrl ? 'block' : 'none';
  const shownWhenUpload = useDirectUrl ? 'none' : 'block';
  directUrlConfig.style.display = shownWhenDirect;
  uploadServiceConfig.style.display = shownWhenUpload;
  fileUploadSection.style.display = shownWhenUpload;
  directUrlSection.style.display = shownWhenDirect;
  // Clear the input that belongs to the mode being left.
  const staleInput = useDirectUrl ? customUploadUrlInput : directAudioUrlInput;
  staleInput.value = '';
  updateConvertButtonState();
}
// Show the convert button only when the current provider has what it needs:
// a selected file (SiliconFlow, or Volcengine in upload mode) or an
// http(s) URL (Volcengine in direct-URL mode).
function updateConvertButtonState() {
  const provider = apiProvider.value;
  const directUrlMode = provider === 'volcengine' && audioUrlMethod.value === 'direct';
  let canConvert = false;
  if (directUrlMode) {
    const url = directAudioUrlInput.value.trim();
    canConvert = url.startsWith('http://') || url.startsWith('https://');
  } else if (provider === 'siliconflow' || provider === 'volcengine') {
    // A plain truthiness test: comparing with `!== null` reported "file
    // present" while the variable was still undefined, which made the button
    // appear before any file had been chosen.
    canConvert = Boolean(currentAudioBlob);
  }
  convertBtn.style.display = canConvert ? 'inline-block' : 'none';
  if (canConvert && directUrlMode) {
    showStatus(`🔗 已配置直接URL,点击"开始转换"进行语音识别`, 'success');
  }
}
// Clear the previous result: hide the analysis panel and export button,
// forget the stored transcription, and restore the placeholder text.
function resetResults() {
  for (const element of [audioAnalysis, exportBtn]) {
    element.style.display = 'none';
  }
  currentTranscription = '';
  currentSpeakerSegments = [];
  resultText.textContent = '在这里将显示语音转文字的结果...';
}
// Display a message in the status banner and mirror it to the console.
// `type` selects the banner colour class: 'info' (default), 'success' or 'error'.
function showStatus(message, type = 'info') {
  status.textContent = message;
  status.className = ['status', type].join(' ');
  status.style.display = 'block';
  const logPrefix = '[' + type.toUpperCase() + ']';
  console.log(`${logPrefix} ${message}`);
}
// Hide the status banner.
function hideStatus() {
  Object.assign(status.style, { display: 'none' });
}
// Download the current result as a UTF-8 .txt file.
// With speaker segments present, the file lists each segment (speaker and
// time range) followed by the full text; otherwise it holds only the full
// text. Shows an error status and returns when there is nothing to export.
function exportToTxt() {
  // Timestamped name, with ':' replaced so it is a valid file name.
  const filename = `语音转文字_${new Date().toISOString().slice(0, 19).replace(/:/g, '-')}.txt`;
  let content;
  if (currentSpeakerSegments.length > 0) {
    const segmentBlocks = currentSpeakerSegments.map((segment) =>
      `[说话人 ${segment.speaker}] (${formatTime(segment.startTime)} - ${formatTime(segment.endTime)})\n${segment.text}\n\n`
    );
    content = `=== 说话人分离结果 ===\n\n${segmentBlocks.join('')}\n=== 完整文本 ===\n\n${currentTranscription}`;
  } else {
    content = currentTranscription;
  }
  if (!content.trim()) {
    showStatus('⚠️ 没有可导出的内容', 'error');
    return;
  }
  // Trigger the download through a temporary link.
  const blob = new Blob([content], { type: 'text/plain;charset=utf-8' });
  const url = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.download = filename;
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  // Release the object URL a moment later: revoking it in the same tick as
  // click() can cancel the download in some browsers.
  setTimeout(() => URL.revokeObjectURL(url), 1000);
  showStatus('📄 文件已导出!', 'success');
}
// Upload a file to a free hosting service and resolve with its public URL.
// The services are tried in order (catbox.moe, then file.io) with a one
// second pause between attempts. Rejects with an Error carrying a
// multi-line help message when every service fails.
async function uploadToCatbox(file) {
  showStatus('📤 正在尝试免费托管服务...', 'info');
  // Each entry exposes upload(file), which resolves with a URL or throws.
  const uploadServices = [
    {
      name: 'catbox.moe',
      upload: async (file) => {
        const formData = new FormData();
        formData.append('reqtype', 'fileupload');
        formData.append('fileToUpload', file);
        const response = await fetch('https://catbox.moe/user/api.php', {
          method: 'POST',
          body: formData
        });
        if (response.ok) {
          // Trim before validating so surrounding whitespace or a trailing
          // newline in the plain-text reply cannot defeat the prefix check.
          const link = (await response.text()).trim();
          if (link.startsWith('https://files.catbox.moe/')) {
            return link;
          }
        }
        throw new Error('catbox.moe 上传失败');
      }
    },
    {
      name: 'file.io',
      upload: async (file) => {
        const formData = new FormData();
        formData.append('file', file);
        const response = await fetch('https://file.io', {
          method: 'POST',
          body: formData
        });
        if (response.ok) {
          const result = await response.json();
          if (result.success && result.link) {
            return result.link;
          }
        }
        throw new Error('file.io 上传失败');
      }
    }
  ];
  for (const [index, service] of uploadServices.entries()) {
    try {
      showStatus(`📤 正在尝试 ${service.name}... (${index + 1}/${uploadServices.length})`, 'info');
      console.log(`尝试上传到: ${service.name}`);
      const url = await service.upload(file);
      console.log(`${service.name} 上传成功:`, url);
      showStatus(`✅ 文件上传成功!使用服务: ${service.name}`, 'success');
      return url;
    } catch (error) {
      console.warn(`${service.name} 上传失败:`, error.message);
      const hasNextService = index < uploadServices.length - 1;
      if (hasNextService) {
        showStatus(`⚠️ ${service.name} 失败,尝试下一个服务...`, 'info');
        await new Promise(resolve => setTimeout(resolve, 1000));
      }
    }
  }
  // Every service failed: report it and explain the alternatives.
  console.error('所有免费托管服务都失败了');
  showStatus(`❌ 免费托管服务暂时不可用`, 'error');
  const errorMessage = `
免费托管服务暂时不可用,请尝试以下方案:
✅ 推荐方案:
1. 使用您自己的服务器上传音频文件
2. 上传到GitHub仓库,获取Raw链接
3. 使用专业CDN服务(阿里云OSS、腾讯云COS等)
🔧 GitHub直链教程:
1. 登录GitHub,创建新仓库
2. 上传音频文件到仓库
3. 点击文件 → Raw 按钮
4. 复制Raw链接地址
💡 自定义上传服务:
如果您有自己的文件上传API,请在上方填写自定义上传服务地址
`;
  throw new Error(errorMessage);
}
// Return a version-4 UUID string (lowercase hex, 8-4-4-4-12).
// Uses the platform's crypto.randomUUID() when it exists and falls back to a
// Math.random()-based generator otherwise.
function generateUUID() {
  if (typeof crypto !== 'undefined' && typeof crypto.randomUUID === 'function') {
    return crypto.randomUUID();
  }
  return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (placeholder) => {
    const nibble = Math.random() * 16 | 0;
    // 'x' takes any hex digit; 'y' is forced into 8..b (the UUID variant bits).
    const digit = placeholder === 'x' ? nibble : (nibble & 0x3 | 0x8);
    return digit.toString(16);
  });
}
// Obtain a public URL for the given audio blob.
// If a custom upload endpoint is filled in, it is tried first; on any
// failure there, the free hosting services are used instead.
async function createAudioUrl(audioBlob) {
  const endpoint = customUploadUrlInput.value.trim();
  if (endpoint) {
    try {
      showStatus('📤 正在使用自定义上传服务...', 'info');
      console.log('尝试自定义上传服务:', endpoint);
      const payload = new FormData();
      // The blob is sent under both field names to suit different servers.
      for (const fieldName of ['file', 'audio']) {
        payload.append(fieldName, audioBlob);
      }
      const response = await fetch(endpoint, { method: 'POST', body: payload });
      if (!response.ok) {
        throw new Error(`自定义服务响应失败: ${response.status}`);
      }
      const result = await response.json();
      const url = result.url || result.file_url || result.link;
      if (!url) {
        throw new Error('自定义服务返回数据格式不正确');
      }
      console.log('自定义服务上传成功:', url);
      showStatus('✅ 自定义服务上传成功!', 'success');
      return url;
    } catch (error) {
      // Best effort: report the failure and continue with the free hosts.
      console.warn('自定义上传服务失败,转用免费服务:', error.message);
      showStatus('⚠️ 自定义服务失败,转用免费托管服务...', 'info');
    }
  }
  const hostedUrl = await uploadToCatbox(audioBlob);
  return hostedUrl;
}
// Entry point for the convert button: run the selected provider's
// conversion and surface any failure in the status banner and result area.
async function convertToText() {
  const provider = apiProvider.value;
  // A file is required for SiliconFlow and for Volcengine in upload mode.
  const fileRequired =
    provider === 'siliconflow' ||
    (provider === 'volcengine' && audioUrlMethod.value === 'upload');
  if (fileRequired && !currentAudioBlob) {
    showStatus('⚠️ 请先选择音频文件', 'error');
    return;
  }
  console.log('开始转换,提供商:', provider, '文件大小:', currentAudioBlob ? currentAudioBlob.size : '使用URL');
  try {
    switch (provider) {
      case 'siliconflow':
        await convertWithSiliconFlow(currentAudioBlob);
        break;
      case 'volcengine':
        await convertWithVolcEngine(currentAudioBlob);
        break;
      default:
        break;
    }
  } catch (error) {
    console.error('转换失败:', error);
    showStatus(`❌ 转换失败: ${error.message}`, 'error');
    displayErrorInfo(provider, error);
  }
}
// Transcribe the audio through the SiliconFlow endpoint (plain transcription).
// Reads the endpoint URL, API key and model from the form, posts the file as
// multipart form data, and shows the returned text.
// Throws when the API key is missing, the request fails, or the text is empty.
async function convertWithSiliconFlow(audioBlob) {
  const apiUrl = document.getElementById('apiUrl').value.trim();
  const apiKey = document.getElementById('apiKey').value.trim();
  const model = document.getElementById('model').value.trim();
  if (!apiKey) {
    throw new Error('请填入SiliconFlow的API密钥');
  }
  showStatus('🔄 正在转换为文字...', 'info');
  const formData = new FormData();
  // Send the file under its real name so the extension matches the content;
  // 'recording.webm' is used only when the blob has no name of its own.
  const uploadName = audioBlob.name || 'recording.webm';
  formData.append('file', audioBlob, uploadName);
  formData.append('model', model);
  console.log('发送SiliconFlow请求:', { apiUrl, model, fileSize: audioBlob.size });
  const response = await fetch(apiUrl, {
    method: 'POST',
    headers: { 'Authorization': `Bearer ${apiKey}` },
    body: formData
  });
  console.log('SiliconFlow响应状态:', response.status);
  if (!response.ok) {
    let errorMessage = `API请求失败: ${response.status} ${response.statusText}`;
    try {
      // Add the server's own message when the error body is JSON.
      const errorData = await response.json();
      console.log('SiliconFlow错误响应:', errorData);
      if (errorData.error && errorData.error.message) {
        errorMessage += ` - ${errorData.error.message}`;
      }
    } catch (e) {
      console.log('无法解析错误响应');
    }
    throw new Error(errorMessage);
  }
  const result = await response.json();
  console.log('SiliconFlow成功响应:', result);
  const transcription = result.text;
  if (!transcription) {
    throw new Error('转换结果为空');
  }
  // Reset the UI first, then store and show the new result.
  resetResults();
  currentTranscription = transcription;
  currentSpeakerSegments = [];
  resultText.textContent = transcription;
  exportBtn.style.display = 'inline-block';
  showStatus('✅ 转换完成!', 'success');
  console.log('SiliconFlow转换完成,结果长度:', transcription.length);
  console.log('保存的结果:', currentTranscription.substring(0, 100) + '...');
}
// Submit a recognition task to Volcengine for the audio at `audioUrl`.
// Returns the generated request ID, which is later used to query the result.
// Throws when the APP ID / Access Token are missing or the HTTP request fails.
async function submitVolcEngineTask(audioUrl) {
  const appKey = document.getElementById('volcAppKey').value.trim();
  const accessKey = document.getElementById('volcAccessKey').value.trim();
  const enableSpeakerDetection = document.getElementById('enableSpeakerDetection').checked;
  if (!appKey || !accessKey) {
    throw new Error('请填入火山引擎的APP ID和Access Token');
  }
  // Use the endpoint from the form's "submit" field; the built-in URL is
  // the fallback when that field is empty. The request previously ignored
  // whatever the user had entered there.
  const submitUrl = document.getElementById('volcSubmitUrl').value.trim() ||
    'https://openspeech.bytedance.com/api/v3/auc/bigmodel/submit';
  const taskId = generateUUID();
  const requestBody = {
    user: {
      uid: "web-user-" + Date.now()
    },
    audio: {
      // NOTE(review): the format is always sent as "mp3", whatever the real
      // file type is — confirm against the API's supported formats.
      format: "mp3",
      url: audioUrl
    },
    request: {
      model_name: "bigmodel",
      enable_itn: true,
      enable_punc: true,
      enable_speaker_info: enableSpeakerDetection,
      show_utterances: true
    }
  };
  console.log('提交的请求参数:', JSON.stringify(requestBody, null, 2));
  console.log('说话人分离是否启用:', enableSpeakerDetection);
  const response = await fetch(submitUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Api-App-Key': appKey,
      'X-Api-Access-Key': accessKey,
      'X-Api-Resource-Id': 'volc.bigasr.auc',
      'X-Api-Request-Id': taskId,
      'X-Api-Sequence': '-1'
    },
    body: JSON.stringify(requestBody)
  });
  console.log('提交任务响应状态:', response.status);
  console.log('提交任务响应头:', Object.fromEntries(response.headers.entries()));
  if (!response.ok) {
    // The service reports its status code and message in response headers.
    const statusCode = response.headers.get('X-Api-Status-Code');
    const message = response.headers.get('X-Api-Message');
    throw new Error(`任务提交失败: ${statusCode} - ${message}`);
  }
  return taskId;
}
// Query Volcengine once for the task identified by `taskId`.
// Resolves with { status: 'completed', data } when the result is ready, or
// { status: 'processing' } while the task is running or queued.
// Throws for any other status code reported by the service.
async function queryVolcEngineResult(taskId) {
  const appKey = document.getElementById('volcAppKey').value.trim();
  const accessKey = document.getElementById('volcAccessKey').value.trim();
  // Use the endpoint from the form's "query" field; the built-in URL is the
  // fallback when that field is empty.
  const queryUrl = document.getElementById('volcQueryUrl').value.trim() ||
    'https://openspeech.bytedance.com/api/v3/auc/bigmodel/query';
  // Codes that mean "not finished yet". 40000001 is the value this script
  // has always accepted.
  // NOTE(review): 20000001 (processing) and 20000002 (queued) are taken from
  // the Volcengine status-code table — confirm against the current docs.
  const inProgressCodes = ['40000001', '20000001', '20000002'];
  console.log('查询任务ID:', taskId);
  const response = await fetch(queryUrl, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'X-Api-App-Key': appKey,
      'X-Api-Access-Key': accessKey,
      'X-Api-Resource-Id': 'volc.bigasr.auc',
      'X-Api-Request-Id': taskId,
      'X-Api-Sequence': '-1'
    },
    body: '{}'
  });
  // The task state is read from a response header, not from the body.
  const statusCode = response.headers.get('X-Api-Status-Code');
  console.log('查询响应状态码:', statusCode);
  console.log('查询响应头:', Object.fromEntries(response.headers.entries()));
  if (statusCode === '20000000') {
    const result = await response.json();
    console.log('查询成功,完整返回结果:', JSON.stringify(result, null, 2));
    return { status: 'completed', data: result };
  }
  if (inProgressCodes.includes(statusCode)) {
    console.log('任务仍在处理中...');
    return { status: 'processing' };
  }
  const message = response.headers.get('X-Api-Message');
  console.error('查询失败:', statusCode, message);
  throw new Error(`查询失败: ${statusCode} - ${message}`);
}
// Poll Volcengine until the task completes, waiting two seconds between
// attempts. A failed query is retried; the error is only rethrown on the
// final attempt. Throws a timeout error once `maxAttempts` is used up.
async function pollVolcEngineResult(taskId, maxAttempts = 30) {
  const pause = () => new Promise((resolve) => setTimeout(resolve, 2000));
  for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
    try {
      const outcome = await queryVolcEngineResult(taskId);
      if (outcome.status === 'completed') {
        return outcome.data;
      }
      if (outcome.status === 'processing') {
        showStatus(`🔄 处理中... (${attempt}/${maxAttempts})`, 'info');
        await pause();
      }
    } catch (error) {
      const isLastAttempt = attempt === maxAttempts;
      if (isLastAttempt) {
        throw error;
      }
      await pause();
    }
  }
  throw new Error('处理超时,请稍后重试');
}
// Run a full Volcengine recognition: resolve the audio URL (typed in or
// obtained by uploading the file), submit the task, poll for the result and
// render it. With speaker separation enabled and utterances returned, the
// text is shown per speaker together with an analysis panel.
// Throws when the URL is missing or invalid, when submit/poll fails, or when
// the response carries no text.
async function convertWithVolcEngine(audioBlob) {
  showStatus('🔄 准备进行语音识别...', 'info');
  let audioUrl;
  if (audioUrlMethod.value === 'direct') {
    audioUrl = directAudioUrlInput.value.trim();
    if (!audioUrl) {
      throw new Error('请先输入音频文件URL');
    }
    if (!audioUrl.startsWith('http://') && !audioUrl.startsWith('https://')) {
      throw new Error('音频URL必须以 http:// 或 https:// 开头');
    }
    showStatus(`📁 使用直接URL: ${audioUrl}`, 'info');
    console.log('使用直接URL:', audioUrl);
  } else {
    showStatus('🔄 正在上传音频文件...', 'info');
    audioUrl = await createAudioUrl(audioBlob);
    console.log('音频上传完成,URL:', audioUrl);
  }
  showStatus('🔄 正在提交识别任务...', 'info');
  const taskId = await submitVolcEngineTask(audioUrl);
  console.log('任务提交成功,ID:', taskId);
  showStatus('🔄 正在处理音频,请稍候...', 'info');
  const volcResult = await pollVolcEngineResult(taskId);
  console.log('火山引擎处理完成:', volcResult);
  // A response without a `result` object is reported as an empty result
  // instead of failing with a TypeError.
  const recognition = volcResult && volcResult.result;
  const transcription = recognition ? recognition.text : '';
  if (!transcription) {
    throw new Error('转换结果为空');
  }
  // Reset the UI first, then store and show the plain transcription.
  resetResults();
  currentTranscription = transcription;
  currentSpeakerSegments = [];
  resultText.textContent = transcription;
  exportBtn.style.display = 'inline-block';
  const enableSpeakerDetection = document.getElementById('enableSpeakerDetection').checked;
  const hasUtterances = recognition.utterances && recognition.utterances.length > 0;
  if (!(enableSpeakerDetection && hasUtterances)) {
    // Speaker separation is off or no utterances came back: keep the plain text.
    resultText.textContent = transcription;
    showStatus('✅ 转换完成!', 'success');
  } else {
    const speakerSegments = parseVolcEngineSpeakerResult(volcResult);
    console.log('解析的说话人片段:', speakerSegments);
    if (speakerSegments.length > 0) {
      currentSpeakerSegments = speakerSegments;
      // Count distinct speaker IDs. The largest ID is not a count: IDs may
      // start at 0 or be non-contiguous.
      const speakerCount = new Set(speakerSegments.map(s => s.speaker)).size;
      // Build the speaker-labelled text shown in the result box.
      let speakerText = '';
      speakerSegments.forEach((segment) => {
        speakerText += `[说话人 ${segment.speaker}] (${formatTime(segment.startTime)} - ${formatTime(segment.endTime)})\n`;
        speakerText += `${segment.text}\n\n`;
      });
      resultText.textContent = speakerText.trim();
      if (volcResult.audio_info) {
        audioAnalysis.style.display = 'block';
        const urlMethod = audioUrlMethod.value === 'direct' ? '直接URL' : '免费托管服务';
        analysisInfo.innerHTML = `
<p><strong>音频时长:</strong> ${(volcResult.audio_info.duration / 1000).toFixed(2)} 秒</p>
<p><strong>检测到说话人数量:</strong> ${speakerCount} 人</p>
<p><strong>音频片段数量:</strong> ${speakerSegments.length} 个</p>
<p><strong>API提供商:</strong> 火山引擎 (专业级)</p>
<p><strong>音频获取方式:</strong> ${urlMethod}</p>
`;
      }
      showStatus(`✅ 转换完成!已检测到 ${speakerCount} 个说话人`, 'success');
    } else {
      // Nothing could be parsed into segments: keep the plain text.
      resultText.textContent = transcription;
      showStatus('✅ 转换完成!未检测到多个说话人', 'success');
    }
  }
  console.log('火山引擎转换完成,结果长度:', transcription.length);
  console.log('保存的结果:', currentTranscription.substring(0, 100) + '...');
}
// Convert the `result.utterances` array of a Volcengine response into
// segments of the form { speaker, startTime, endTime, text }.
// Times are divided by 1000 (milliseconds to seconds). Returns [] when the
// response has no utterances.
// The speaker ID is read from the first field that is present, in this
// order: additions.speaker, speaker_id, spk_id, speaker, channel_id, spk.
// It defaults to 1 when none is present.
function parseVolcEngineSpeakerResult(volcResult) {
  console.log('完整的火山引擎返回结果:', volcResult);
  if (!volcResult.result || !volcResult.result.utterances) {
    console.log('没有找到utterances数据');
    return [];
  }
  // Numeric-looking IDs become integers so that "1" and 1 name the same
  // speaker; anything that is not numeric is kept unchanged, not turned into NaN.
  const normalizeSpeakerId = (raw) => {
    const parsed = parseInt(raw, 10);
    return Number.isNaN(parsed) ? raw : parsed;
  };
  const fallbackFields = ['speaker_id', 'spk_id', 'speaker', 'channel_id', 'spk'];
  const utterances = volcResult.result.utterances;
  console.log('utterances数据:', utterances);
  console.log('utterances数组长度:', utterances.length);
  const segments = [];
  utterances.forEach((utterance, index) => {
    console.log(`utterance ${index} 完整结构:`, JSON.stringify(utterance, null, 2));
    let speakerId = 1; // default when no speaker field is found
    if (utterance.additions && utterance.additions.speaker != null) {
      speakerId = normalizeSpeakerId(utterance.additions.speaker);
      console.log(`找到additions.speaker: ${speakerId}`);
    } else {
      const field = fallbackFields.find((name) => utterance[name] != null);
      if (field !== undefined) {
        speakerId = normalizeSpeakerId(utterance[field]);
        console.log(`找到${field}: ${speakerId}`);
      } else {
        console.log(`utterance ${index} 没有找到说话人ID字段,使用默认值1`);
      }
    }
    // An utterance without text yields an empty string instead of crashing
    // the parse on `.substring`.
    const text = typeof utterance.text === 'string' ? utterance.text : '';
    segments.push({
      speaker: speakerId,
      startTime: utterance.start_time / 1000,
      endTime: utterance.end_time / 1000,
      text: text
    });
    console.log(`解析结果 ${index}: speaker=${speakerId}, text="${text.substring(0, 20)}..."`);
  });
  console.log('解析后的segments:', segments);
  // Log the distinct speakers for debugging.
  const speakers = [...new Set(segments.map(s => s.speaker))];
  console.log('检测到的说话人ID列表:', speakers);
  console.log('说话人数量:', speakers.length);
  return segments;
}
// Deprecated stub: only logs a notice and ignores `segments`.
function displayVolcEngineSpeakerResults(segments) {
  const deprecationNotice = 'displayVolcEngineSpeakerResults函数已废弃';
  console.log(deprecationNotice);
}
// Format a duration in seconds as "m:ss" (minutes are not wrapped into hours).
function formatTime(seconds) {
  const wholeMinutes = Math.floor(seconds / 60);
  const paddedSeconds = String(Math.floor(seconds % 60)).padStart(2, '0');
  return wholeMinutes + ':' + paddedSeconds;
}
// Deprecated stub: only logs a notice.
function toggleViewMode() {
  const deprecationNotice = 'toggleViewMode函数已废弃';
  console.log(deprecationNotice);
}
// Render troubleshooting help for a failed conversion into the result box.
// `provider` is 'siliconflow' or 'volcengine'; `error` is the caught Error.
// For Volcengine, a message containing 'audio download failed' or
// 'Invalid audio URI' gets a detailed guide about direct links.
// The error text is HTML-escaped before being written through innerHTML,
// because it can contain text returned by the remote service.
function displayErrorInfo(provider, error) {
  const escapeHtml = (text) => String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
  const details = escapeHtml(error.message);
  if (provider === 'siliconflow') {
    resultText.innerHTML = `转换失败。请检查:
1. API密钥是否正确
2. 网络连接是否正常
3. 音频文件格式是否支持
错误详情:${details}`;
  } else if (provider === 'volcengine') {
    // Check whether the service could not download the audio file.
    if (error.message.includes('audio download failed') || error.message.includes('Invalid audio URI')) {
      resultText.innerHTML = `
<div style="color: #e74c3c; margin-bottom: 15px;">
<strong>❌ 音频文件下载失败</strong>
</div>
<div style="background: #fff3cd; padding: 15px; border-radius: 8px; border-left: 4px solid #ffc107; margin-bottom: 15px;">
<strong>🔍 问题原因:</strong><br>
您提供的URL不是有效的音频文件直链。火山引擎API无法直接下载该文件。
</div>
<div style="background: #d1ecf1; padding: 15px; border-radius: 8px; border-left: 4px solid #bee5eb; margin-bottom: 15px;">
<strong>✅ 解决方案:</strong><br>
<br>
<strong>1. 检查URL是否为直链:</strong><br>
• 在浏览器中直接打开您的URL<br>
• 如果直接开始下载/播放音频文件,则为有效直链<br>
• 如果跳转到登录页面或分享页面,则为无效链接<br>
<br>
<strong>2. 获取有效直链的方法:</strong><br>
• <strong>GitHub方式:</strong>上传到GitHub仓库,点击文件→Raw,复制链接<br>
• <strong>自有服务器:</strong>上传到您的网站服务器<br>
• <strong>专业CDN:</strong>使用阿里云OSS、腾讯云COS等服务<br>
• <strong>免费托管:</strong>切换到"通过上传服务获取URL"模式<br>
</div>
<div style="background: #f8d7da; padding: 10px; border-radius: 8px; border-left: 4px solid #dc3545;">
<strong>⚠️ 以下链接类型无效:</strong><br>
• 百度网盘、OneDrive、GoogleDrive分享链接<br>
• 需要登录验证的链接<br>
• 重定向链接<br>
</div>
<strong>错误详情:</strong> ${details}`;
    } else {
      resultText.innerHTML = `转换失败。请检查:
1. 火山引擎 APP ID 和 Access Token 是否正确
2. 网络连接是否正常
3. 音频文件URL是否为有效的直链地址
错误详情:${details}`;
    }
  }
}
// Check the direct-URL field and show the verdict under it.
// A URL passes when it starts with http:// or https:// AND parses as a
// complete URL, so fragments such as "https://" are rejected. This is a
// syntax check only; it does not fetch the URL.
function validateUrl() {
  const url = directAudioUrlInput.value.trim();
  const resultDiv = urlValidationResult;
  resultDiv.style.display = 'none'; // hide the previous verdict
  if (!url) {
    resultDiv.textContent = '请先输入URL';
    // Drop the red/green styling left over from an earlier verdict.
    resultDiv.style.color = '';
    resultDiv.style.fontWeight = '';
    resultDiv.style.display = 'block';
    return;
  }
  const hasHttpPrefix = url.startsWith('http://') || url.startsWith('https://');
  let parses = false;
  if (hasHttpPrefix) {
    try {
      new URL(url);
      parses = true;
    } catch (e) {
      parses = false;
    }
  }
  if (hasHttpPrefix && parses) {
    resultDiv.textContent = '✅ 这是一个有效的直链URL!';
    resultDiv.style.color = 'green';
  } else {
    resultDiv.textContent = '❌ 这不是一个有效的直链URL。请确保它以 http:// 或 https:// 开头。';
    resultDiv.style.color = 'red';
  }
  resultDiv.style.fontWeight = 'bold';
  resultDiv.style.display = 'block';
}
// Handle a newly selected file: validate type and size, remember it as the
// current audio, load it into the preview player and refresh the convert button.
audioFileInput.addEventListener('change', (event) => {
  const file = event.target.files[0];
  if (!file) {
    return;
  }
  console.log('选择的文件:', file.name, file.type, file.size);
  // Only audio MIME types are accepted.
  if (!file.type.startsWith('audio/')) {
    showStatus('❌ 请选择音频文件', 'error');
    return;
  }
  // Size limit: 50 MB.
  if (file.size > 50 * 1024 * 1024) {
    showStatus('❌ 文件过大,请选择小于50MB的音频文件', 'error');
    return;
  }
  currentAudioBlob = file;
  // Point the player at the new file, then release the object URL of the
  // previous selection so it does not stay allocated for the page's lifetime.
  const previousSrc = audioPlayer.src;
  audioPlayer.src = URL.createObjectURL(file);
  if (previousSrc && previousSrc.startsWith('blob:')) {
    URL.revokeObjectURL(previousSrc);
  }
  audioPlayer.style.display = 'block';
  updateConvertButtonState();
});
// Button clicks: log the click, then run the action.
convertBtn.addEventListener('click', () => {
  console.log('点击转换按钮');
  convertToText();
});
exportBtn.addEventListener('click', () => {
  console.log('点击导出按钮');
  exportToTxt();
});
// Form controls whose events map straight onto one handler function.
for (const [element, eventName, handler] of [
  [apiProvider, 'change', switchApiProvider],
  [audioUrlMethod, 'change', switchAudioUrlMethod],
  [directAudioUrlInput, 'input', updateConvertButtonState],
  [validateUrlBtn, 'click', validateUrl]
]) {
  element.addEventListener(eventName, handler);
}
// One-time setup once the DOM is ready: show the initial hint, hide the
// action buttons, apply the provider layout, and refresh the convert button
// whenever a credential field changes.
document.addEventListener('DOMContentLoaded', () => {
  console.log('页面加载完成');
  showStatus('🔧 请选择API服务商并配置相关信息。', 'info');
  [convertBtn, exportBtn].forEach((button) => {
    button.style.display = 'none';
  });
  switchApiProvider();
  const credentialFieldIds = ['apiKey', 'volcAppKey', 'volcAccessKey'];
  credentialFieldIds.forEach((fieldId) => {
    document.getElementById(fieldId).addEventListener('input', updateConvertButtonState);
  });
});
</script>
</body>
</html>
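<!--
The text below is not part of the page; it is leftover listing text from the site this file was copied from.
分类: 未分类
-
录音转文字
-
Wan2.1工作流
链接: https://pan.baidu.com/s/1JTKPfhSXmGbpjDOg-HheDg 提取码: 842q 复制这段内容后打开百度网盘手机App,操作更方便哦
–来自百度网盘超级会员v9的分享 -
说大事专用
今日无大事。
-->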