epic-1-stt-sdk-api-spec.md 8.8 KB

STT SDK API 规范

概述

本文档定义语音转文字(STT)数据流SDK的API规范,该SDK基于现有stt-demo应用的功能封装而成。

核心接口

SDK 初始化接口

/**
 * Configuration object passed to `SttSdk.initialize`.
 * Only `appId` is required; every other field may be omitted.
 */
interface SttSdkConfig {
  /** Application identifier. */
  appId: string;
  /** Optional application certificate. */
  appCertificate?: string;
  /** Optional RTM configuration. */
  rtmConfig?: RTMConfig;
  /** Optional STT service URL. */
  sttServiceUrl?: string;
}

/**
 * Top-level SDK surface: lifecycle methods, manager accessors and client factories.
 */
interface SttSdk {
  /** Initializes the SDK with the supplied configuration. */
  initialize(config: SttSdkConfig): Promise<void>;

  /** Destroys the resources held by the SDK. */
  destroy(): Promise<void>;

  /** Returns the STT manager instance. */
  getSttManager(): SttManager;
  /** Returns the RTM manager instance. */
  getRtmManager(): RtmManager;

  /** Creates an STT client instance (general-purpose interface). */
  createSttClient(options: SttClientOptions): SttClient;
  /** Creates an RTM client instance (general-purpose interface). */
  createRtmClient(options: RtmClientOptions): RtmClient;
}

STT管理器接口(基于现有SttManager封装)

/**
 * STT manager: controls the transcription lifecycle and emits transcription events.
 */
interface SttManager {
  /** Initializes the STT manager for a user in a channel. */
  init(options: { userId: string | number; channel: string; userName: string }): Promise<void>;

  /** Starts transcription for the given languages. */
  startTranscription(options: { languages: ILanguageItem[]; autoStart?: boolean }): Promise<void>;

  /** Stops transcription. */
  stopTranscription(): Promise<void>;

  /** Queries the transcription status. */
  queryTranscription(): Promise<any>;

  /** Updates the transcription configuration. */
  updateTranscription(options: { data: any; updateMaskList: string[] }): Promise<void>;

  /** Extends the transcription duration. */
  extendDuration(options: { startTime?: number; duration?: number }): Promise<void>;

  /** Destroys the manager. */
  destroy(): Promise<void>;

  // Event subscription: one overload per supported event name.
  on(event: "transcriptionStart", callback: () => void): void;
  on(event: "transcriptionStop", callback: () => void): void;
  on(event: "transcriptionResult", callback: (result: TranscriptionResult) => void): void;
  on(event: "error", callback: (error: Error) => void): void;
}

RTM管理器接口(基于现有RtmManager封装)

/**
 * RTM manager: channel membership, shared STT/language data, locking and RTM events.
 */
interface RtmManager {
  /** Joins a channel. */
  join(options: { userId: string; channel: string; userName: string }): Promise<void>;

  /** Leaves the channel. */
  leave(): Promise<void>;

  /** Updates the STT data. */
  updateSttData(data: ISttData): Promise<void>;

  /** Updates the language configuration. */
  updateLanguages(languages: ILanguageItem[]): Promise<void>;

  /** Acquires the lock. */
  acquireLock(): Promise<void>;

  /** Releases the lock. */
  releaseLock(): Promise<void>;

  // Event subscription: one overload per supported event name.
  on(event: "userListChanged", callback: (users: ISimpleUserInfo[]) => void): void;
  on(event: "languagesChanged", callback: (languages: ILanguageSelect) => void): void;
  on(event: "sttDataChanged", callback: (data: ISttData) => void): void;
  on(event: "messageReceived", callback: (message: any) => void): void;
}

通用客户端接口

/**
 * STT client interface: read-only state, transcription control, event
 * subscription and cleanup.
 */
interface SttClient {
  // State
  readonly isInitialized: boolean
  readonly isTranscribing: boolean
  readonly transcriptionResults: TranscriptionResult[]
  readonly currentLanguage: ILanguageItem | null
  readonly error: Error | null

  /**
   * Initializes the client.
   *
   * @param options Optional. The usage examples in this document call
   *   `initialize()` without arguments, so the parameter must be omittable.
   */
  initialize(options?: SttInitOptions): Promise<void>

  /**
   * Starts transcription.
   *
   * @param options Optional. The usage examples in this document call
   *   `startTranscription()` both with and without an options object.
   */
  startTranscription(options?: StartOptions): Promise<void>

  /** Stops transcription. */
  stopTranscription(): Promise<void>

  /** Updates the languages used by the client. */
  updateLanguages(languages: ILanguageItem[]): Promise<void>

  // Event subscription: one overload per supported event name.
  on(event: "initialized", callback: () => void): void
  on(event: "transcriptionStart", callback: () => void): void
  on(event: "transcriptionStop", callback: () => void): void
  on(event: "transcriptionResult", callback: (result: TranscriptionResult) => void): void
  on(event: "error", callback: (error: Error) => void): void

  /** Cleans up the client. */
  destroy(): Promise<void>
}

/**
 * RTM client interface: connection state, channel membership, messaging,
 * event subscription and cleanup.
 */
interface RtmClient {
  // State
  readonly isConnected: boolean;
  readonly users: ISimpleUserInfo[];
  readonly channelLanguages: ILanguageSelect | null;
  readonly sttStatus: ISttData | null;

  /** Joins a channel. */
  joinChannel(options: JoinOptions): Promise<void>;
  /** Leaves the channel. */
  leaveChannel(): Promise<void>;

  /** Sends a message. */
  sendMessage(message: any): Promise<void>;

  // Event subscription: one overload per supported event name.
  on(event: "connected", callback: () => void): void;
  on(event: "disconnected", callback: () => void): void;
  on(event: "userListChanged", callback: (users: ISimpleUserInfo[]) => void): void;
  on(event: "languagesChanged", callback: (languages: ILanguageSelect) => void): void;
  on(event: "sttDataChanged", callback: (data: ISttData) => void): void;
  on(event: "messageReceived", callback: (message: any) => void): void;

  /** Cleans up the client. */
  destroy(): Promise<void>;
}

/**
 * Client configuration: options accepted by `SttSdk.createSttClient`.
 */
interface SttClientOptions {
  /** User identifier; either a string or a number. */
  userId: string | number;
  /** Channel name. */
  channel: string;
  /** User display name. */
  userName: string;
  /** Optional list of language items. */
  languages?: ILanguageItem[];
}

/**
 * Options accepted by `SttSdk.createRtmClient`.
 */
interface RtmClientOptions {
  /** User identifier (string only here, unlike `SttClientOptions.userId`). */
  userId: string;
  /** Channel name. */
  channel: string;
  /** User display name. */
  userName: string;
}

配置接口

语言配置

/**
 * One language entry: a transcription language and its translation targets.
 * Both fields are optional.
 */
interface ILanguageItem {
  /** Transcription language code. */
  source?: string;
  /** Array of translation target language codes. */
  target?: string[];
}

/**
 * Language selection payload delivered by the `languagesChanged` event.
 * NOTE(review): presumably two transcription slots, each paired with its own
 * translation list — confirm against the producer of this data.
 */
interface ILanguageSelect {
  transcribe1: string;
  translate1List: string[];
  transcribe2: string;
  translate2List: string[];
}

STT数据接口

/**
 * STT data shared through RTM (see `RtmManager.updateSttData`).
 * Every field is optional.
 */
interface ISttData {
  /** Either "start" or "end". */
  status?: "start" | "end";
  taskId?: string;
  token?: string;
  /** NOTE(review): unit is not stated in this document — confirm. */
  startTime?: number;
  /** NOTE(review): unit is not stated in this document — confirm. */
  duration?: number;
}

转录结果接口

/**
 * A single transcription result, as delivered by the `transcriptionResult` event.
 */
interface TranscriptionResult {
  /** Transcribed text. */
  text: string;
  /** Language of the result. */
  language: string;
  /** Confidence value. NOTE(review): range is not stated in this document — confirm. */
  confidence: number;
  /** Numeric timestamp; the usage example passes it to `new Date(...)`. */
  timestamp: number;
  /** Identifier of the user the result belongs to. */
  userId: string;
  /** Optional translations, keyed by language. */
  translations?: Record<string, string>;
}

使用示例

基础使用(通用TypeScript)

import { createSttSdk } from "@agora/stt-sdk"

// Initialize the SDK
const sdk = createSttSdk()
await sdk.initialize({
  appId: "your-app-id",
  // NOTE(review): the certificate is a credential — confirm it is acceptable
  // to embed it wherever this snippet runs.
  appCertificate: "your-certificate",
})

// Create the STT client
const sttClient = sdk.createSttClient({
  userId: "user-123",
  channel: "test-channel",
  userName: "Test User",
  languages: [{ source: "zh-CN", target: ["en-US", "ja-JP"] }],
})

try {
  // Subscribe to events before initializing the client
  sttClient.on("transcriptionResult", (result) => {
    console.log("转录结果:", result.text)
    console.log("语言:", result.language)
    console.log("时间:", new Date(result.timestamp).toLocaleTimeString())
  })

  sttClient.on("error", (error) => {
    console.error("STT错误:", error)
  })

  // Initialize the client
  await sttClient.initialize()

  // Start transcription
  await sttClient.startTranscription()

  // Stop transcription
  await sttClient.stopTranscription()
} finally {
  // Release resources even when one of the awaited steps above rejects;
  // without the `finally`, a failure would skip both destroy() calls.
  await sttClient.destroy()
  await sdk.destroy()
}

高级配置和多框架示例

// Custom configuration
const sdk = createSttSdk()
await sdk.initialize({
  appId: "your-app-id",
  rtmConfig: { enableLogUpload: true, logFilter: "debug" },
  sttServiceUrl: "https://custom-stt-service.com",
})

// Use the managers directly (advanced usage)
const sttManager = sdk.getSttManager()
const rtmManager = sdk.getRtmManager()

// Subscribe to events; each handler just logs its payload
sttManager.on("transcriptionResult", (result) => console.log("转录结果:", result))
rtmManager.on("userListChanged", (users) => console.log("用户列表更新:", users))

// React 适配器示例(可选包)
import { useSttClient } from '@agora/stt-sdk/react';

/**
 * Example React component: a start button followed by the list of
 * transcription results exposed by `useSttClient`.
 */
function TranscriptionComponent() {
  const sttClient = useSttClient({
    userId: "user-123",
    channel: "test-channel",
    userName: "Test User",
  });

  // Starts transcription from zh-CN with en-US as the translation target.
  async function handleStart() {
    await sttClient.startTranscription({
      languages: [{ source: "zh-CN", target: ["en-US"] }],
    });
  }

  // The button label reflects whether transcription is currently running.
  const buttonLabel = sttClient.isTranscribing ? "转录中..." : "开始转录";

  // One <div><p>…</p></div> per result, keyed by its position in the array.
  const resultItems = sttClient.transcriptionResults.map((entry, position) => (
    <div key={position}>
      <p>{entry.text}</p>
    </div>
  ));

  return (
    <div>
      <button onClick={handleStart} disabled={sttClient.isTranscribing}>
        {buttonLabel}
      </button>
      <div>{resultItems}</div>
    </div>
  );
}

// Vue 适配器示例(可选包)
import { useSttClient } from '@agora/stt-sdk/vue';

export default {
  // Creates the STT client and exposes it together with a start handler.
  setup() {
    const sttClient = useSttClient({
      userId: "user-123",
      channel: "test-channel",
      userName: "Test User",
    });

    // Starts transcription without passing any options.
    async function handleStart() {
      await sttClient.startTranscription();
    }

    return { sttClient, handleStart };
  },
};

错误处理

// Error type definitions
// String enum of SDK error codes; each member's value equals its own name.
enum SttErrorCode {
  INITIALIZATION_FAILED = "INITIALIZATION_FAILED",
  NETWORK_ERROR = "NETWORK_ERROR",
  PERMISSION_DENIED = "PERMISSION_DENIED",
  INVALID_CONFIG = "INVALID_CONFIG",
  TRANSCRIPTION_FAILED = "TRANSCRIPTION_FAILED",
}

/**
 * Error shape used by the SDK: a standard `Error` extended with an error code
 * and optional details.
 */
interface SttError extends Error {
  /** Error code identifying the failure category. */
  code: SttErrorCode;
  /** Optional extra information about the failure. */
  details?: any;
}

// Error-handling example
try {
  await sdk.initialize(config)
} catch (error: unknown) {
  // Catch variables are `unknown` under strict mode, and a thrown value may
  // even be null/undefined, so read the fields defensively instead of
  // dereferencing `error.code` directly.
  const sttError = (error ?? {}) as Partial<SttError>
  if (sttError.code === SttErrorCode.INITIALIZATION_FAILED) {
    console.error("SDK初始化失败:", sttError.details)
  }
}

性能优化建议

  1. 连接复用: SDK内部复用RTM连接,避免重复建立连接
  2. 事件去重: 对高频事件进行适当的去重处理
  3. 内存管理: 及时清理不再使用的转录结果
  4. 错误重试: 对网络错误实现自动重试机制
  5. 懒加载: 按需加载SDK模块,减少初始包大小