2
0
Prechádzať zdrojové kódy

✨ feat(rtm): 实现真实的 RTM 管理器适配器

- 集成 agora-rtm SDK 实现完整的 RTM 功能
- 添加 RTM 客户端连接、登录、订阅频道等核心功能
- 实现元数据管理、锁机制、用户状态管理等高级功能
- 添加事件监听处理,支持状态、用户列表、元数据变化等事件

✨ feat(stt): 实现真实的 STT 管理器适配器

- 集成真实的 STT API 接口,支持开始、停止、查询、更新转录功能
- 实现 token 获取、任务管理、结果查询等完整流程
- 与 RTM 管理器深度集成,实现元数据同步和锁机制
- 添加错误处理和状态管理,支持多种错误类型

📝 docs(types): 扩展类型定义

- 添加 RTM 事件类型定义,支持状态、语言变化、STT 数据变化等事件
- 扩展错误类型定义,添加应用 ID 必需、token 错误等新错误码
- 完善 API 接口类型定义,支持完整的 STT 操作流程

✅ test(stt): 更新测试用例

- 重构测试用例以适配新的真实实现
- 添加 RTM 管理器模拟,支持完整的集成测试
- 扩展测试覆盖范围,包括错误处理、API 调用等场景
- 更新测试配置,模拟 agora-rtm SDK 和 fetch API
yourname 2 mesiacov pred
rodič
commit
cdeaf50903

+ 265 - 7
packages/stt-sdk-core/src/managers/rtm-manager-adapter.ts

@@ -1,3 +1,4 @@
+import AgoraRTM, { RTMClient, RTMConfig, ChannelType, MetadataItem } from 'agora-rtm'
 import { AGEventEmitter } from '../core/event-emitter'
 import { SttError } from '../core/stt-error'
 import type {
@@ -8,15 +9,25 @@ import type {
   RtmChannelMetadata,
 } from '../types'
 
+const { RTM } = AgoraRTM
+const CHANNEL_TYPE: ChannelType = 'MESSAGE'
+const LOCK_STT = 'lock_stt'
+
 export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IRtmManagerAdapter {
   private _joined = false
   private _config?: RtmManagerConfig
   private _userId: string = ''
   private _channel: string = ''
   private _userMap: Map<string, RtmUserInfo> = new Map()
+  private _client?: RTMClient
+  private _appId: string = ''
+  private _rtmConfig: RTMConfig = {}
 
-  constructor() {
+  constructor(appId?: string) {
     super()
+    if (appId) {
+      this._appId = appId
+    }
   }
 
   async join(config: RtmManagerConfig): Promise<void> {
@@ -31,17 +42,42 @@ export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IR
         throw new SttError('ALREADY_JOINED', 'RTM manager is already joined to a channel')
       }
 
+      if (!this._appId) {
+        throw new SttError('APP_ID_REQUIRED', 'App ID is required for RTM connection')
+      }
+
       this._userId = userId
       this._channel = channel
       this._config = config
 
-      // 模拟RTM连接过程
       this.emit('connecting', { channel, userId })
 
-      await new Promise((resolve) => setTimeout(resolve, 200))
+      // 创建RTM客户端
+      this._client = new RTM(this._appId, userId, this._rtmConfig)
+
+      // 设置事件监听
+      this._listenRtmEvents()
+
+      // 登录
+      await this._client.login()
 
       this._joined = true
 
+      // 订阅频道
+      await this._client.subscribe(channel, {
+        withPresence: true,
+        withMetadata: true,
+      })
+
+      // 检查主机状态
+      await this._checkHost()
+
+      // 更新用户信息
+      await this._updateUserInfo(userName)
+
+      // 设置锁
+      await this._setLock()
+
       // 添加当前用户到用户列表
       this._userMap.set(userId, { userId, userName })
 
@@ -64,7 +100,22 @@ export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IR
     try {
       this.emit('metadataUpdating', { data })
 
-      await new Promise((resolve) => setTimeout(resolve, 50))
+      // 设置频道元数据
+      if (this._client) {
+        const metadataItems: MetadataItem[] = []
+        for (const key in data) {
+          if (data[key as keyof RtmChannelMetadata] !== undefined) {
+            metadataItems.push({
+              key,
+              value: JSON.stringify(data[key as keyof RtmChannelMetadata]),
+            })
+          }
+        }
+
+        if (metadataItems.length > 0) {
+          await this._client.storage.setChannelMetadata(this._channel, CHANNEL_TYPE, metadataItems)
+        }
+      }
 
       this.emit('metadataUpdated', { data })
     } catch (error) {
@@ -84,7 +135,34 @@ export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IR
     try {
       this.emit('languagesUpdating', { languages })
 
-      await new Promise((resolve) => setTimeout(resolve, 50))
+      // 格式化语言数据
+      const message: Record<string, any> = {
+        transcribe1: '',
+        translate1List: [],
+        transcribe2: '',
+        translate2List: [],
+      }
+
+      const language1 = languages[0]
+      if (language1?.source) {
+        message.transcribe1 = language1.source
+      }
+      if (language1?.target) {
+        message.translate1List.push(...language1.target)
+      }
+
+      const language2 = languages[1]
+      if (language2) {
+        if (language2.source) {
+          message.transcribe2 = language2.source
+        }
+        if (language2.target) {
+          message.translate2List.push(...language2.target)
+        }
+      }
+
+      // 更新元数据
+      await this.updateSttData(message)
 
       this.emit('languagesUpdated', { languages })
     } catch (error) {
@@ -104,7 +182,10 @@ export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IR
     try {
       this.emit('lockAcquiring')
 
-      await new Promise((resolve) => setTimeout(resolve, 100))
+      // 获取锁
+      if (this._client) {
+        await this._client.lock.acquireLock(this._channel, CHANNEL_TYPE, LOCK_STT)
+      }
 
       this.emit('lockAcquired')
     } catch (error) {
@@ -124,7 +205,10 @@ export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IR
     try {
       this.emit('lockReleasing')
 
-      await new Promise((resolve) => setTimeout(resolve, 50))
+      // 释放锁
+      if (this._client) {
+        await this._client.lock.releaseLock(this._channel, CHANNEL_TYPE, LOCK_STT)
+      }
 
       this.emit('lockReleased')
     } catch (error) {
@@ -137,12 +221,18 @@ export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IR
     try {
       this.emit('destroying')
 
+      // 登出
+      if (this._client) {
+        await this._client.logout()
+      }
+
       // 清理资源
       this._joined = false
       this._config = undefined
       this._userId = ''
       this._channel = ''
       this._userMap.clear()
+      this._client = undefined
 
       this.emit('destroyed')
 
@@ -172,4 +262,172 @@ export class RtmManagerAdapter extends AGEventEmitter<RtmEventMap> implements IR
   get userList(): RtmUserInfo[] {
     return Array.from(this._userMap.values())
   }
+
+  // 私有方法
+  private _listenRtmEvents(): void {
+    if (!this._client) return
+
+    this._client.addEventListener('status', (res) => {
+      this.emit('status', res)
+    })
+
+    this._client.addEventListener('presence', (res) => {
+      const { channelName, eventType, snapshot = [], stateChanged, publisher } = res
+      if (channelName === this._channel) {
+        switch (eventType) {
+          case 'SNAPSHOT':
+            this._dealPresenceSnapshot(snapshot as any[])
+            break
+          case 'REMOTE_STATE_CHANGED':
+            this._dealPresenceRemoteState(stateChanged)
+            break
+          case 'REMOTE_LEAVE':
+          case 'REMOTE_TIMEOUT':
+            if (this._userMap.has(publisher)) {
+              this._userMap.delete(publisher)
+              this._emitUserListChanged()
+            }
+            break
+        }
+      }
+    })
+
+    this._client.addEventListener('storage', (res) => {
+      const { eventType, data, channelName } = res
+      const { metadata } = data
+      if (channelName === this._channel) {
+        switch (eventType) {
+          case 'SNAPSHOT':
+          case 'UPDATE':
+            this._dealStorageDataChanged(metadata)
+            break
+        }
+      }
+    })
+  }
+
+  private _dealPresenceRemoteState(stateChanged: any): void {
+    if (stateChanged.type === 'UserInfo') {
+      const userInfo = {
+        userName: stateChanged.userName,
+        userId: stateChanged.userId,
+      }
+      if (userInfo.userId) {
+        this._userMap.set(userInfo.userId, userInfo)
+        this._emitUserListChanged()
+      }
+    }
+  }
+
+  private _dealPresenceSnapshot(snapshot?: any[]): void {
+    if (!snapshot?.length) return
+
+    let changed = false
+    for (const v of snapshot) {
+      const { states } = v
+      if (states.type === 'UserInfo') {
+        const userInfo = {
+          userName: states.userName,
+          userId: states.userId,
+        }
+        if (userInfo.userId && userInfo.userId !== this._userId) {
+          this._userMap.set(userInfo.userId, userInfo)
+          changed = true
+        }
+      }
+    }
+
+    if (changed) {
+      this._emitUserListChanged()
+    }
+  }
+
+  private _dealStorageDataChanged(metadata: any): void {
+    // 处理语言变化
+    const { transcribe1, translate1List, transcribe2, translate2List } = metadata
+    if (transcribe1?.value) {
+      const parseTranscribe1 = JSON.parse(transcribe1.value)
+      const parseTranslate1 = JSON.parse(translate1List.value)
+      const parseTranscribe2 = JSON.parse(transcribe2.value)
+      const parseTranslate2 = JSON.parse(translate2List.value)
+
+      this.emit('languagesChanged', {
+        transcribe1: parseTranscribe1,
+        translate1List: parseTranslate1,
+        transcribe2: parseTranscribe2,
+        translate2List: parseTranslate2,
+      })
+    }
+
+    // 处理STT数据变化
+    const { status, taskId, token, startTime, duration } = metadata
+    if (status?.value) {
+      this.emit('sttDataChanged', {
+        status: JSON.parse(status.value),
+        taskId: taskId ? JSON.parse(taskId.value) : undefined,
+        token: token ? JSON.parse(token.value) : undefined,
+        startTime: startTime ? JSON.parse(startTime.value) : undefined,
+        duration: duration ? JSON.parse(duration.value) : undefined,
+      })
+    }
+  }
+
+  private _emitUserListChanged(): void {
+    this.emit('userListChanged', Array.from(this._userMap.values()))
+  }
+
+  private async _updateUserInfo(userName: string): Promise<void> {
+    if (!this._joined) return
+
+    await this._setPresenceState({
+      type: 'UserInfo',
+      userId: this._userId,
+      userName,
+    })
+  }
+
+  private async _setPresenceState(attr: Record<string, any>): Promise<void> {
+    if (!this._joined) return
+
+    const state: Record<string, string> = {}
+    for (const key in attr) {
+      const value = attr[key]
+      state[key] = typeof value === 'string' ? value : JSON.stringify(value)
+    }
+
+    await this._client?.presence.setState(this._channel, CHANNEL_TYPE, state)
+  }
+
+  private async _checkHost(): Promise<void> {
+    const result = await this._client?.presence.whoNow(this._channel, CHANNEL_TYPE)
+    if (result?.totalOccupancy === 1) {
+      await this._removeChannelMetadata()
+    }
+  }
+
+  private async _removeChannelMetadata(metadata?: Record<string, any>): Promise<void> {
+    const data: MetadataItem[] = []
+    const options: any = {}
+
+    for (const key in metadata) {
+      data.push({
+        key,
+        value: JSON.stringify(metadata[key]),
+      })
+    }
+
+    if (data.length) {
+      options.data = data
+    }
+
+    await this._client?.storage.removeChannelMetadata(this._channel, CHANNEL_TYPE, options)
+  }
+
+  private async _setLock(): Promise<void> {
+    const { lockDetails = [] } =
+      (await this._client?.lock.getLock(this._channel, CHANNEL_TYPE)) || {}
+    if (!lockDetails.find((v: any) => v.lockName === LOCK_STT)) {
+      await this._client?.lock.setLock(this._channel, CHANNEL_TYPE, LOCK_STT)
+    }
+  }
 }

+ 368 - 18
packages/stt-sdk-core/src/managers/stt-manager-adapter.ts

@@ -6,16 +6,63 @@ import type {
   SttStartOptions,
   SttUpdateOptions,
   SttEventMap,
+  ILanguageItem,
 } from '../types'
 
+// STT API接口定义
+interface ApiSTTResponse {
+  tokenName?: string
+  taskId?: string
+}
+
+interface ApiSTTStartOptions {
+  uid: string | number
+  channel: string
+  languages: ILanguageItem[]
+  token: string
+}
+
+interface ApiSTTStopOptions {
+  taskId: string
+  token: string
+  uid: string | number
+  channel: string
+}
+
+interface ApiSTTQueryOptions {
+  taskId: string
+  token: string
+  uid: string | number
+  channel: string
+}
+
+interface ApiSTTUpdateOptions {
+  taskId: string
+  token: string
+  uid: string | number
+  channel: string
+  data: any
+  updateMaskList: string[]
+}
+
+// 真实的API调用实现
 export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements ISttManagerAdapter {
   private _initialized = false
   private _config?: SttManagerConfig
   private _userId: string | number = ''
   private _channel: string = ''
+  private _rtmManager?: any
+  private _option?: { token: string; taskId: string }
+  private _appId: string = ''
+  private _gatewayAddress = 'https://api.agora.io'
+  private _baseUrl = 'https://service.agora.io/toolbox-overseas'
 
-  constructor() {
+  constructor(rtmManager?: any, appId?: string) {
     super()
+    this._rtmManager = rtmManager
+    if (appId) {
+      this._appId = appId
+    }
   }
 
   async init(config: SttManagerConfig): Promise<void> {
@@ -26,13 +73,22 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
         throw new SttError('INVALID_CONFIG', 'Missing required configuration parameters')
       }
 
+      if (!this._appId) {
+        throw new SttError('APP_ID_REQUIRED', 'App ID is required for STT operations')
+      }
+
       this._userId = userId
       this._channel = channel
       this._config = config
 
-      // 这里会调用RTM管理器进行初始化
-      // 暂时模拟初始化过程
-      await new Promise((resolve) => setTimeout(resolve, 100))
+      // 调用RTM管理器进行初始化
+      if (this._rtmManager) {
+        await this._rtmManager.join({
+          channel,
+          userId: userId.toString(),
+          userName,
+        })
+      }
 
       this._initialized = true
       this.emit('initialized', { userId, channel })
@@ -57,15 +113,60 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
     }
 
     try {
-      // 模拟开始转录过程
       this.emit('transcriptionStarting', { languages })
 
-      await new Promise((resolve) => setTimeout(resolve, 200))
+      // 获取锁
+      if (this._rtmManager) {
+        await this._rtmManager.acquireLock()
+      }
 
-      this.emit('transcriptionStarted', {
-        taskId: `task-${Date.now()}`,
-        languages,
-      })
+      try {
+        // 获取token
+        const tokenData = await this._apiSTTAcquireToken({
+          channel: this._channel,
+          uid: this._userId,
+        })
+        const token = tokenData.tokenName
+
+        if (!token) {
+          throw new SttError('TOKEN_ERROR', 'Failed to acquire token')
+        }
+
+        // 开始转录
+        const startResponse = await this._apiSTTStartTranscription({
+          uid: this._userId,
+          channel: this._channel,
+          languages,
+          token,
+        })
+
+        const { taskId } = startResponse
+        this._option = { token, taskId }
+
+        // 更新RTM元数据
+        if (this._rtmManager) {
+          await Promise.all([
+            this._rtmManager.updateLanguages(languages),
+            this._rtmManager.updateSttData({
+              status: 'start',
+              taskId,
+              token,
+              startTime: Date.now(),
+              duration: 3600000, // 1小时
+            }),
+          ])
+        }
+
+        this.emit('transcriptionStarted', {
+          taskId,
+          languages,
+        })
+      } finally {
+        // 释放锁
+        if (this._rtmManager) {
+          await this._rtmManager.releaseLock()
+        }
+      }
     } catch (error) {
       this.emit('error', error as Error)
       throw error
@@ -80,12 +181,45 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
       )
     }
 
+    const { taskId, token } = this._option || {}
+    if (!taskId) {
+      throw new SttError('TASK_NOT_FOUND', 'No active transcription task found')
+    }
+    if (!token) {
+      throw new SttError('TOKEN_NOT_FOUND', 'Token not found')
+    }
+
     try {
       this.emit('transcriptionStopping')
 
-      await new Promise((resolve) => setTimeout(resolve, 100))
+      // 获取锁
+      if (this._rtmManager) {
+        await this._rtmManager.acquireLock()
+      }
 
-      this.emit('transcriptionStopped')
+      try {
+        // 停止转录
+        await this._apiSTTStopTranscription({
+          taskId,
+          token,
+          uid: this._userId,
+          channel: this._channel,
+        })
+
+        // 更新RTM元数据
+        if (this._rtmManager) {
+          await this._rtmManager.updateSttData({
+            status: 'end',
+          })
+        }
+
+        this.emit('transcriptionStopped')
+      } finally {
+        // 释放锁
+        if (this._rtmManager) {
+          await this._rtmManager.releaseLock()
+        }
+      }
     } catch (error) {
       this.emit('error', error as Error)
       throw error
@@ -100,11 +234,21 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
       )
     }
 
-    // 模拟查询转录结果
-    return {
-      status: 'completed',
-      results: [],
+    const { taskId, token } = this._option || {}
+    if (!taskId) {
+      throw new SttError('TASK_NOT_FOUND', 'No active transcription task found')
     }
+    if (!token) {
+      throw new SttError('TOKEN_NOT_FOUND', 'Token not found')
+    }
+
+    // 查询转录结果
+    return await this._apiSTTQueryTranscription({
+      taskId,
+      token,
+      uid: this._userId,
+      channel: this._channel,
+    })
   }
 
   async updateTranscription(options: SttUpdateOptions): Promise<void> {
@@ -115,12 +259,28 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
       )
     }
 
+    const { taskId, token } = this._option || {}
+    if (!taskId) {
+      throw new SttError('TASK_NOT_FOUND', 'No active transcription task found')
+    }
+    if (!token) {
+      throw new SttError('TOKEN_NOT_FOUND', 'Token not found')
+    }
+
     try {
       const { data, updateMaskList } = options
 
       this.emit('transcriptionUpdating', { data, updateMaskList })
 
-      await new Promise((resolve) => setTimeout(resolve, 100))
+      // 更新转录
+      await this._apiSTTUpdateTranscription({
+        taskId,
+        token,
+        uid: this._userId,
+        channel: this._channel,
+        data,
+        updateMaskList,
+      })
 
       this.emit('transcriptionUpdated', { data })
     } catch (error) {
@@ -140,7 +300,17 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
     try {
       this.emit('durationExtending', options)
 
-      await new Promise((resolve) => setTimeout(resolve, 50))
+      // 更新RTM元数据
+      if (this._rtmManager) {
+        const data: any = {}
+        if (options.startTime) {
+          data.startTime = options.startTime
+        }
+        if (options.duration) {
+          data.duration = options.duration
+        }
+        await this._rtmManager.updateSttData(data)
+      }
 
       this.emit('durationExtended', options)
     } catch (error) {
@@ -154,10 +324,15 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
       this.emit('destroying')
 
       // 清理资源
+      if (this._rtmManager) {
+        await this._rtmManager.destroy()
+      }
+
       this._initialized = false
       this._config = undefined
       this._userId = ''
       this._channel = ''
+      this._option = undefined
 
       this.emit('destroyed')
 
@@ -183,4 +358,179 @@ export class SttManagerAdapter extends AGEventEmitter<SttEventMap> implements IS
   get channel(): string {
     return this._channel
   }
+
+  // 私有API方法
+  private async _apiSTTAcquireToken(options: {
+    channel: string
+    uid: string | number
+  }): Promise<ApiSTTResponse> {
+    const { channel } = options
+    const data: any = {
+      instanceId: channel,
+    }
+
+    const url = `${this._gatewayAddress}/v1/projects/${this._appId}/rtsc/speech-to-text/builderTokens`
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: await this._genAuthorization(options),
+      },
+      body: JSON.stringify(data),
+    })
+
+    if (response.status === 200) {
+      return await response.json()
+    } else {
+      throw new Error(`API call failed with status: ${response.status}`)
+    }
+  }
+
+  private async _apiSTTStartTranscription(
+    options: ApiSTTStartOptions
+  ): Promise<{ taskId: string }> {
+    const { channel, languages, token, uid } = options
+    const url = `${this._gatewayAddress}/v1/projects/${this._appId}/rtsc/speech-to-text/tasks?builderToken=${token}`
+
+    const body: any = {
+      languages: languages.map((item) => item.source),
+      maxIdleTime: 60,
+      rtcConfig: {
+        channelName: channel,
+        subBotUid: '1000',
+        pubBotUid: '2000',
+      },
+    }
+
+    if (languages.find((item) => item.target?.length)) {
+      body.translateConfig = {
+        forceTranslateInterval: 2,
+        languages: languages.filter((item) => item.target?.length),
+      }
+    }
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: await this._genAuthorization({
+          uid,
+          channel,
+        }),
+      },
+      body: JSON.stringify(body),
+    })
+
+    const data = await response.json()
+    if (response.status !== 200) {
+      throw new Error(data?.message || 'Start transcription failed')
+    }
+    return data
+  }
+
+  private async _apiSTTStopTranscription(options: ApiSTTStopOptions): Promise<void> {
+    const { taskId, token, uid, channel } = options
+    const url = `${this._gatewayAddress}/v1/projects/${this._appId}/rtsc/speech-to-text/tasks/${taskId}?builderToken=${token}`
+
+    const response = await fetch(url, {
+      method: 'DELETE',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: await this._genAuthorization({
+          uid,
+          channel,
+        }),
+      },
+    })
+
+    if (!response.ok) {
+      throw new Error(`Stop transcription failed with status: ${response.status}`)
+    }
+  }
+
+  private async _apiSTTQueryTranscription(options: ApiSTTQueryOptions): Promise<any> {
+    const { taskId, token, uid, channel } = options
+    const url = `${this._gatewayAddress}/v1/projects/${this._appId}/rtsc/speech-to-text/tasks/${taskId}?builderToken=${token}`
+
+    const response = await fetch(url, {
+      method: 'GET',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: await this._genAuthorization({
+          uid,
+          channel,
+        }),
+      },
+    })
+
+    return await response.json()
+  }
+
+  private async _apiSTTUpdateTranscription(options: ApiSTTUpdateOptions): Promise<any> {
+    const { taskId, token, uid, channel, data, updateMaskList } = options
+    const updateMask = updateMaskList.join(',')
+    const url = `${this._gatewayAddress}/v1/projects/${this._appId}/rtsc/speech-to-text/tasks/${taskId}?builderToken=${token}&sequenceId=1&updateMask=${updateMask}`
+
+    const body: any = {
+      ...data,
+    }
+
+    const response = await fetch(url, {
+      method: 'PATCH',
+      headers: {
+        'Content-Type': 'application/json',
+        Authorization: await this._genAuthorization({
+          uid,
+          channel,
+        }),
+      },
+      body: JSON.stringify(body),
+    })
+
+    return await response.json()
+  }
+
+  private async _genAuthorization(config: {
+    uid: string | number
+    channel: string
+  }): Promise<string> {
+    // 在实际实现中,这里应该调用真实的token生成逻辑
+    // 这里简化实现,返回一个基本的认证头
+    const token = await this._apiGetAgoraToken(config)
+    return `agora token="${token}"`
+  }
+
+  private async _apiGetAgoraToken(config: {
+    uid: string | number
+    channel: string
+  }): Promise<string | null> {
+    const { uid, channel } = config
+    const url = `${this._baseUrl}/v2/token/generate`
+
+    const data = {
+      appId: this._appId,
+      appCertificate: '', // 在实际实现中需要提供证书
+      channelName: channel,
+      expire: 7200,
+      src: 'web',
+      types: [1, 2],
+      uid: uid.toString(),
+    }
+
+    const response = await fetch(url, {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify(data),
+    })
+
+    if (response.ok) {
+      const result = await response.json()
+      return result?.data?.token || null
+    }
+
+    return null
+  }
 }

+ 3 - 0
packages/stt-sdk-core/src/types/index.ts

@@ -77,6 +77,7 @@ export interface SttEventMap {
 }
 
 export interface RtmEventMap {
+  status: (status: any) => void
   connecting: (data: { channel: string; userId: string }) => void
   connected: (data: { channel: string; userId: string }) => void
   error: (error: Error) => void
@@ -84,6 +85,8 @@ export interface RtmEventMap {
   metadataUpdated: (data: { data: RtmChannelMetadata }) => void
   languagesUpdating: (data: { languages: any[] }) => void
   languagesUpdated: (data: { languages: any[] }) => void
+  languagesChanged: (languages: any) => void
+  sttDataChanged: (data: any) => void
   lockAcquiring: () => void
   lockAcquired: () => void
   lockReleasing: () => void

+ 4 - 0
packages/stt-sdk-core/src/types/stt-error-types.ts

@@ -9,6 +9,10 @@ export type SttErrorCode =
   | 'PERMISSION_DENIED'
   | 'TIMEOUT'
   | 'UNKNOWN_ERROR'
+  | 'APP_ID_REQUIRED'
+  | 'TOKEN_ERROR'
+  | 'TASK_NOT_FOUND'
+  | 'TOKEN_NOT_FOUND'
 
 export interface SttErrorDetails {
   code: SttErrorCode

+ 189 - 20
packages/stt-sdk-core/tests/managers/stt-manager-adapter.test.ts

@@ -1,12 +1,33 @@
 import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'
 import { SttManagerAdapter } from '../../src/managers/stt-manager-adapter'
 import { SttError } from '../../src/core/stt-error'
+import type { RtmManagerAdapter } from '../../src/managers/rtm-manager-adapter'
 
 describe('SttManagerAdapter', () => {
   let manager: SttManagerAdapter
+  let mockRtmManager: RtmManagerAdapter
+  const mockAppId = 'test-app-id'
 
   beforeEach(() => {
-    manager = new SttManagerAdapter()
+    // 创建模拟的 RTM 管理器
+    mockRtmManager = {
+      join: vi.fn().mockResolvedValue(undefined),
+      updateSttData: vi.fn().mockResolvedValue(undefined),
+      updateLanguages: vi.fn().mockResolvedValue(undefined),
+      acquireLock: vi.fn().mockResolvedValue(undefined),
+      releaseLock: vi.fn().mockResolvedValue(undefined),
+      destroy: vi.fn().mockResolvedValue(undefined),
+      isJoined: false,
+      config: undefined,
+      userId: '',
+      channel: '',
+      userList: [],
+      on: vi.fn().mockReturnThis(),
+      off: vi.fn().mockReturnThis(),
+      emit: vi.fn().mockReturnThis(),
+    } as any
+
+    manager = new SttManagerAdapter(mockRtmManager, mockAppId)
   })
 
   afterEach(async () => {
@@ -29,6 +50,11 @@ describe('SttManagerAdapter', () => {
       expect(manager.config).toEqual(config)
       expect(manager.userId).toBe('test-user')
       expect(manager.channel).toBe('test-channel')
+      expect(mockRtmManager.join).toHaveBeenCalledWith({
+        channel: 'test-channel',
+        userId: 'test-user',
+        userName: 'Test User',
+      })
     })
 
     it('should throw error when config is invalid', async () => {
@@ -44,6 +70,20 @@ describe('SttManagerAdapter', () => {
       )
     })
 
+    it('should throw error when appId is not provided', async () => {
+      const managerWithoutAppId = new SttManagerAdapter(mockRtmManager)
+      const config = {
+        userId: 'test-user',
+        channel: 'test-channel',
+        userName: 'Test User',
+      }
+
+      await expect(managerWithoutAppId.init(config)).rejects.toThrow(SttError)
+      await expect(managerWithoutAppId.init(config)).rejects.toThrow(
+        'App ID is required for STT operations'
+      )
+    })
+
     it('should emit initialized event', async () => {
       const initializedHandler = vi.fn()
       manager.on('initialized', initializedHandler)
@@ -65,13 +105,15 @@ describe('SttManagerAdapter', () => {
   })
 
   describe('startTranscription', () => {
-    it('should start transcription when initialized', async () => {
+    beforeEach(async () => {
       await manager.init({
         userId: 'test-user',
         channel: 'test-channel',
         userName: 'Test User',
       })
+    })
 
+    it('should start transcription successfully', async () => {
       const startingHandler = vi.fn()
       const startedHandler = vi.fn()
       manager.on('transcriptionStarting', startingHandler)
@@ -86,28 +128,37 @@ describe('SttManagerAdapter', () => {
       expect(startingHandler).toHaveBeenCalledTimes(1)
       expect(startingHandler).toHaveBeenCalledWith({ languages: [{ source: 'en-US' }] })
       expect(startedHandler).toHaveBeenCalledTimes(1)
-      expect(startedHandler.mock.calls[0][0].taskId).toMatch(/^task-\d+$/)
-      expect(startedHandler.mock.calls[0][0].languages).toEqual([{ source: 'en-US' }])
+      expect(startedHandler).toHaveBeenCalledWith({
+        taskId: 'test-task-id',
+        languages: [{ source: 'en-US' }],
+      })
+
+      // 验证 RTM 管理器被正确调用
+      expect(mockRtmManager.acquireLock).toHaveBeenCalledTimes(1)
+      expect(mockRtmManager.releaseLock).toHaveBeenCalledTimes(1)
+      expect(mockRtmManager.updateLanguages).toHaveBeenCalledWith([{ source: 'en-US' }])
+      expect(mockRtmManager.updateSttData).toHaveBeenCalledWith({
+        status: 'start',
+        taskId: 'test-task-id',
+        token: 'test-token',
+        startTime: expect.any(Number),
+        duration: 3600000,
+      })
     })
 
     it('should throw error when not initialized', async () => {
+      const uninitializedManager = new SttManagerAdapter(mockRtmManager, mockAppId)
       const options = {
         languages: [{ source: 'en-US' }],
       }
 
-      await expect(manager.startTranscription(options)).rejects.toThrow(SttError)
-      await expect(manager.startTranscription(options)).rejects.toThrow(
+      await expect(uninitializedManager.startTranscription(options)).rejects.toThrow(SttError)
+      await expect(uninitializedManager.startTranscription(options)).rejects.toThrow(
         'SttManager must be initialized before starting transcription'
       )
     })
 
     it('should throw error when languages are empty', async () => {
-      await manager.init({
-        userId: 'test-user',
-        channel: 'test-channel',
-        userName: 'Test User',
-      })
-
       const options = {
         languages: [],
       }
@@ -117,16 +168,48 @@ describe('SttManagerAdapter', () => {
         'At least one language must be provided'
       )
     })
+
+    it('should handle API errors gracefully', async () => {
+      // 模拟 API 调用失败
+      global.fetch = vi.fn().mockResolvedValue({
+        ok: false,
+        status: 500,
+        json: vi.fn().mockResolvedValue({ message: 'Internal Server Error' }),
+      })
+
+      const options = {
+        languages: [{ source: 'en-US' }],
+      }
+
+      await expect(manager.startTranscription(options)).rejects.toThrow()
+
+      // 恢复默认的 fetch 模拟
+      global.fetch = vi.fn().mockResolvedValue({
+        ok: true,
+        status: 200,
+        json: vi.fn().mockResolvedValue({
+          tokenName: 'test-token',
+          taskId: 'test-task-id',
+        }),
+      })
+    })
   })
 
   describe('stopTranscription', () => {
-    it('should stop transcription when initialized', async () => {
+    beforeEach(async () => {
       await manager.init({
         userId: 'test-user',
         channel: 'test-channel',
         userName: 'Test User',
       })
 
+      // 先启动转录以设置 taskId 和 token
+      await manager.startTranscription({
+        languages: [{ source: 'en-US' }],
+      })
+    })
+
+    it('should stop transcription successfully', async () => {
       const stoppingHandler = vi.fn()
       const stoppedHandler = vi.fn()
       manager.on('transcriptionStopping', stoppingHandler)
@@ -136,38 +219,84 @@ describe('SttManagerAdapter', () => {
 
       expect(stoppingHandler).toHaveBeenCalledTimes(1)
       expect(stoppedHandler).toHaveBeenCalledTimes(1)
+
+      // 验证 RTM 管理器被正确调用
+      expect(mockRtmManager.acquireLock).toHaveBeenCalledTimes(2) // start 和 stop 各一次
+      expect(mockRtmManager.releaseLock).toHaveBeenCalledTimes(2)
+      expect(mockRtmManager.updateSttData).toHaveBeenCalledWith({
+        status: 'end',
+      })
     })
 
     it('should throw error when not initialized', async () => {
-      await expect(manager.stopTranscription()).rejects.toThrow(SttError)
-      await expect(manager.stopTranscription()).rejects.toThrow(
+      const uninitializedManager = new SttManagerAdapter(mockRtmManager, mockAppId)
+
+      await expect(uninitializedManager.stopTranscription()).rejects.toThrow(SttError)
+      await expect(uninitializedManager.stopTranscription()).rejects.toThrow(
         'SttManager must be initialized before stopping transcription'
       )
     })
+
+    it('should throw error when no active task found', async () => {
+      const managerWithoutTask = new SttManagerAdapter(mockRtmManager, mockAppId)
+      await managerWithoutTask.init({
+        userId: 'test-user',
+        channel: 'test-channel',
+        userName: 'Test User',
+      })
+
+      await expect(managerWithoutTask.stopTranscription()).rejects.toThrow(SttError)
+      await expect(managerWithoutTask.stopTranscription()).rejects.toThrow(
+        'No active transcription task found'
+      )
+    })
   })
 
   describe('queryTranscription', () => {
-    it('should query transcription when initialized', async () => {
+    beforeEach(async () => {
       await manager.init({
         userId: 'test-user',
         channel: 'test-channel',
         userName: 'Test User',
       })
 
+      // 先启动转录以设置 taskId 和 token
+      await manager.startTranscription({
+        languages: [{ source: 'en-US' }],
+      })
+    })
+
+    it('should query transcription successfully', async () => {
       const result = await manager.queryTranscription()
 
       expect(result).toEqual({
-        status: 'completed',
-        results: [],
+        tokenName: 'test-token',
+        taskId: 'test-task-id',
       })
     })
 
     it('should throw error when not initialized', async () => {
-      await expect(manager.queryTranscription()).rejects.toThrow(SttError)
-      await expect(manager.queryTranscription()).rejects.toThrow(
+      const uninitializedManager = new SttManagerAdapter(mockRtmManager, mockAppId)
+
+      await expect(uninitializedManager.queryTranscription()).rejects.toThrow(SttError)
+      await expect(uninitializedManager.queryTranscription()).rejects.toThrow(
         'SttManager must be initialized before querying transcription'
       )
     })
+
+    it('should throw error when no active task found', async () => {
+      const managerWithoutTask = new SttManagerAdapter(mockRtmManager, mockAppId)
+      await managerWithoutTask.init({
+        userId: 'test-user',
+        channel: 'test-channel',
+        userName: 'Test User',
+      })
+
+      await expect(managerWithoutTask.queryTranscription()).rejects.toThrow(SttError)
+      await expect(managerWithoutTask.queryTranscription()).rejects.toThrow(
+        'No active transcription task found'
+      )
+    })
   })
 
   describe('destroy', () => {
@@ -189,10 +318,50 @@ describe('SttManagerAdapter', () => {
       expect(manager.config).toBeUndefined()
       expect(destroyingHandler).toHaveBeenCalledTimes(1)
       expect(destroyedHandler).toHaveBeenCalledTimes(1)
+      expect(mockRtmManager.destroy).toHaveBeenCalledTimes(1)
     })
 
     it('should handle destroy when not initialized', async () => {
       await expect(manager.destroy()).resolves.not.toThrow()
     })
   })
+
+  describe('extendDuration', () => {
+    beforeEach(async () => {
+      await manager.init({
+        userId: 'test-user',
+        channel: 'test-channel',
+        userName: 'Test User',
+      })
+    })
+
+    it('should extend duration successfully', async () => {
+      const extendingHandler = vi.fn()
+      const extendedHandler = vi.fn()
+      manager.on('durationExtending', extendingHandler)
+      manager.on('durationExtended', extendedHandler)
+
+      const options = {
+        startTime: Date.now(),
+        duration: 7200000, // 2小时
+      }
+
+      await manager.extendDuration(options)
+
+      expect(extendingHandler).toHaveBeenCalledTimes(1)
+      expect(extendingHandler).toHaveBeenCalledWith(options)
+      expect(extendedHandler).toHaveBeenCalledTimes(1)
+      expect(extendedHandler).toHaveBeenCalledWith(options)
+      expect(mockRtmManager.updateSttData).toHaveBeenCalledWith(options)
+    })
+
+    it('should throw error when not initialized', async () => {
+      const uninitializedManager = new SttManagerAdapter(mockRtmManager, mockAppId)
+
+      await expect(uninitializedManager.extendDuration({})).rejects.toThrow(SttError)
+      await expect(uninitializedManager.extendDuration({})).rejects.toThrow(
+        'SttManager must be initialized before extending duration'
+      )
+    })
+  })
 })

+ 42 - 16
packages/stt-sdk-core/tests/setup.ts

@@ -1,21 +1,47 @@
 import { vi } from 'vitest'
 
-// 全局模拟配置
-vi.mock('agora-rtm', () => ({
-  RtmClient: vi.fn(() => ({
-    login: vi.fn(),
-    logout: vi.fn(),
-    addListener: vi.fn(),
-    removeListener: vi.fn(),
-  })),
-  RtmChannel: vi.fn(() => ({
-    join: vi.fn(),
-    leave: vi.fn(),
-    sendMessage: vi.fn(),
-    addListener: vi.fn(),
-    removeListener: vi.fn(),
-  })),
-}))
+// 全局模拟配置 - 模拟真实的 Agora RTM SDK
+vi.mock('agora-rtm', () => {
+  const mockRTM = {
+    RTM: vi.fn((appId: string, userId: string, config: any) => ({
+      login: vi.fn().mockResolvedValue(undefined),
+      logout: vi.fn().mockResolvedValue(undefined),
+      subscribe: vi.fn().mockResolvedValue(undefined),
+      unsubscribe: vi.fn().mockResolvedValue(undefined),
+      addEventListener: vi.fn(),
+      removeEventListener: vi.fn(),
+      presence: {
+        setState: vi.fn().mockResolvedValue(undefined),
+        whoNow: vi.fn().mockResolvedValue({ totalOccupancy: 1 }),
+      },
+      storage: {
+        setChannelMetadata: vi.fn().mockResolvedValue(undefined),
+        removeChannelMetadata: vi.fn().mockResolvedValue(undefined),
+      },
+      lock: {
+        setLock: vi.fn().mockResolvedValue(undefined),
+        acquireLock: vi.fn().mockResolvedValue(undefined),
+        releaseLock: vi.fn().mockResolvedValue(undefined),
+        getLock: vi.fn().mockResolvedValue({ lockDetails: [] }),
+      },
+    })),
+  }
+  return mockRTM
+})
+
+// 模拟 fetch API
+const mockFetch = vi.fn()
+global.fetch = mockFetch
+
+// 设置默认的 fetch 响应
+mockFetch.mockResolvedValue({
+  ok: true,
+  status: 200,
+  json: vi.fn().mockResolvedValue({
+    tokenName: 'test-token',
+    taskId: 'test-task-id',
+  }),
+})
 
 // 全局测试配置
 global.console = {