Ver Fonte

✨ feat(rtc): add textstream parsing functionality

- add new protobuf definition file SttMessage.js for textstream structure
- implement Parser utility class to handle binary data parsing
- update RtcManagerAdapter to parse Uint8Array streams before emitting textstreamReceived event
- add type definition for ITextstream interface

🐛 fix(rtc): improve stream message handling

- add type check for stream message to ensure it's Uint8Array before parsing
- add error handling and warning logs for invalid stream data
- prevent emitting textstreamReceived event with unparsed raw data
yourname há 2 meses atrás
pai
commit
9d28639998

+ 10 - 1
packages/stt-sdk-core/src/managers/rtc-manager-adapter.ts

@@ -9,6 +9,7 @@ import { AGEventEmitter } from '../core/event-emitter'
 import { SttError } from '../core/stt-error'
 import type { IRtcManagerAdapter, RtcManagerConfig, RtcEventMap } from '../types'
 import { generateAgoraToken } from '../utils/token-utils'
+import { parser } from '../utils/parser'
 
 export class RtcManagerAdapter extends AGEventEmitter<RtcEventMap> implements IRtcManagerAdapter {
   private _joined = false
@@ -209,7 +210,15 @@ export class RtcManagerAdapter extends AGEventEmitter<RtcEventMap> implements IR
     })
 
     this._client.on('stream-message', (uid: UID, stream: any) => {
-      this.emit('textstreamReceived', stream)
+      // Decode the binary payload into a text stream; emit only when parsing succeeds, otherwise warn about non-Uint8Array input
+      if (stream instanceof Uint8Array) {
+        const textstream = parser.parseData(stream)
+        if (textstream) {
+          this.emit('textstreamReceived', textstream)
+        }
+      } else {
+        console.warn('[RtcManagerAdapter] Received non-Uint8Array stream message:', stream)
+      }
     })
   }
 

+ 140 - 0
packages/stt-sdk-core/src/protobuf/SttMessage.js

@@ -0,0 +1,140 @@
+/* eslint-disable block-scoped-var, id-length, no-control-regex, no-magic-numbers, no-prototype-builtins, no-redeclare, no-shadow, no-var, sort-vars */
+import * as $protobuf from 'protobufjs/light'
+
+// Protobuf schema for speech-to-text stream messages, supplied as a JSON
+// descriptor to the protobufjs "light" build.
+//
+// The descriptor is added to `$protobuf.roots.default` (created here if it
+// does not exist yet), so the `Agora.SpeechToText` namespace and its three
+// message types -- `Text`, `Word` and `Translation` -- are registered on the
+// shared default root and exported as this module's default export.
+//
+// Note that `Text.uid`, `Text.time` and `Text.textTs` are declared as `int64`.
+//
+// NOTE(review): the eslint-disable header suggests this file is generated
+// (presumably by pbjs); if so, regenerate it from the .proto source instead of
+// editing it by hand -- TODO confirm.
+const $root = ($protobuf.roots.default || ($protobuf.roots.default = new $protobuf.Root()))
+  .setOptions({
+    syntax: 'proto3',
+  })
+  .addJSON({
+    Agora: {
+      nested: {
+        SpeechToText: {
+          options: {
+            objc_class_prefix: 'Stt',
+            csharp_namespace: 'AgoraSTTSample.Protobuf',
+            java_package: 'io.agora.rtc.speech2text',
+            java_outer_classname: 'AgoraSpeech2TextProtobuffer',
+          },
+          nested: {
+            Text: {
+              fields: {
+                vendor: {
+                  type: 'int32',
+                  id: 1,
+                },
+                version: {
+                  type: 'int32',
+                  id: 2,
+                },
+                seqnum: {
+                  type: 'int32',
+                  id: 3,
+                },
+                uid: {
+                  type: 'int64',
+                  id: 4,
+                },
+                flag: {
+                  type: 'int32',
+                  id: 5,
+                },
+                time: {
+                  type: 'int64',
+                  id: 6,
+                },
+                lang: {
+                  type: 'int32',
+                  id: 7,
+                },
+                starttime: {
+                  type: 'int32',
+                  id: 8,
+                },
+                offtime: {
+                  type: 'int32',
+                  id: 9,
+                },
+                words: {
+                  rule: 'repeated',
+                  type: 'Word',
+                  id: 10,
+                },
+                endOfSegment: {
+                  type: 'bool',
+                  id: 11,
+                },
+                durationMs: {
+                  type: 'int32',
+                  id: 12,
+                },
+                dataType: {
+                  type: 'string',
+                  id: 13,
+                },
+                trans: {
+                  rule: 'repeated',
+                  type: 'Translation',
+                  id: 14,
+                },
+                culture: {
+                  type: 'string',
+                  id: 15,
+                },
+                textTs: {
+                  type: 'int64',
+                  id: 16,
+                },
+                sentenceEndIndex: {
+                  type: 'int32',
+                  id: 17,
+                },
+              },
+            },
+            Word: {
+              fields: {
+                text: {
+                  type: 'string',
+                  id: 1,
+                },
+                startMs: {
+                  type: 'int32',
+                  id: 2,
+                },
+                durationMs: {
+                  type: 'int32',
+                  id: 3,
+                },
+                isFinal: {
+                  type: 'bool',
+                  id: 4,
+                },
+                confidence: {
+                  type: 'double',
+                  id: 5,
+                },
+              },
+            },
+            Translation: {
+              fields: {
+                isFinal: {
+                  type: 'bool',
+                  id: 1,
+                },
+                lang: {
+                  type: 'string',
+                  id: 2,
+                },
+                texts: {
+                  rule: 'repeated',
+                  type: 'string',
+                  id: 3,
+                },
+              },
+            },
+          },
+        },
+      },
+    },
+  })
+
+export { $root as default }

+ 14 - 1
packages/stt-sdk-core/src/types/index.ts

@@ -1,3 +1,16 @@
+/**
+ * Text stream types.
+ *
+ * `ITextstream` is the value returned by `Parser.parseData` and the payload
+ * delivered to `textstreamReceived` listeners (see `RtcEventMap`).
+ *
+ * NOTE(review): the field names appear to mirror the protobuf `Text` message
+ * in `protobuf/SttMessage.js`, with two caveats to confirm:
+ * - `startTextTs` has no counterpart among the `Text` fields shown there.
+ * - `uid`, `time` and `textTs` are `int64` in the schema; protobufjs may
+ *   represent int64 values as Long objects unless they are converted, so
+ *   verify the runtime values match the types declared here.
+ */
+export interface ITextstream {
+  // `dataType` is a plain `string` in the schema; the union below is not enforced when decoding.
+  dataType: 'transcribe' | 'translate'
+  culture: string
+  uid: string | number
+  startTextTs: number
+  textTs: number
+  time: number
+  durationMs: number
+  // Schema: repeated `Word` (text, startMs, durationMs, isFinal, confidence).
+  words: any[]
+  // Schema: repeated `Translation` (isFinal, lang, texts).
+  trans?: any[]
+}
+
 // 基础类型定义
 export interface ISttManagerAdapter {
   init(config: SttManagerConfig): Promise<void>
@@ -108,7 +121,7 @@ export interface RtcEventMap {
   localUserChanged: (tracks: any) => void
   remoteUserChanged: (user: any) => void
   networkQuality: (quality: any) => void
-  textstreamReceived: (textstream: any) => void
+  textstreamReceived: (textstream: ITextstream) => void
   destroying: () => void
   destroyed: () => void
 }

+ 27 - 0
packages/stt-sdk-core/src/utils/parser.ts

@@ -0,0 +1,27 @@
+import { ITextstream } from '../types'
+// @ts-ignore
+import protoRoot from '../protobuf/SttMessage.js'
+
+/**
+ * Text stream parser: decodes binary stream messages into text stream objects
+ * using the `Agora.SpeechToText.Text` protobuf type registered by
+ * `../protobuf/SttMessage.js`.
+ */
+export class Parser {
+  /**
+   * Decode one protobuf-encoded `Text` message.
+   *
+   * Never throws: any decoding failure is logged and reported as `null`, so
+   * callers only need a truthiness check on the result.
+   *
+   * @param data - Raw bytes of a single encoded `Text` message.
+   * @returns The decoded message as a plain object, or `null` when `data`
+   *   cannot be decoded.
+   */
+  parseData(data: Uint8Array): ITextstream | null {
+    try {
+      // `lookupType` throws when the type is missing, which the catch below reports.
+      const textType = protoRoot.lookupType('Agora.SpeechToText.Text')
+      // `decode` either returns a message or throws, so no falsy check is needed.
+      const message = textType.decode(data)
+      // Convert to a plain object:
+      // - `longs: Number` turns the int64 fields (uid, time, textTs) into JS
+      //   numbers, matching the `number` types declared on ITextstream
+      //   (protobufjs otherwise yields Long objects when long.js is available).
+      // - `defaults: true` keeps unset fields present with their default
+      //   values (e.g. empty `words` / `trans` arrays), as they were readable
+      //   on the decoded message instance.
+      const textstream = textType.toObject(message, { longs: Number, defaults: true }) as ITextstream
+      // Debug level: this runs for every received stream message.
+      console.debug('[SDK Parser] textstream parseData', textstream)
+      return textstream
+    } catch (error) {
+      console.error('[SDK Parser] Parse error:', error)
+      return null
+    }
+  }
+}
+
+export const parser = new Parser()