Bläddra i källkod

讯飞asr测试,debug

Davidliu 1 månad sedan
förälder
incheckning
6a7fe9ddef
3 ändrade filer med 106 tillägg och 39 borttagningar
  1. 13 6
      src/core/callcenter/test.py
  2. 75 22
      src/core/voip/asr.py
  3. 18 11
      src/core/voip/bot.py

+ 13 - 6
src/core/callcenter/test.py

@@ -4,7 +4,7 @@
 # TestStr = "能帮我查一下,我家水费欠多少"
 # seg_list = jieba.cut(TestStr, cut_all=False, HMM=True)
 # print ("Default Mode:", "/ ".join(seg_list))
-
+import json
 import uuid
 import mmh3
 
@@ -19,9 +19,16 @@ def get_bucket(custom_uuid=None, buckets=[]):
     return -1, buckets[0]
 
 if __name__ == '__main__':
-    arr = ['C1879412349555838976','C1879412206890782720','C1879411969535119360','C1879411406290423808','C1879408024871899136','C1879407680997691392','C1879406254007390208','C1879404740748644352','C1879403850650226688','C1879402961977872384','C1879402509785763840','C1879402237567045632','C1879402005592674304','C1879400827102302208','C1879400778024751104','C1879400705488457728','C1879400533513605120','C1879400336188379136','C1879400327959154688','C1879399300082044928','C1879399233669435392','C1879396009050771456','C1879394097295396864','C1879393224498483200','C1879381728368398336','C1879381287505104896','C1879379466774515712','C1879376723787780096','C1879374004641468416','C1879373548330553344','C1879372415646175232','C1879367459866284032','C1879365634769424384','C1879364921326702592','C1879364787436130304','C1879363948554358784','C1879362454358724608','C1879360081448013824','C1879358294565457920','C1879358151116066816','C1879357497190518784','C1879357257641234432','C1879357023229972480','C1879355792935751680','C1879355755749052416','C1879354039309832192']
-    buckets=[Bucket(id=1, name="传统", lower=0, upper=90), Bucket(id=2, name="AI",lower=90, upper=100)]
-    for custom_uuid in arr:
-        num, bucket = get_bucket(custom_uuid=custom_uuid, buckets=buckets)
-        print(custom_uuid, num, bucket.name)
+    # arr = ['C1879412349555838976','C1879412206890782720','C1879411969535119360','C1879411406290423808','C1879408024871899136','C1879407680997691392','C1879406254007390208','C1879404740748644352','C1879403850650226688','C1879402961977872384','C1879402509785763840','C1879402237567045632','C1879402005592674304','C1879400827102302208','C1879400778024751104','C1879400705488457728','C1879400533513605120','C1879400336188379136','C1879400327959154688','C1879399300082044928','C1879399233669435392','C1879396009050771456','C1879394097295396864','C1879393224498483200','C1879381728368398336','C1879381287505104896','C1879379466774515712','C1879376723787780096','C1879374004641468416','C1879373548330553344','C1879372415646175232','C1879367459866284032','C1879365634769424384','C1879364921326702592','C1879364787436130304','C1879363948554358784','C1879362454358724608','C1879360081448013824','C1879358294565457920','C1879358151116066816','C1879357497190518784','C1879357257641234432','C1879357023229972480','C1879355792935751680','C1879355755749052416','C1879354039309832192']
+    # buckets=[Bucket(id=1, name="传统", lower=0, upper=90), Bucket(id=2, name="AI",lower=90, upper=100)]
+    # for custom_uuid in arr:
+    #     num, bucket = get_bucket(custom_uuid=custom_uuid, buckets=buckets)
+    #     print(custom_uuid, num, bucket.name)
+
+    message = """{"seg_id": 2, "cn": {"st": {"rt": [{"ws": [{"cw": [{"sc": 0.00, "w": "停水", "wp": "n", "rl": "0", "wb": 9, "wc": 0.00, "we": 64}], "wb": 9,"we": 64},{"cw": [{"sc": 0.00, "w": "咨询", "wp": "n", "rl": "0", "wb": 65, "wc": 0.00, "we": 132}], "wb": 65,"we": 132}]}], "bg": "9510", "type": "0", "ed": "10950"}}, "ls": false}"""
+    result_dict = json.loads(message)
+    st = result_dict["cn"]["st"]
+    rt = st["rt"]
+    result = ''.join(cw["w"] for item in rt for ws in item["ws"] for cw in ws["cw"])
 
+    print(result)

+ 75 - 22
src/core/voip/asr.py

@@ -5,6 +5,7 @@ import os
 import json
 import threading
 import traceback
+from src.core.callcenter import registry
 
 import nls  # 引入阿里云语音识别库
 from aliyunsdkcore.client import AcsClient
@@ -138,12 +139,12 @@ class TestSt:
     def test_on_sentence_begin(self, message, *args):
+        """Log the sentence-begin event, then pass its converted form to ``message_receiver`` when one is set."""
         self.logger.debug("[%s]test_on_sentence_begin:%s", self.__id, message)
         if self.message_receiver:
-            self.message_receiver(message, *args)
+            self.message_receiver(self.convert_message(message), *args)
 
     def test_on_sentence_end(self, message, *args):
+        """Log the sentence-end event, then pass its converted form to ``message_receiver`` when one is set."""
         self.logger.debug("[%s]test_on_sentence_end:%s", self.__id, message)
         if self.message_receiver:
-            self.message_receiver(message, *args)
+            self.message_receiver(self.convert_message(message), *args)
 
     def test_on_start(self, message, *args):
         self.__event.set()
@@ -155,7 +156,7 @@ class TestSt:
         if not self.__event.is_set():
             self.__event.set()
         if self.message_receiver:
-            self.message_receiver(message, *args)
+            self.message_receiver(self.convert_message(message), *args)
 
     def test_on_close(self, *args):
         self.logger.debug("on_close: args=>%s", args)
@@ -166,12 +167,33 @@ class TestSt:
     def test_on_result_chg(self, message, *args):
+        """Pass the converted form of a result-changed event to ``message_receiver`` when one is set (no logging)."""
         # self.logger.debug("test_on_chg:{}".format(message))
         if self.message_receiver:
-            self.message_receiver(message, *args)
+            self.message_receiver(self.convert_message(message), *args)
 
     def test_on_completed(self, message, *args):
+        """Completion callback; currently a no-op."""
         # self.logger.debug("on_completed:args=>{} message=>{}".format(args, message))
         pass
 
+    def convert_message(self, message):
+        final_result = {}
+        message = json.loads(message)
+        if message["header"]["status"] == 20000000:
+            if message["header"]["name"] == "SentenceBegin":
+                final_result['name'] = 'SentenceBegin'
+            if message["header"]["name"] == "SentenceEnd":
+                result = message["payload"]["result"]
+                # self.logger.info("asr返回内容Result:%s", result)
+                final_result['name'] = 'SentenceEnd'
+                final_result['result'] = result
+            elif message["header"]["name"] == "TranscriptionResultChanged":
+                final_result['name'] = 'TranscriptionResultChanged'
+        else:
+            final_result['name'] = 'TranscriptionResultError'
+            final_result['status'] = message['header']['status']
+            final_result['result'] = ''
+            self.logger.info(f"Status is not {message['header']['status']}")
+            registry.ASR_ERRORS.labels(message['header']['status']).inc()
+        return final_result
+
 
 # 讯飞ASR实时转写
 class XfAsr:
@@ -244,28 +266,59 @@ class XfAsr:
 
     def recv(self):
         try:
+            # {"seg_id": 0, "cn": {"st": {"rt": [{"ws": [{"cw": [{"sc": 0.00, "w": "凭", "wp": "n", "rl": "0", "wb": 13, "wc": 0.00, "we": 26}], "wb": 0, "we": 0}]}], "bg": "9510", "type": "1", "ed": "0"}}, "ls": false}
+            # {"seg_id": 1, "cn": {"st": {"rt": [{"ws": [{"cw": [{"sc": 0.00, "w": "停水", "wp": "n", "rl": "0", "wb": 26, "wc": 0.00, "we": 52}], "wb": 0,"we": 0}]}], "bg": "9770", "type": "1", "ed": "0"}}, "ls": false}
+            # {"seg_id": 2, "cn": {"st": {"rt": [{"ws": [{"cw": [{"sc": 0.00, "w": "停水", "wp": "n", "rl": "0", "wb": 9, "wc": 0.00, "we": 64}], "wb": 9,"we": 64},{"cw": [{"sc": 0.00, "w": "咨询", "wp": "n", "rl": "0", "wb": 65, "wc": 0.00, "we": 132}], "wb": 65,"we": 132}]}], "bg": "9510", "type": "0", "ed": "10950"}}, "ls": false}
             self.logger.info(f"xunfei.Asr.recv: ws.connected:{self.ws.connected}")
             while self.ws.connected:
-                result = str(self.ws.recv())
-                if len(result) == 0:
+                message = str(self.ws.recv())
+                if len(message) == 0:
                     self.logger.info("xunfei.Asr.recv: receive result end")
                     break
-                result_dict = json.loads(result)
-                # 解析结果
-                if result_dict["action"] == "started":
-                    self.logger.info("xunfei.Asr.recv: handshake success, result: " + result)
-
-                if result_dict["action"] == "result":
-                    result_1 = result_dict
-                    # result_2 = json.loads(result_1["cn"])
-                    # result_3 = json.loads(result_2["st"])
-                    # result_4 = json.loads(result_3["rt"])
-                    self.logger.info("xunfei.Asr.recv: rtasr result: " + result_1["data"])
-
-                if result_dict["action"] == "error":
-                    self.logger.info("xunfei.Asr.recv: rtasr error: " + result)
-                    self.ws.close()
-                    return
+
+                self.logger.info("xunfei.Asr.recv: message :{}", message)
+                if self.message_receiver:
+                    self.message_receiver(self.convert_message(message))
+
+                # result_dict = json.loads(result)
+                # # 解析结果
+                # if result_dict["action"] == "started":
+                #     self.logger.info("xunfei.Asr.recv: handshake success, result: " + result)
+                #
+                #
+                # if result_dict["action"] == "result":
+                #     result_1 = result_dict
+                #     # result_2 = json.loads(result_1["cn"])
+                #     # result_3 = json.loads(result_2["st"])
+                #     # result_4 = json.loads(result_3["rt"])
+                #     self.logger.info("xunfei.Asr.recv: rtasr result: " + result_1["data"])
+                #
+                # if result_dict["action"] == "error":
+                #     self.logger.info("xunfei.Asr.recv: rtasr error: " + result)
+                #     self.ws.close()
+                #     return
         except Exception as e:
             traceback.print_exc()
             self.logger.error("xunfei.Asr.recv: receive result end", e)
+
+    def convert_message(self, message):
+        final_result = {}
+        result_dict = json.loads(message)
+        if result_dict["action"] == "started":
+            final_result['name'] = 'SentenceBegin'
+        elif result_dict["action"] == "result":
+            result_dict = json.loads(message)
+            st = result_dict["cn"]["st"]
+            rt = st["rt"]
+            if st.get('type') == 1:
+                final_result['name'] = 'TranscriptionResultChanged'
+            else:
+                final_result['name'] = 'SentenceEnd'
+            final_result['result'] = ''.join(cw["w"] for item in rt for ws in item["ws"] for cw in ws["cw"])
+        elif result_dict["action"] == "error":
+            final_result['name'] = 'TranscriptionResultError'
+            final_result['status'] = ''
+            final_result['result'] = message
+            self.ws.close()
+
+        return final_result

+ 18 - 11
src/core/voip/bot.py

@@ -348,17 +348,24 @@ class MyCall(pj.Call):
         # 判断是否播放完成 否则不记录用户说的内容
         if not self.is_play_complete():
             return
-        message = json.loads(message)
-        if message["header"]["status"] == 20000000:
-            if message["header"]["name"] == "SentenceEnd":
-                result = message["payload"]["result"]
-                # self.logger.info("asr返回内容Result:%s", result)
-                self.user_asr_text_queue.put(result)
-            elif message["header"]["name"] == "TranscriptionResultChanged":
-                self.reset_wait_time()
-        else:
-            self.logger.info(f"Status is not {message['header']['status']}")
-            registry.ASR_ERRORS.labels(message['header']['status']).inc()
+        if message["name"] == "SentenceEnd":
+            self.user_asr_text_queue.put(message["result"])
+        elif message["name"] == "TranscriptionResultChanged":
+            self.reset_wait_time()
+        elif message["name"] == "TranscriptionResultError":
+            pass
+
+        # message = json.loads(message)
+        # if message["header"]["status"] == 20000000:
+        #     if message["header"]["name"] == "SentenceEnd":
+        #         result = message["payload"]["result"]
+        #         # self.logger.info("asr返回内容Result:%s", result)
+        #         self.user_asr_text_queue.put(result)
+        #     elif message["header"]["name"] == "TranscriptionResultChanged":
+        #         self.reset_wait_time()
+        # else:
+        #     self.logger.info(f"Status is not {message['header']['status']}")
+        #     registry.ASR_ERRORS.labels(message['header']['status']).inc()
 
     def on_media_player_complete(self, player_id):
         self.logger.info('player complete')