소스 검색

讯飞实时听写接口,debug

Davidliu 1 개월 전
부모
커밋
84a59e33e2
2개의 변경된 파일, 46개의 추가 및 96개의 삭제
  1. 44 94
      src/core/callcenter/test.py
  2. 2 2
      src/core/voip/constant.py

+ 44 - 94
src/core/callcenter/test.py

@@ -1,94 +1,44 @@
-import numpy as np
-
-
-class PowerVAD:
-    """Frame-by-frame, power-based voice activity detector with hysteresis.
-
-    The first five frames given to ``process_frame`` are averaged into a
-    noise baseline; every later frame's power is compared against that
-    baseline multiplied by ``threshold_multiplier``.
-    """
-
-    def __init__(self, sample_rate=16000, frame_duration=20):
-        """Initialise detector state.
-
-        :param sample_rate: sample rate used to derive the frame length
-        :param frame_duration: frame duration; it is divided by 1000 below,
-            so presumably milliseconds
-        """
-        self.sample_rate = sample_rate
-        # Samples per frame. NOTE: stored here but not read by process_frame.
-        self.frame_length = int(sample_rate * frame_duration / 1000)  # samples per frame
-        self.noise_power = None
-        self.threshold = None
-        self.speech_active = False
-        self.trigger_count = 0
-        self.silence_count = 0
-
-        # Hysteresis parameters (tunable)
-        self.trigger_threshold = 3  # consecutive frames needed to trigger speech
-        self.silence_threshold = 10  # consecutive frames needed to trigger silence
-        self.threshold_multiplier = 2.0  # threshold as a multiple of the noise power
-
-    def process_frame(self, frame):
-        """
-        Process a single audio frame and return the current speech state.
-
-        The first five calls only collect power values for the noise baseline
-        and always return False (including the call that fixes the baseline).
-
-        :param frame: byte data in 16-bit PCM format
-        :return: bool, whether speech is currently detected
-        """
-        # Convert to a numpy array
-        samples = np.frombuffer(frame, dtype=np.int16)
-
-        # Normalise to [-1, 1]
-        samples_float = samples.astype(np.float32) / 32768.0
-
-        # Compute the power (the small epsilon avoids a zero value)
-        power = np.mean(samples_float ** 2) + 1e-8
-
-        # Initialise the noise baseline (first 5 frames)
-        if self.noise_power is None:
-            # init_frames is created lazily here rather than in __init__.
-            if not hasattr(self, 'init_frames'):
-                self.init_frames = []
-            self.init_frames.append(power)
-
-            if len(self.init_frames) >= 5:
-                self.noise_power = np.mean(self.init_frames)
-                self.threshold = self.noise_power * self.threshold_multiplier
-            return False
-
-        # VAD detection logic: each frame extends one run counter and resets
-        # the other, so both counters measure *consecutive* frames.
-        if power > self.threshold:
-            self.trigger_count += 1
-            self.silence_count = 0
-        else:
-            self.silence_count += 1
-            self.trigger_count = 0
-
-        # State transition logic: entering speech needs trigger_threshold
-        # loud frames in a row; leaving it needs silence_threshold quiet ones.
-        if not self.speech_active:
-            if self.trigger_count >= self.trigger_threshold:
-                self.speech_active = True
-        else:
-            if self.silence_count >= self.silence_threshold:
-                self.speech_active = False
-
-        return self.speech_active
-
-
-# Usage example: read frames from a PyAudio input stream and print the VAD
-# decision for each one until interrupted with Ctrl+C.
-if __name__ == "__main__":
-    import pyaudio
-    # NOTE(review): struct is imported but not used in the code below.
-    import struct
-
-    FORMAT = pyaudio.paInt16
-    CHANNELS = 1
-    RATE = 16000
-    FRAME_DURATION = 20  # ms
-    FRAME_SIZE = int(RATE * FRAME_DURATION / 1000)
-
-    vad = PowerVAD(sample_rate=RATE, frame_duration=FRAME_DURATION)
-    audio = pyaudio.PyAudio()
-
-    stream = audio.open(
-        format=FORMAT,
-        channels=CHANNELS,
-        rate=RATE,
-        input=True,
-        frames_per_buffer=FRAME_SIZE
-    )
-
-    try:
-        while True:
-            frame = stream.read(FRAME_SIZE)
-            is_speech = vad.process_frame(frame)
-            print("Speech detected" if is_speech else "Silence")
-    except KeyboardInterrupt:
-        # NOTE(review): the stream is released only on Ctrl+C; any other
-        # exception leaves the loop without this cleanup running.
-        stream.stop_stream()
-        stream.close()
-        audio.terminate()
+#
+# import jieba
+#
+# TestStr = "能帮我查一下,我家水费欠多少"
+# seg_list = jieba.cut(TestStr, cut_all=False, HMM=True)
+# print ("Default Mode:", "/ ".join(seg_list))
+import json
+import uuid
+import mmh3
+
+from src.core.callcenter.dao import Bucket
+
+def get_bucket(custom_uuid=None, buckets=None):
+    """Pick the bucket whose ``[lower, upper)`` range contains the hashed id.
+
+    The identifier is hashed with ``mmh3.hash`` and reduced to a slot in
+    0..99; the first bucket with ``lower <= slot < upper`` wins.
+
+    :param custom_uuid: identifier to hash. NOTE(review): the default of None
+        is passed straight to ``mmh3.hash``, which presumably rejects it --
+        confirm and pass a real id.
+    :param buckets: sequence of objects exposing ``lower`` and ``upper``;
+        None is treated as an empty list
+    :return: ``(slot, bucket)`` for the first matching bucket, or
+        ``(-1, buckets[0])`` when no range contains the slot
+    :raises IndexError: when ``buckets`` is empty, because the fallback
+        indexes ``buckets[0]``
+    """
+    # A None default avoids sharing one mutable list object across calls.
+    if buckets is None:
+        buckets = []
+    # abs() already makes the hash non-negative, so a single modulo yields
+    # 0..99. The slot does not depend on the bucket, so compute it once.
+    num = abs(mmh3.hash(custom_uuid)) % 100
+    for bucket in buckets:
+        if bucket.lower <= num < bucket.upper:
+            return num, bucket
+    # No range matched: fall back to the first bucket, flagged with slot -1.
+    return -1, buckets[0]
+
+if __name__ == '__main__':
+    # Scratch experiment 1 (disabled): bucket assignment for sample ids.
+    # arr = ['C1879412349555838976','C1879412206890782720','C1879411969535119360','C1879411406290423808','C1879408024871899136','C1879407680997691392','C1879406254007390208','C1879404740748644352','C1879403850650226688','C1879402961977872384','C1879402509785763840','C1879402237567045632','C1879402005592674304','C1879400827102302208','C1879400778024751104','C1879400705488457728','C1879400533513605120','C1879400336188379136','C1879400327959154688','C1879399300082044928','C1879399233669435392','C1879396009050771456','C1879394097295396864','C1879393224498483200','C1879381728368398336','C1879381287505104896','C1879379466774515712','C1879376723787780096','C1879374004641468416','C1879373548330553344','C1879372415646175232','C1879367459866284032','C1879365634769424384','C1879364921326702592','C1879364787436130304','C1879363948554358784','C1879362454358724608','C1879360081448013824','C1879358294565457920','C1879358151116066816','C1879357497190518784','C1879357257641234432','C1879357023229972480','C1879355792935751680','C1879355755749052416','C1879354039309832192']
+    # buckets=[Bucket(id=1, name="传统", lower=0, upper=90), Bucket(id=2, name="AI",lower=90, upper=100)]
+    # for custom_uuid in arr:
+    #     num, bucket = get_bucket(custom_uuid=custom_uuid, buckets=buckets)
+    #     print(custom_uuid, num, bucket.name)
+
+    # Scratch experiment 2 (disabled): pull the recognised words out of a
+    # dictation result message.
+    # message = """{"seg_id": 2, "cn": {"st": {"rt": [{"ws": [{"cw": [{"sc": 0.00, "w": "停水", "wp": "n", "rl": "0", "wb": 9, "wc": 0.00, "we": 64}], "wb": 9,"we": 64},{"cw": [{"sc": 0.00, "w": "咨询", "wp": "n", "rl": "0", "wb": 65, "wc": 0.00, "we": 132}], "wb": 65,"we": 132}]}], "bg": "9510", "type": "0", "ed": "10950"}}, "ls": false}"""
+    # message = '{"action":"result","code":"0","data":"{\"seg_id\":0,\"cn\":{\"st\":{\"rt\":[{\"ws\":[{\"cw\":[{\"sc\":0.00,\"w\":\"蜓\",\"wp\":\"n\",\"rl\":\"0\",\"wb\":13,\"wc\":0.00,\"we\":26}],\"wb\":0,\"we\":0}]}],\"bg\":\"9160\",\"type\":\"1\",\"ed\":\"0\"}},\"ls\":false}","desc":"success","sid":"rta108a8500@dx2f5f1b177d38000100"}'
+    # # Parse the outermost JSON
+    # message_dict = json.loads(message)
+    #
+    # # Parse the nested JSON string
+    # data = json.loads(message_dict['data'])
+    #
+    # # Extract the words held in the "w" fields
+    # words = ''.join(cw["w"] for item in data["cn"]["st"]["rt"] for ws in item["ws"] for cw in ws["cw"])
+    #
+    # print(words)
+
+    # Live code: read the hot-word file and echo its contents to stdout.
+    with open('/Users/davidliu/hot_words.txt', 'r', encoding='utf-8') as hot_words_file:
+        print(hot_words_file.read())

+ 2 - 2
src/core/voip/constant.py

@@ -11,7 +11,7 @@ def build_ep_config():
     ep_cfg.uaConfig.mainThreadOnly = False
     ep_cfg.uaConfig.maxCalls = 12
     ep_cfg.uaConfig.maxAccounts = 12
-    ep_cfg.medConfig.noVad = False
+    ep_cfg.medConfig.noVad = True
     ep_cfg.logConfig.level = 5
     ep_cfg.logConfig.consoleLevel = 5
     return ep_cfg
@@ -20,7 +20,7 @@ def build_media_config():
     media_cfg = pj.MediaConfig()
     media_cfg.jbMinPre = 4  # Minimum pre-fetch frames
     media_cfg.jbMaxPre = 16  # Maximum pre-fetch frames
-    media_cfg.noVad = False  # Disable Voice Activity Detection if needed
+    media_cfg.noVad = True  # Disable Voice Activity Detection if needed
     media_cfg.clockRate = 16000
     media_cfg.channelCount = 1
     media_cfg.audioFramePtime = 40