|
@@ -5,6 +5,8 @@
|
|
|
@File : utils.py
|
|
|
@Desc :
|
|
|
"""
|
|
|
+import sys
|
|
|
+sys.path.append("..")
|
|
|
from datetime import datetime
|
|
|
from functools import wraps
|
|
|
from typing import (Any,
|
|
@@ -18,11 +20,18 @@ GENERATED,
|
|
|
FIXED,
|
|
|
MOUDLES
|
|
|
)
|
|
|
+import pandas as pd
|
|
|
from threading import Thread
|
|
|
import json
|
|
|
logger = get_logger()
|
|
|
from database import Mysql
|
|
|
+from pypinyin import pinyin, Style
|
|
|
+import jieba
|
|
|
+import re
|
|
|
+import itertools
|
|
|
+from concurrent.futures import ThreadPoolExecutor,as_completed
|
|
|
|
|
|
# Module-level thread pool, created once at import time and capped at 20 worker threads.
+executor = ThreadPoolExecutor(max_workers=20)
|
|
|
|
|
|
def get_speech_status(bid: Text = None, options: List[Dict[Text, Text]] = None):
|
|
|
"""which speech template to choose"""
|
|
@@ -177,8 +186,6 @@ CREATE TABLE botrecords (
|
|
|
mysql.close_mysql()
|
|
|
|
|
|
|
|
|
-
|
|
|
-
|
|
|
def timetic(func):
|
|
|
@wraps(func)
|
|
|
def wrapper(*args, **kwargs):
|
|
@@ -193,3 +200,38 @@ def timetic(func):
|
|
|
logger.info("{} ==> {}s".format(func.__qualname__, cost))
|
|
|
return results
|
|
|
return wrapper
|
|
|
+
|
|
|
+
|
|
|
def loaddict(path: Text = "../data/location.xlsx") -> Dict[Text, Dict[Text, Any]]:
    """Load the community-name lookup tables from an Excel workbook.

    The first row of the sheet is used as the header. The columns read are
    ``norm_name``, ``name``, ``name_pinyin`` and ``short_name``.

    Args:
        path: Location of the workbook. The default is resolved against the
            current working directory, exactly as before.

    Returns:
        A dict with three tables:

        * ``'zh'``     -- ``norm_name`` -> ``name``
        * ``'pinyin'`` -- ``name_pinyin`` -> ``name``
        * ``'total'``  -- word -> weight, 100 for every ``norm_name`` and 80
          for every ``short_name``

    Raises:
        Whatever ``pandas.read_excel`` raises when the workbook is missing
        or unreadable, and ``KeyError`` when an expected column is absent.
    """
    df = pd.read_excel(path, header=0)

    def _mapping(key_col, value_col):
        # Drop rows where either cell is empty, so NaN never becomes a lookup
        # key or is handed back to a caller as a canonical name.
        pairs = df[[key_col, value_col]].dropna()
        return dict(zip(pairs[key_col], pairs[value_col]))

    weights = {word: 100 for word in df['norm_name'].dropna()}
    # Short names are applied last, so a word present in both columns ends up
    # with weight 80 (this ordering is unchanged).
    weights.update((word, 80) for word in df['short_name'].dropna())

    return {
        'zh': _mapping('norm_name', 'name'),
        'pinyin': _mapping('name_pinyin', 'name'),
        'total': weights,
    }
|
|
|
+
|
|
|
def _register_community_words(words):
    """Add every word in *words* to jieba's dictionary as a single token."""
    for word in words:
        word = str(word).strip()
        if word:
            jieba.add_word(word)


# Lookup tables are loaded once, at import time.
user_dict = loaddict()

# jieba.load_userdict() is documented to take the path of (or a file-like
# object over) a "word [freq] [tag]" dictionary file, not a dict, so the words
# are registered one by one through the documented add_word() API instead.
# Frequencies are left to jieba's automatic estimate; the 100/80 weights stored
# in user_dict['total'] are not applied here.
# TODO(review): decide whether those weights should be passed as `freq`.
_register_community_words(user_dict['total'])
|
|
|
+
|
|
|
def _pinyin_key(text):
    """Return the toneless pinyin syllables of *text* joined with "|"."""
    return "|".join(syllable[0] for syllable in pinyin(text, style=Style.NORMAL))


def norm_community(asr):
    """Map a recognised utterance to a known community name.

    Lookups are tried in this order and the first hit wins:

    1. the whole utterance in ``user_dict['zh']``;
    2. the pinyin key of the whole utterance (parentheses removed) in
       ``user_dict['pinyin']``;
    3. each word produced by ``jieba.lcut(asr)``, first literally in
       ``user_dict['zh']`` and then by its pinyin key in
       ``user_dict['pinyin']``.

    Args:
        asr: The text to normalise (presumably a speech-recognition
            transcript -- confirm with callers). ``None`` and the empty
            string are returned unchanged.

    Returns:
        The matched name from the lookup tables, or ``asr`` itself when
        nothing matches.
    """
    # Nothing to normalise; also avoids a TypeError from re.sub on None.
    if not asr:
        return asr

    zh_names = user_dict['zh']
    pinyin_names = user_dict['pinyin']

    if asr in zh_names:
        return zh_names[asr]

    # Strip ASCII and full-width parentheses so they do not end up as extra
    # segments in the whole-utterance pinyin key.
    whole_key = _pinyin_key(re.sub(r'[()（）]', '', asr))
    if whole_key in pinyin_names:
        return pinyin_names[whole_key]

    # Fall back to word level: the first word that matches decides the result.
    for word in jieba.lcut(asr):
        if word in zh_names:
            return zh_names[word]
        word_key = _pinyin_key(word)
        if word_key in pinyin_names:
            return pinyin_names[word_key]

    return asr
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Manual smoke test: normalise one sample utterance and print the result.
    sample_utterance = "我家是佳栋地堪这里"
    print(norm_community(sample_utterance))
|