root il y a 2 mois
Parent
commit
531a39c24a

BIN
ai-slibra-assistant/data/location.xlsx


+ 2 - 2
ai-slibra-assistant/util/__init__.py

@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 """
 @Time    : 2024/10/15 16:19
-@Author  : cao
 @File    : __init__.py.py
 @Desc    : 
 """
@@ -11,6 +10,7 @@ from .utils import (get_speech_status,
                     get_robot_speeches,
                     get_next_code_with_track,
                     insert_log,
-                    timetic
+                    timetic,
+                    norm_community
                     )
 from .request_utils import *

+ 4 - 1
ai-slibra-assistant/util/request_utils.py

@@ -13,7 +13,7 @@ from config import get_logger
 import requests
 import base64
 from database import *
-from util import timetic
+from util import timetic, norm_community
 import uuid
 
 logger = get_logger()
@@ -171,6 +171,9 @@ def business_service(session_id, uid, code, tools,  asr):
     asr = asr.strip(r""""$%&'()*+,,-./:;<=>?@[\]^_`{|}~。??!""")
     if tools in ["water_loc_info", "fee_user_info", "user_phone_info"] and len(asr)==0:
         return [{"title": "NO", "isFaq": False, "faqContent": '', "asr": asr, "businessContent": ''}]
+    # parse water_loc_info
+    if tools in ["water_loc_info"]:
+        asr = norm_community(asr)
     param = json.dumps(dict(nodeId=code,
                             userId=uid,
                             sessionId=session_id,

+ 44 - 2
ai-slibra-assistant/util/utils.py

@@ -5,6 +5,8 @@
 @File    : utils.py
 @Desc    : 
 """
+import sys
+sys.path.append("..")
 from datetime import datetime
 from functools import wraps
 from typing import (Any,
@@ -18,11 +20,18 @@ GENERATED,
 FIXED,
 MOUDLES
 )
+import pandas as pd
 from threading import Thread
 import json
 logger = get_logger()
 from database import Mysql
+from pypinyin import pinyin, Style
+import jieba
+import re
+import itertools
+from concurrent.futures import ThreadPoolExecutor,as_completed
 
+executor = ThreadPoolExecutor(max_workers=20)
 
 def get_speech_status(bid: Text = None, options: List[Dict[Text, Text]] = None):
     """which speech template to choose"""
@@ -177,8 +186,6 @@ CREATE TABLE botrecords (
         mysql.close_mysql()
 
 
-
-
 def timetic(func):
     @wraps(func)
     def wrapper(*args, **kwargs):
@@ -193,3 +200,38 @@ def timetic(func):
             logger.info("{} ==> {}s".format(func.__qualname__, cost))
         return results
     return wrapper
+
+
+def loaddict(path=None):
+    """Load the community-name lookup tables from the location spreadsheet.
+
+    The spreadsheet is read with its first row as the header and must
+    provide the columns ``norm_name``, ``name``, ``name_pinyin`` and
+    ``short_name``.
+
+    Args:
+        path: Optional path of the spreadsheet. When omitted, the file
+            ``data/location.xlsx`` next to this module's parent directory is
+            used, so the lookup no longer depends on the current working
+            directory; the historical ``../data/location.xlsx`` (relative to
+            the working directory) is kept as a fallback.
+
+    Returns:
+        A dict with three tables:
+        ``'zh'``     maps ``norm_name``   -> ``name``;
+        ``'pinyin'`` maps ``name_pinyin`` -> ``name``;
+        ``'total'``  maps every ``norm_name`` to 100 and every ``short_name``
+        to 80 (a short name that equals a norm name ends up with 80, because
+        the short names are inserted last).
+    """
+    # Function-scope import: keeps this block self-contained without
+    # touching the file-level import list.
+    import os
+
+    if path is None:
+        module_dir = os.path.dirname(os.path.abspath(__file__))
+        path = os.path.join(module_dir, os.pardir, "data", "location.xlsx")
+        if not os.path.exists(path):
+            # Fall back to the original working-directory-relative location.
+            path = "../data/location.xlsx"
+    df = pd.read_excel(path, header=0)
+
+    def _table(key_column):
+        # Rows with an empty key or an empty target name can never produce a
+        # usable match (a NaN would be returned as the "normalised" name),
+        # so they are left out of the table.
+        pairs = df[[key_column, 'name']].dropna()
+        return dict(pairs.values)
+
+    loc = dict()
+    loc['zh'] = _table('norm_name')
+    loc['pinyin'] = _table('name_pinyin')
+    short_val = [(i, 80) for i in df['short_name'].dropna().tolist()]
+    norm_val = [(i, 100) for i in df['norm_name'].dropna().tolist()]
+    norm_val.extend(short_val)
+    loc['total'] = dict(norm_val)
+    return loc
+
+
+# Built once at import time and shared by norm_community().
+user_dict = loaddict()
+# NOTE(review): jieba.load_userdict is documented to take a file path or a
+# file-like object; here it receives a dict. Presumably it iterates the keys
+# only, in which case the 80/100 weights never reach jieba -- confirm, and
+# consider jieba.add_word(word, freq) per entry if the weights matter.
+jieba.load_userdict(user_dict['total'])
+
+def norm_community(asr):
+    """Map a recognised utterance to a known community name.
+
+    Matching is attempted in this order, and the first hit wins:
+
+    1. the whole utterance is a key of ``user_dict['zh']``;
+    2. the pinyin of the whole utterance (brackets removed, syllables joined
+       with ``|``) is a key of ``user_dict['pinyin']``;
+    3. for each token produced by ``jieba.lcut`` on the raw utterance, the
+       token itself, then its ``|``-joined pinyin, is looked up the same way.
+
+    Args:
+        asr: The utterance text to normalise.
+
+    Returns:
+        The mapped name for the first match, or ``asr`` unchanged when
+        nothing matches.
+    """
+    by_text = user_dict['zh']
+    by_pinyin = user_dict['pinyin']
+
+    def pinyin_key(chunk):
+        # First reading of every syllable, tone-less, joined with "|".
+        syllables = pinyin(chunk, style=Style.NORMAL)
+        return "|".join([readings[0] for readings in syllables])
+
+    # Pass 1: exact text match on the whole utterance.
+    if asr in by_text:
+        return by_text[asr]
+
+    # Pass 2: sound-alike match on the whole utterance, ignoring brackets.
+    whole_key = pinyin_key(re.sub(r'[(())]', '', asr))
+    if whole_key in by_pinyin:
+        return by_pinyin[whole_key]
+
+    # Pass 3: token by token -- text match first, then sound-alike match.
+    for token in jieba.lcut(asr):
+        if token in by_text:
+            return by_text.get(token)
+        token_key = pinyin_key(token)
+        if token_key in by_pinyin:
+            return by_pinyin[token_key]
+
+    # Nothing recognised: hand the utterance back untouched.
+    return asr
+
+
+if __name__ == "__main__":
+    print(norm_community("我家是佳栋地堪这里"))