|
@@ -217,6 +217,15 @@ user_dict= loaddict()
|
|
|
# Hand the value stored under user_dict['total'] to jieba as its user dictionary.
# NOTE(review): presumably a path or file-like object listing extra words —
# confirm against what loaddict() actually stores under 'total'.
jieba.load_userdict(user_dict['total'])
|
|
|
|
|
|
def norm_community(asr):
|
|
|
def match_loc(comb):
    """Look up the concatenation of ``comb`` in the user dictionaries.

    The fragments in ``comb`` are joined without a separator.  The joined
    string is only looked up when it occurs verbatim inside ``asr`` (the
    argument of the enclosing function).

    Args:
        comb: Iterable of string fragments forming one candidate.

    Returns:
        The ``user_dict['zh']`` entry for the candidate when it has one;
        otherwise the ``user_dict['pinyin']`` entry whose key is the
        "|"-joined first element of each item returned by
        ``pinyin(candidate, style=Style.NORMAL)``; otherwise ``None``.
    """
    # NOTE(review): nesting reconstructed from a view without indentation —
    # both lookups are assumed to sit under the substring check; confirm.
    candidate = ''.join(comb)

    # Guard: a join that is not a substring of the ASR text is never looked up.
    if candidate not in asr:
        return None

    # Exact-text table takes priority over the pinyin table.
    zh_table = user_dict['zh']
    if candidate in zh_table:
        return zh_table.get(candidate)

    # Build the pinyin key from the first element of every item.
    syllables = []
    for readings in pinyin(candidate, style=Style.NORMAL):
        syllables.append(readings[0])
    pinyin_key = "|".join(syllables)

    pinyin_table = user_dict['pinyin']
    if pinyin_key in pinyin_table:
        return pinyin_table[pinyin_key]
    return None
|
|
|
if asr in user_dict['zh']:
|
|
|
return user_dict['zh'][asr]
|
|
|
text = re.sub(r'[(())]', '', asr)
|
|
@@ -230,8 +239,15 @@ def norm_community(asr):
|
|
|
term = "|".join([term[0] for term in pinyin(word, style=Style.NORMAL)])
|
|
|
if term in user_dict['pinyin']:
|
|
|
return user_dict['pinyin'][term]
|
|
|
+ for r in range(1, len(words) + 1):
|
|
|
+ combinations_list = list(itertools.combinations(words, r))
|
|
|
+ features = [executor.submit(match_loc, combo) for combo in combinations_list]
|
|
|
+ result =[feature.result() for feature in as_completed(features)]
|
|
|
+ res = [i for i in filter(lambda x: x is not None, result)]
|
|
|
+ if len(res) >0:
|
|
|
+ return res[0]
|
|
|
return asr
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: print what norm_community returns for one sample utterance.
    sample_utterance = "嗯,那个我们家是碧水兰庭的"
    print(norm_community(sample_utterance))
|