123456789101112131415161718192021222324252627282930313233343536 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- """
- @Time : 2024/9/13 14:29
- @Author : cao
- @File : dirty.py
- @Desc : Dirty-word (profanity) detection using a word list and jieba segmentation.
- """
- import logging
- import utils
- import jieba
- logger = logging.getLogger(__name__)
def load_dirty(file):
    """Load the dirty-word vocabulary from a text file.

    Every non-blank line contributes one entry: the text that precedes the
    first space on the (whitespace-stripped) line. Anything after that
    space is ignored.

    Args:
        file: Path of the word-list file. It is decoded as UTF-8; a leading
            byte-order mark, if present, is discarded.

    Returns:
        list[str]: The entries, in file order.
    """
    # Decode explicitly: without ``encoding`` open() uses the locale's
    # preferred encoding, which is not UTF-8 on every platform.
    # "utf-8-sig" reads plain UTF-8 as well and additionally drops a BOM,
    # which str.strip() would otherwise leave glued to the first entry.
    with open(file, encoding="utf-8-sig") as f:
        stripped_lines = (line.strip() for line in f)
        # Blank lines are skipped so that "" never becomes a vocabulary
        # entry.
        return [entry.split(" ")[0] for entry in stripped_lines if entry]
# Module-level vocabulary, read once at import time from the path
# configured as utils.DIRTY_DATA; dirty_detect() tests membership in it.
dirty = load_dirty(file=utils.DIRTY_DATA)
def _dirty_hit(word, sentence):
    """Build the payload returned when ``word`` flags ``sentence`` as dirty."""
    return {
        "status": 1,
        "word": word,
        "query": sentence,
        "意图类别": "脏话",  # intent category: profanity
        "intent": "转人工",  # intent: transfer to a human agent
        "isFaq": False,
    }


def dirty_detect(sentence):
    """Check whether ``sentence`` contains an entry of the ``dirty`` list.

    The whole sentence is first compared verbatim against the list. If
    that fails, the lower-cased sentence is segmented with
    ``jieba.lcut(..., cut_all=True)`` and each token is looked up in turn;
    the first token found in the list wins.

    Args:
        sentence: The user query to inspect.

    Returns:
        tuple: ``(True, payload)`` on a hit, where ``payload`` is a dict
        with the keys ``status``, ``word`` (the matching entry), ``query``
        (the original sentence), ``意图类别``, ``intent`` and ``isFaq``.
        ``(False, '')`` when nothing matches or ``sentence`` is empty or
        ``None``.
    """
    # Nothing to inspect; also keeps None away from .lower() below.
    if not sentence:
        return False, ''

    # Exact match on the raw (not lower-cased) sentence.
    if sentence in dirty:
        return True, _dirty_hit(sentence, sentence)

    for word in jieba.lcut(sentence.lower(), cut_all=True):
        # Ignore empty tokens so an empty vocabulary entry can never
        # count as a hit.
        if word and word in dirty:
            return True, _dirty_hit(word, sentence)

    return False, ''
if __name__ == "__main__":
    # Manual smoke check: run the detector on one sample query and print
    # the (flag, payload) result.
    sample_query = "SB吧,怎么老停水"
    print(dirty_detect(sample_query))
|