tokenizer_config.json

{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
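
A config like this is normally consumed by the Hugging Face transformers library rather than read by hand. Below is a minimal sketch of loading such a tokenizer and inspecting the values declared above; the model identifier "bert-base-uncased" is an illustrative placeholder, not taken from this file, while the token contents and IDs printed in the comments come directly from added_tokens_decoder.

    # Minimal sketch, assuming a BERT-style checkpoint that ships this tokenizer_config.json.
    from transformers import AutoTokenizer

    # Placeholder model id; substitute the repository this config belongs to.
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

    # Special tokens declared in added_tokens_decoder map to these attributes:
    print(tokenizer.pad_token, tokenizer.pad_token_id)    # [PAD] 0
    print(tokenizer.unk_token, tokenizer.unk_token_id)    # [UNK] 100
    print(tokenizer.cls_token, tokenizer.cls_token_id)    # [CLS] 101
    print(tokenizer.sep_token, tokenizer.sep_token_id)    # [SEP] 102
    print(tokenizer.mask_token, tokenizer.mask_token_id)  # [MASK] 103

    # do_lower_case=true and model_max_length=512 shape how text is encoded:
    enc = tokenizer("Hello, World!", truncation=True,
                    max_length=tokenizer.model_max_length)
    print(enc["input_ids"])  # begins with 101 ([CLS]) and ends with 102 ([SEP])

Because tokenizer_class is "BertTokenizer", AutoTokenizer resolves to the BERT WordPiece tokenizer; the lowercase and 512-token limit settings above are applied automatically when the config is loaded.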