unifilt.h 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1999-2010, International Business Machines Corporation and others.
  6. * All Rights Reserved.
  7. **********************************************************************
  8. * Date Name Description
  9. * 11/17/99 aliu Creation.
  10. **********************************************************************
  11. */
  12. #ifndef UNIFILT_H
  13. #define UNIFILT_H
  14. #include "unicode/unifunct.h"
  15. #include "unicode/unimatch.h"
  16. /**
  17. * \file
  18. * \brief C++ API: Unicode Filter
  19. */
  20. U_NAMESPACE_BEGIN
  21. /**
  22. * U_ETHER is used to represent character values for positions outside
  23. * a range. For example, transliterator uses this to represent
  24. * characters outside the range contextStart..contextLimit-1. This
  25. * allows explicit matching by rules and UnicodeSets of text outside a
  26. * defined range. It is defined as the value 0xFFFF cast to UChar.
  27. * @stable ICU 3.0
  28. */
  29. #define U_ETHER ((UChar)0xFFFF)
  30. /**
  31. *
  32. * <code>UnicodeFilter</code> defines a protocol for selecting a
  33. * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
  34. * Currently, filters are used in conjunction with classes like {@link
  35. * Transliterator} to only process selected characters through a
  36. * transformation.
  37. *
  38. * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
  39. * of its base class, UnicodeMatcher. These methods are toPattern()
  40. * and matchesIndexValue(). This is done so that filter classes that
  41. * are not actually used as matchers -- specifically, those in the
  42. * UnicodeFilterLogic component, and those in tests -- can continue to
  43. * work without defining these methods. As long as a filter is not
  44. * used in an RBT during real transliteration, these methods will not
  45. * be called. However, this breaks the UnicodeMatcher base class
  46. * protocol, and it is not a correct solution.
  47. *
  48. * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
  49. * hierarchy and either redesign it, or simply remove the stubs in
  50. * UnicodeFilter and force subclasses to implement the full
  51. * UnicodeMatcher protocol.
  52. *
  53. * @see UnicodeFilterLogic
  54. * @stable ICU 2.0
  55. */
  56. class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
  57. public:
  58. /**
  59. * Virtual destructor, so that subclasses can be destroyed through a UnicodeFilter pointer.
  60. * @stable ICU 2.0
  61. */
  62. virtual ~UnicodeFilter();
  63. /**
  64. * Returns <tt>true</tt> for characters that are in the selected subset. In other words, if
  65. * a character is <b>to be filtered</b>, then <tt>contains()</tt> returns <b><tt>false</tt></b>.
  66. * @param c The code point to test.
  67. * @return <tt>true</tt> if <tt>c</tt> is in the selected subset.
  68. * @stable ICU 2.0
  69. */
  70. virtual UBool contains(UChar32 c) const = 0;
  71. /**
  72. * UnicodeFunctor API. Casts 'this' to a UnicodeMatcher* pointer and returns it.
  73. * @return This object, viewed as a UnicodeMatcher.
  74. * @stable ICU 2.4
  75. */
  76. virtual UnicodeMatcher* toMatcher() const;
  77. /**
  78. * Implements the UnicodeMatcher API; see the UnicodeMatcher declaration for the parameter contract.
  79. * @stable ICU 2.4
  80. */
  81. virtual UMatchDegree matches(const Replaceable& text,
  82. int32_t& offset,
  83. int32_t limit,
  84. UBool incremental);
  85. /**
  86. * UnicodeFunctor API. Nothing to do for a filter, hence the unnamed parameter.
  87. * @stable ICU 2.4
  88. */
  89. virtual void setData(const TransliterationRuleData*);
  90. /**
  91. * ICU "poor man's RTTI", returns a UClassID for this class.
  92. * @return The UClassID identifying this class.
  93. * @stable ICU 2.2
  94. */
  95. static UClassID U_EXPORT2 getStaticClassID();
  96. protected:
  97. /*
  98. * This class has pure virtual functions and so cannot be instantiated
  99. * directly; the explicit constructor below is therefore left commented out.
  100. * @stable ICU 2.0
  101. */
  102. /* UnicodeFilter();*/
  103. };
  104. /*inline UnicodeFilter::UnicodeFilter() {}*/
  105. U_NAMESPACE_END
  106. #endif