unimatch.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
  5. **********************************************************************
  6. * Date Name Description
  7. * 07/18/01 aliu Creation.
  8. **********************************************************************
  9. */
  10. #ifndef UNIMATCH_H
  11. #define UNIMATCH_H
  12. #include "unicode/utypes.h"
  13. /**
  14. * \file
  15. * \brief C++ API: Unicode Matcher
  16. */
  17. U_NAMESPACE_BEGIN
  // Forward declarations: this header only names these types in reference
  // parameters and return types, so their full definitions are not needed.
  class Replaceable;
  class UnicodeString;
  class UnicodeSet;
  /**
   * Degree of match reported by <code>UnicodeMatcher::matches()</code>.
   *
   * The explicit enumerator values are the ones the compiler would
   * otherwise assign implicitly (0, 1, 2 in declaration order), so the
   * numeric values are unchanged.
   * @stable ICU 2.4
   */
  enum UMatchDegree {
      /**
       * Returned by <code>matches()</code> to report a mismatch between
       * the text and the matcher: either the text contains a character
       * that does not match, or, for a non-incremental match, the text
       * does not contain all of the desired characters.
       * @stable ICU 2.4
       */
      U_MISMATCH = 0,

      /**
       * Returned by <code>matches()</code> to report a partial match
       * between the text and the matcher. Only incremental match
       * operations return this value. It means that every character of
       * the text matches but more characters are required to complete
       * the match. For variable-length matchers it can also mean that
       * every character of the text matches and that further characters
       * supplied at limit might match as well.
       * @stable ICU 2.4
       */
      U_PARTIAL_MATCH = 1,

      /**
       * Returned by <code>matches()</code> to report a complete match
       * between the text and the matcher. For an incremental
       * variable-length match this value is returned when the given text
       * matches and it is known that additional characters would not
       * alter the extent of the match.
       * @stable ICU 2.4
       */
      U_MATCH = 2
  };
  56. /**
  57. * <code>UnicodeMatcher</code> defines a protocol for objects that can
  58. * match a range of characters in a Replaceable string.
  59. * @stable ICU 2.4
  60. */
  61. class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
  62. public:
  63. /**
  64. * Destructor.
  65. * @stable ICU 2.4
  66. */
  67. virtual ~UnicodeMatcher();
  68. /**
  69. * Return a UMatchDegree value indicating the degree of match for
  70. * the given text at the given offset. Zero, one, or more
  71. * characters may be matched.
  72. *
  73. * Matching in the forward direction is indicated by limit >
  74. * offset. Characters from offset forwards to limit-1 will be
  75. * considered for matching.
  76. *
  77. * Matching in the reverse direction is indicated by limit <
  78. * offset. Characters from offset backwards to limit+1 will be
  79. * considered for matching.
  80. *
  81. * If limit == offset then the only match possible is a zero
  82. * character match (which subclasses may implement if desired).
  83. *
  84. * As a side effect, advance the offset parameter to the limit of
  85. * the matched substring. In the forward direction, this will be
  86. * the index of the last matched character plus one. In the
  87. * reverse direction, this will be the index of the last matched
  88. * character minus one.
  89. *
  90. * <p>Note: This method is not const because some classes may
  91. * modify their state as the result of a match.
  92. *
  93. * @param text the text to be matched
  94. * @param offset on input, the index into text at which to begin
  95. * matching. On output, the limit of the matched text. The
  96. * number of matched characters is the output value of offset
  97. * minus the input value. Offset should always point to the
  98. * HIGH SURROGATE (leading code unit) of a pair of surrogates,
  99. * both on entry and upon return.
  100. * @param limit the limit index of text to be matched. Greater
  101. * than offset for a forward direction match, less than offset for
  102. * a backward direction match. The last character to be
  103. * considered for matching will be text.charAt(limit-1) in the
  104. * forward direction or text.charAt(limit+1) in the backward
  105. * direction.
  106. * @param incremental if TRUE, then assume further characters may
  107. * be inserted at limit and check for partial matching. Otherwise
  108. * assume the text as given is complete.
  109. * @return a match degree value indicating a full match, a partial
  110. * match, or a mismatch. If incremental is FALSE then
  111. * U_PARTIAL_MATCH should never be returned.
  112. * @stable ICU 2.4
  113. */
  114. virtual UMatchDegree matches(const Replaceable& text,
  115. int32_t& offset,
  116. int32_t limit,
  117. UBool incremental) = 0;
  118. /**
  119. * Returns a string representation of this matcher. If the result of
  120. * calling this function is passed to the appropriate parser, it
  121. * will produce another matcher that is equal to this one.
  122. * @param result the string to receive the pattern. Previous
  123. * contents will be deleted.
  124. * @param escapeUnprintable if TRUE then convert unprintable
  125. * character to their hex escape representations, \\uxxxx or
  126. * \\Uxxxxxxxx. Unprintable characters are those other than
  127. * U+000A, U+0020..U+007E.
  128. * @stable ICU 2.4
  129. */
  130. virtual UnicodeString& toPattern(UnicodeString& result,
  131. UBool escapeUnprintable = FALSE) const = 0;
  132. /**
  133. * Returns TRUE if this matcher will match a character c, where c
  134. * & 0xFF == v, at offset, in the forward direction (with limit >
  135. * offset). This is used by <tt>RuleBasedTransliterator</tt> for
  136. * indexing.
  137. * @stable ICU 2.4
  138. */
  139. virtual UBool matchesIndexValue(uint8_t v) const = 0;
  140. /**
  141. * Union the set of all characters that may be matched by this object
  142. * into the given set.
  143. * @param toUnionTo the set into which to union the source characters
  144. * @stable ICU 2.4
  145. */
  146. virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
  147. };
  148. U_NAMESPACE_END
  149. #endif