// usetiter.h
  // Copyright (C) 2016 and later: Unicode, Inc. and others.
  // License & terms of use: http://www.unicode.org/copyright.html
  /*
  **********************************************************************
  * Copyright (c) 2002-2014, International Business Machines
  * Corporation and others. All Rights Reserved.
  **********************************************************************
  */
  9. #ifndef USETITER_H
  10. #define USETITER_H
  11. #include "unicode/utypes.h"
  12. #include "unicode/uobject.h"
  13. #include "unicode/unistr.h"
  14. /**
  15. * \file
  16. * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
  17. */
  18. U_NAMESPACE_BEGIN
  19. class UnicodeSet;
  20. class UnicodeString;
  21. /**
  22. *
  23. * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
  24. * iterates over either code points or code point ranges. After all
  25. * code points or ranges have been returned, it returns the
  26. * multicharacter strings of the UnicodeSet, if any.
  27. *
  28. * This class is not intended to be subclassed. Consider any fields
  29. * or methods declared as "protected" to be private. The use of
  30. * protected in this class is an artifact of history.
  31. *
  32. * <p>To iterate over code points and strings, use a loop like this:
  33. * <pre>
  34. * UnicodeSetIterator it(set);
  35. * while (it.next()) {
  36. * processItem(it.getString());
  37. * }
  38. * </pre>
  39. * <p>Each item in the set is accessed as a string. Set elements
  40. * consisting of single code points are returned as strings containing
  41. * just the one code point.
  42. *
  43. * <p>To iterate over code point ranges, instead of individual code points,
  44. * use a loop like this:
  45. * <pre>
  46. * UnicodeSetIterator it(set);
  47. * while (it.nextRange()) {
  48. * if (it.isString()) {
  49. * processString(it.getString());
  50. * } else {
  51. * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
  52. * }
  53. * }
  54. * </pre>
  55. * @author M. Davis
  56. * @stable ICU 2.4
  57. */
  58. class U_COMMON_API UnicodeSetIterator : public UObject {
  59. protected:
  60. /**
  61. * Value of <tt>codepoint</tt> if the iterator points to a string.
  62. * If <tt>codepoint == IS_STRING</tt>, then examine
  63. * <tt>string</tt> for the current iteration result.
  64. * @stable ICU 2.4
  65. */
  66. enum { IS_STRING = -1 };
  67. /**
  68. * Current code point, or the special value <tt>IS_STRING</tt>, if
  69. * the iterator points to a string.
  70. * @stable ICU 2.4
  71. */
  72. UChar32 codepoint;
  73. /**
  74. * When iterating over ranges using <tt>nextRange()</tt>,
  75. * <tt>codepointEnd</tt> contains the inclusive end of the
  76. * iteration range, if <tt>codepoint != IS_STRING</tt>. If
  77. * iterating over code points using <tt>next()</tt>, or if
  78. * <tt>codepoint == IS_STRING</tt>, then the value of
  79. * <tt>codepointEnd</tt> is undefined.
  80. * @stable ICU 2.4
  81. */
  82. UChar32 codepointEnd;
  83. /**
  84. * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
  85. * to the current string. If <tt>codepoint != IS_STRING</tt>, the
  86. * value of <tt>string</tt> is undefined.
  87. * @stable ICU 2.4
  88. */
  89. const UnicodeString* string;
  90. public:
  91. /**
  92. * Create an iterator over the given set. The iterator is valid
  93. * only so long as <tt>set</tt> is valid.
  94. * @param set set to iterate over
  95. * @stable ICU 2.4
  96. */
  97. UnicodeSetIterator(const UnicodeSet& set);
  98. /**
  99. * Create an iterator over nothing. <tt>next()</tt> and
  100. * <tt>nextRange()</tt> return false. This is a convenience
  101. * constructor allowing the target to be set later.
  102. * @stable ICU 2.4
  103. */
  104. UnicodeSetIterator();
  105. /**
  106. * Destructor.
  107. * @stable ICU 2.4
  108. */
  109. virtual ~UnicodeSetIterator();
  110. /**
  111. * Returns true if the current element is a string. If so, the
  112. * caller can retrieve it with <tt>getString()</tt>. If this
  113. * method returns false, the current element is a code point or
  114. * code point range, depending on whether <tt>next()</tt> or
  115. * <tt>nextRange()</tt> was called.
  116. * Elements of types string and codepoint can both be retrieved
  117. * with the function <tt>getString()</tt>.
  118. * Elements of type codepoint can also be retrieved with
  119. * <tt>getCodepoint()</tt>.
  120. * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
  121. * of the range, and <tt>getCodepointEnd()</tt> returns the end
  122. * of the range.
  123. * @stable ICU 2.4
  124. */
  125. inline UBool isString() const;
  126. /**
  127. * Returns the current code point, if <tt>isString()</tt> returned
  128. * false. Otherwise returns an undefined result.
  129. * @stable ICU 2.4
  130. */
  131. inline UChar32 getCodepoint() const;
  132. /**
  133. * Returns the end of the current code point range, if
  134. * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
  135. * called. Otherwise returns an undefined result.
  136. * @stable ICU 2.4
  137. */
  138. inline UChar32 getCodepointEnd() const;
  139. /**
  140. * Returns the current string, if <tt>isString()</tt> returned
  141. * true. If the current iteration item is a code point, a UnicodeString
  142. * containing that single code point is returned.
  143. *
  144. * Ownership of the returned string remains with the iterator.
  145. * The string is guaranteed to remain valid only until the iterator is
  146. * advanced to the next item, or until the iterator is deleted.
  147. *
  148. * @stable ICU 2.4
  149. */
  150. const UnicodeString& getString();
  151. /**
  152. * Advances the iteration position to the next element in the set,
  153. * which can be either a single code point or a string.
  154. * If there are no more elements in the set, return false.
  155. *
  156. * <p>
  157. * If <tt>isString() == TRUE</tt>, the value is a
  158. * string, otherwise the value is a
  159. * single code point. Elements of either type can be retrieved
  160. * with the function <tt>getString()</tt>, while elements of
  161. * consisting of a single code point can be retrieved with
  162. * <tt>getCodepoint()</tt>
  163. *
  164. * <p>The order of iteration is all code points in sorted order,
  165. * followed by all strings sorted order. Do not mix
  166. * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
  167. * calling <tt>reset()</tt> between them. The results of doing so
  168. * are undefined.
  169. *
  170. * @return true if there was another element in the set.
  171. * @stable ICU 2.4
  172. */
  173. UBool next();
  174. /**
  175. * Returns the next element in the set, either a code point range
  176. * or a string. If there are no more elements in the set, return
  177. * false. If <tt>isString() == TRUE</tt>, the value is a
  178. * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
  179. * range of one or more code points from <tt>getCodepoint()</tt> to
  180. * <tt>getCodepointeEnd()</tt> inclusive.
  181. *
  182. * <p>The order of iteration is all code points ranges in sorted
  183. * order, followed by all strings sorted order. Ranges are
  184. * disjoint and non-contiguous. The value returned from <tt>getString()</tt>
  185. * is undefined unless <tt>isString() == TRUE</tt>. Do not mix calls to
  186. * <tt>next()</tt> and <tt>nextRange()</tt> without calling
  187. * <tt>reset()</tt> between them. The results of doing so are
  188. * undefined.
  189. *
  190. * @return true if there was another element in the set.
  191. * @stable ICU 2.4
  192. */
  193. UBool nextRange();
  194. /**
  195. * Sets this iterator to visit the elements of the given set and
  196. * resets it to the start of that set. The iterator is valid only
  197. * so long as <tt>set</tt> is valid.
  198. * @param set the set to iterate over.
  199. * @stable ICU 2.4
  200. */
  201. void reset(const UnicodeSet& set);
  202. /**
  203. * Resets this iterator to the start of the set.
  204. * @stable ICU 2.4
  205. */
  206. void reset();
  207. /**
  208. * ICU "poor man's RTTI", returns a UClassID for this class.
  209. *
  210. * @stable ICU 2.4
  211. */
  212. static UClassID U_EXPORT2 getStaticClassID();
  213. /**
  214. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  215. *
  216. * @stable ICU 2.4
  217. */
  218. virtual UClassID getDynamicClassID() const;
  219. // ======================= PRIVATES ===========================
  220. protected:
  221. // endElement and nextElements are really UChar32's, but we keep
  222. // them as signed int32_t's so we can do comparisons with
  223. // endElement set to -1. Leave them as int32_t's.
  224. /** The set
  225. * @stable ICU 2.4
  226. */
  227. const UnicodeSet* set;
  228. /** End range
  229. * @stable ICU 2.4
  230. */
  231. int32_t endRange;
  232. /** Range
  233. * @stable ICU 2.4
  234. */
  235. int32_t range;
  236. /** End element
  237. * @stable ICU 2.4
  238. */
  239. int32_t endElement;
  240. /** Next element
  241. * @stable ICU 2.4
  242. */
  243. int32_t nextElement;
  244. //UBool abbreviated;
  245. /** Next string
  246. * @stable ICU 2.4
  247. */
  248. int32_t nextString;
  249. /** String count
  250. * @stable ICU 2.4
  251. */
  252. int32_t stringCount;
  253. /**
  254. * Points to the string to use when the caller asks for a
  255. * string and the current iteration item is a code point, not a string.
  256. * @internal
  257. */
  258. UnicodeString *cpString;
  259. /** Copy constructor. Disallowed.
  260. * @stable ICU 2.4
  261. */
  262. UnicodeSetIterator(const UnicodeSetIterator&); // disallow
  263. /** Assignment operator. Disallowed.
  264. * @stable ICU 2.4
  265. */
  266. UnicodeSetIterator& operator=(const UnicodeSetIterator&); // disallow
  267. /** Load range
  268. * @stable ICU 2.4
  269. */
  270. virtual void loadRange(int32_t range);
  271. };
  272. inline UBool UnicodeSetIterator::isString() const {
  273. return codepoint == (UChar32)IS_STRING;
  274. }
  275. inline UChar32 UnicodeSetIterator::getCodepoint() const {
  276. return codepoint;
  277. }
  278. inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
  279. return codepointEnd;
  280. }
  281. U_NAMESPACE_END
  282. #endif