coll.h 56 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. * Copyright (C) 1996-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. ******************************************************************************
  8. */
  9. /**
  10. * \file
  11. * \brief C++ API: Collation Service.
  12. */
  13. /**
  14. * File coll.h
  15. *
  16. * Created by: Helena Shih
  17. *
  18. * Modification History:
  19. *
  20. * Date Name Description
  21. * 02/5/97 aliu Modified createDefault to load collation data from
  22. * binary files when possible. Added related methods
  23. * createCollationFromFile, chopLocale, createPathName.
  24. * 02/11/97 aliu Added members addToCache, findInCache, and fgCache.
  25. * 02/12/97 aliu Modified to create objects from RuleBasedCollator cache.
  26. * Moved cache out of Collation class.
  27. * 02/13/97 aliu Moved several methods out of this class and into
  28. * RuleBasedCollator, with modifications. Modified
  29. * createDefault() to call new RuleBasedCollator(Locale&)
  30. * constructor. General clean up and documentation.
  31. * 02/20/97 helena Added clone, operator==, operator!=, operator=, copy
  32. * constructor and getDynamicClassID.
  33. * 03/25/97 helena Updated with platform independent data types.
  34. * 05/06/97 helena Added memory allocation error detection.
  35. * 06/20/97 helena Java class name change.
  36. * 09/03/97 helena Added createCollationKeyValues().
  37. * 02/10/98 damiba Added compare() with length as parameter.
  38. * 04/23/99 stephen Removed EDecompositionMode, merged with
  39. * Normalizer::EMode.
  40. * 11/02/99 helena Collator performance enhancements. Eliminates the
  41. * UnicodeString construction and special case for NO_OP.
  42. * 11/23/99 srl More performance enhancements. Inlining of
  43. * critical accessors.
  44. * 05/15/00 helena Added version information API.
  45. * 01/29/01 synwee Modified into a C++ wrapper which calls C apis
  46. * (ucol.h).
  47. * 2012-2014 markus Rewritten in C++ again.
  48. */
  49. #ifndef COLL_H
  50. #define COLL_H
  51. #include "unicode/utypes.h"
  52. #if !UCONFIG_NO_COLLATION
  53. #include "unicode/uobject.h"
  54. #include "unicode/ucol.h"
  55. #include "unicode/normlzr.h"
  56. #include "unicode/locid.h"
  57. #include "unicode/uniset.h"
  58. #include "unicode/umisc.h"
  59. #include "unicode/uiter.h"
  60. #include "unicode/stringpiece.h"
  61. U_NAMESPACE_BEGIN
  62. class StringEnumeration;
  63. #if !UCONFIG_NO_SERVICE
  64. /**
  65. * @stable ICU 2.6
  66. */
  67. class CollatorFactory;
  68. #endif
  69. /**
  70. * @stable ICU 2.0
  71. */
  72. class CollationKey;
  73. /**
  74. * The <code>Collator</code> class performs locale-sensitive string
  75. * comparison.<br>
  76. * You use this class to build searching and sorting routines for natural
  77. * language text.
  78. * <p>
  79. * <code>Collator</code> is an abstract base class. Subclasses implement
  80. * specific collation strategies. One subclass,
  81. * <code>RuleBasedCollator</code>, is currently provided and is applicable
  82. * to a wide set of languages. Other subclasses may be created to handle more
  83. * specialized needs.
  84. * <p>
  85. * Like other locale-sensitive classes, you can use the static factory method,
  86. * <code>createInstance</code>, to obtain the appropriate
  87. * <code>Collator</code> object for a given locale. You will only need to
  88. * look at the subclasses of <code>Collator</code> if you need to
  89. * understand the details of a particular collation strategy or if you need to
  90. * modify that strategy.
  91. * <p>
  92. * The following example shows how to compare two strings using the
  93. * <code>Collator</code> for the default locale.
  94. * \htmlonly<blockquote>\endhtmlonly
  95. * <pre>
  96. * \code
  97. * // Compare two strings in the default locale
  98. * UErrorCode success = U_ZERO_ERROR;
  99. * Collator* myCollator = Collator::createInstance(success);
  100. * if (myCollator->compare("abc", "ABC") < 0)
  101. * cout << "abc is less than ABC" << endl;
  102. * else
  103. * cout << "abc is greater than or equal to ABC" << endl;
  104. * \endcode
  105. * </pre>
  106. * \htmlonly</blockquote>\endhtmlonly
  107. * <p>
  108. * You can set a <code>Collator</code>'s <em>strength</em> attribute to
  109. * determine the level of difference considered significant in comparisons.
  110. * Five strengths are provided: <code>PRIMARY</code>, <code>SECONDARY</code>,
  111. * <code>TERTIARY</code>, <code>QUATERNARY</code> and <code>IDENTICAL</code>.
  112. * The exact assignment of strengths to language features is locale dependent.
  113. * For example, in Czech, "e" and "f" are considered primary differences,
  114. * while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary
  115. * differences and "e" and "e" are identical. The following shows how both case
  116. * and accents could be ignored for US English.
  117. * \htmlonly<blockquote>\endhtmlonly
  118. * <pre>
  119. * \code
  120. * //Get the Collator for US English and set its strength to PRIMARY
  121. * UErrorCode success = U_ZERO_ERROR;
  122. * Collator* usCollator = Collator::createInstance(Locale::getUS(), success);
  123. * usCollator->setStrength(Collator::PRIMARY);
  124. * if (usCollator->compare("abc", "ABC") == 0)
  125. * cout << "'abc' and 'ABC' strings are equivalent with strength PRIMARY" << endl;
  126. * \endcode
  127. * </pre>
  128. * \htmlonly</blockquote>\endhtmlonly
  129. *
  130. * The <code>getSortKey</code> methods
  131. * convert a string to a series of bytes that can be compared bitwise against
  132. * other sort keys using <code>strcmp()</code>. Sort keys are written as
  133. * zero-terminated byte strings.
  134. *
  135. * Another set of APIs returns a <code>CollationKey</code> object that wraps
  136. * the sort key bytes instead of returning the bytes themselves.
  137. * </p>
  138. * <p>
  139. * <strong>Note:</strong> <code>Collator</code>s with different Locale
  140. * and CollationStrength settings will return different sort
  141. * orders for the same set of strings. Locales have specific collation rules,
  142. * and the way in which secondary and tertiary differences are taken into
  143. * account, for example, will result in a different sorting order for same
  144. * strings.
  145. * </p>
  146. * @see RuleBasedCollator
  147. * @see CollationKey
  148. * @see CollationElementIterator
  149. * @see Locale
  150. * @see Normalizer
  151. * @version 2.0 11/15/01
  152. */
  153. class U_I18N_API Collator : public UObject {
  154. public:
  155. // Collator public enums -----------------------------------------------
  156. /**
  157. * Base letter represents a primary difference. Set comparison level to
  158. * PRIMARY to ignore secondary and tertiary differences.<br>
  159. * Use this to set the strength of a Collator object.<br>
  160. * Example of primary difference, "abc" &lt; "abd"
  161. *
  162. * Diacritical differences on the same base letter represent a secondary
  163. * difference. Set comparison level to SECONDARY to ignore tertiary
  164. * differences. Use this to set the strength of a Collator object.<br>
  165. * Example of secondary difference, "&auml;" &gt;&gt; "a".
  166. *
  167. * Uppercase and lowercase versions of the same character represents a
  168. * tertiary difference. Set comparison level to TERTIARY to include all
  169. * comparison differences. Use this to set the strength of a Collator
  170. * object.<br>
  171. * Example of tertiary difference, "abc" &lt;&lt;&lt; "ABC".
  172. *
  173. * Two characters are considered "identical" when they have the same unicode
  174. * spellings.<br>
  175. * For example, "&auml;" == "&auml;".
  176. *
  177. * UCollationStrength is also used to determine the strength of sort keys
  178. * generated from Collator objects.
  179. * @stable ICU 2.0
  180. */
  181. enum ECollationStrength
  182. {
  183. PRIMARY = UCOL_PRIMARY, // 0: base-letter differences only; secondary and tertiary differences are ignored
  184. SECONDARY = UCOL_SECONDARY, // 1: diacritical differences also count; tertiary differences are ignored
  185. TERTIARY = UCOL_TERTIARY, // 2: uppercase/lowercase differences also count
  186. QUATERNARY = UCOL_QUATERNARY, // 3
  187. IDENTICAL = UCOL_IDENTICAL // 15: characters must have the same Unicode spellings
  188. };
  189. // Cannot use #ifndef U_HIDE_DEPRECATED_API for the following, it is
  190. // used by virtual methods that cannot have that conditional.
  191. /**
  192. * LESS is returned if source string is compared to be less than target
  193. * string in the compare() method.
  194. * EQUAL is returned if source string is compared to be equal to target
  195. * string in the compare() method.
  196. * GREATER is returned if source string is compared to be greater than
  197. * target string in the compare() method.
  198. * @see Collator#compare
  199. * @deprecated ICU 2.6. Use C enum UCollationResult defined in ucol.h
  200. */
  201. enum EComparisonResult
  202. {
  203. LESS = UCOL_LESS, // -1: source compares less than target
  204. EQUAL = UCOL_EQUAL, // 0: source compares equal to target
  205. GREATER = UCOL_GREATER // 1: source compares greater than target
  206. };
  207. // Collator public destructor -----------------------------------------
  208. /**
  209. * Destructor
  210. * @stable ICU 2.0
  211. */
  212. virtual ~Collator();
  213. // Collator public methods --------------------------------------------
  214. /**
  215. * Returns TRUE if "other" is the same as "this".
  216. *
  217. * The base class implementation returns TRUE if "other" has the same type/class as "this":
  218. * <code>typeid(*this) == typeid(other)</code>.
  219. *
  220. * Subclass implementations should do something like the following:
  221. * <pre>
  222. * if (this == &other) { return TRUE; }
  223. * if (!Collator::operator==(other)) { return FALSE; } // not the same class
  224. *
  225. * const MyCollator &o = (const MyCollator&)other;
  226. * (compare this vs. o's subclass fields)
  227. * </pre>
  228. * @param other Collator object to be compared
  229. * @return TRUE if other is the same as this.
  230. * @stable ICU 2.0
  231. */
  232. virtual UBool operator==(const Collator& other) const;
  233. /**
  234. * Returns true if "other" is not the same as "this".
  235. * Calls ! operator==(const Collator&) const which works for all subclasses.
  236. * @param other Collator object to be compared
  237. * @return TRUE if other is not the same as this.
  238. * @stable ICU 2.0
  239. */
  240. virtual UBool operator!=(const Collator& other) const;
  241. /**
  242. * Makes a copy of this object.
  243. * @return a copy of this object, owned by the caller
  244. * @stable ICU 2.0
  245. */
  246. virtual Collator* clone(void) const = 0;
  247. /**
  248. * Creates the Collator object for the current default locale.
  249. * The default locale is determined by Locale::getDefault.
  250. * The UErrorCode& err parameter is used to return status information to the user.
  251. * To check whether the construction succeeded or not, you should check the
  252. * value of U_SUCCESS(err). If you wish more detailed information, you can
  253. * check for informational error results which still indicate success.
  254. * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
  255. * example, 'de_CH' was requested, but nothing was found there, so 'de' was
  256. * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
  257. * used; neither the requested locale nor any of its fall back locales
  258. * could be found.
  259. * The caller owns the returned object and is responsible for deleting it.
  260. *
  261. * @param err the error code status.
  262. * @return the collation object of the default locale (for example, en_US).
  263. * @see Locale#getDefault
  264. * @stable ICU 2.0
  265. */
  266. static Collator* U_EXPORT2 createInstance(UErrorCode& err);
  267. /**
  268. * Gets the collation object for the desired locale. The
  269. * resource of the desired locale will be loaded.
  270. *
  271. * Locale::getRoot() is the base collation table and all other languages are
  272. * built on top of it with additional language-specific modifications.
  273. *
  274. * For some languages, multiple collation types are available;
  275. * for example, "de@collation=phonebook".
  276. * Starting with ICU 54, collation attributes can be specified via locale keywords as well,
  277. * in the old locale extension syntax ("el@colCaseFirst=upper")
  278. * or in language tag syntax ("el-u-kf-upper").
  279. * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>.
  280. *
  281. * The UErrorCode& err parameter is used to return status information to the user.
  282. * To check whether the construction succeeded or not, you should check
  283. * the value of U_SUCCESS(err). If you wish more detailed information, you
  284. * can check for informational error results which still indicate success.
  285. * U_USING_FALLBACK_ERROR indicates that a fall back locale was used. For
  286. * example, 'de_CH' was requested, but nothing was found there, so 'de' was
  287. * used. U_USING_DEFAULT_ERROR indicates that the default locale data was
  288. * used; neither the requested locale nor any of its fall back locales
  289. * could be found.
  290. *
  291. * The caller owns the returned object and is responsible for deleting it.
  292. * @param loc The locale ID for which to open a collator.
  293. * @param err the error code status.
  294. * @return the created table-based collation object based on the desired
  295. * locale.
  296. * @see Locale
  297. * @see ResourceLoader
  298. * @stable ICU 2.2
  299. */
  300. static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
  301. /**
  302. * The comparison function compares the character data stored in two
  303. * different strings. Returns information about whether a string is less
  304. * than, greater than or equal to another string.
  305. * @param source the source string to be compared with.
  306. * @param target the string that is to be compared with the source string.
  307. * @return Returns an enum value. GREATER if source is greater
  308. * than target; EQUAL if source is equal to target; LESS if source is less
  309. * than target
  310. * @deprecated ICU 2.6 use the overload with UErrorCode &
  311. */
  312. virtual EComparisonResult compare(const UnicodeString& source,
  313. const UnicodeString& target) const;
  314. /**
  315. * The comparison function compares the character data stored in two
  316. * different strings. Returns information about whether a string is less
  317. * than, greater than or equal to another string.
  318. * @param source the source string to be compared with.
  319. * @param target the string that is to be compared with the source string.
  320. * @param status possible error code
  321. * @return Returns an enum value. UCOL_GREATER if source is greater
  322. * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
  323. * than target
  324. * @stable ICU 2.6
  325. */
  326. virtual UCollationResult compare(const UnicodeString& source,
  327. const UnicodeString& target,
  328. UErrorCode &status) const = 0;
  329. /**
  330. * Does the same thing as compare but limits the comparison to a specified
  331. * length
  332. * @param source the source string to be compared with.
  333. * @param target the string that is to be compared with the source string.
  334. * @param length the length the comparison is limited to
  335. * @return Returns an enum value. GREATER if source (up to the specified
  336. * length) is greater than target; EQUAL if source (up to specified
  337. * length) is equal to target; LESS if source (up to the specified
  338. * length) is less than target.
  339. * @deprecated ICU 2.6 use the overload with UErrorCode &
  340. */
  341. virtual EComparisonResult compare(const UnicodeString& source,
  342. const UnicodeString& target,
  343. int32_t length) const;
  344. /**
  345. * Does the same thing as compare but limits the comparison to a specified
  346. * length
  347. * @param source the source string to be compared with.
  348. * @param target the string that is to be compared with the source string.
  349. * @param length the length the comparison is limited to
  350. * @param status possible error code
  351. * @return Returns an enum value. UCOL_GREATER if source (up to the specified
  352. * length) is greater than target; UCOL_EQUAL if source (up to specified
  353. * length) is equal to target; UCOL_LESS if source (up to the specified
  354. * length) is less than target.
  355. * @stable ICU 2.6
  356. */
  357. virtual UCollationResult compare(const UnicodeString& source,
  358. const UnicodeString& target,
  359. int32_t length,
  360. UErrorCode &status) const = 0;
  361. /**
  362. * The comparison function compares the character data stored in two
  363. * different string arrays. Returns information about whether a string array
  364. * is less than, greater than or equal to another string array.
  365. * <p>Example of use:
  366. * <pre>
  367. * . UChar ABC[] = {0x41, 0x42, 0x43, 0}; // = "ABC"
  368. * . UChar abc[] = {0x61, 0x62, 0x63, 0}; // = "abc"
  369. * . UErrorCode status = U_ZERO_ERROR;
  370. * . Collator *myCollation =
  371. * . Collator::createInstance(Locale::getUS(), status);
  372. * . if (U_FAILURE(status)) return;
  373. * . myCollation->setStrength(Collator::PRIMARY);
  374. * . // result would be Collator::EQUAL ("abc" == "ABC")
  375. * . // (no primary difference between "abc" and "ABC")
  376. * . Collator::EComparisonResult result =
  377. * . myCollation->compare(abc, 3, ABC, 3);
  378. * . myCollation->setStrength(Collator::TERTIARY);
  379. * . // result would be Collator::LESS ("abc" &lt;&lt;&lt; "ABC")
  380. * . // (with tertiary difference between "abc" and "ABC")
  381. * . result = myCollation->compare(abc, 3, ABC, 3);
  382. * </pre>
  383. * @param source the source string array to be compared with.
  384. * @param sourceLength the length of the source string array. If this value
  385. * is equal to -1, the string array is null-terminated.
  386. * @param target the string that is to be compared with the source string.
  387. * @param targetLength the length of the target string array. If this value
  388. * is equal to -1, the string array is null-terminated.
  389. * @return Returns an enum value. GREATER if source is greater than target;
  390. * EQUAL if source is equal to target; LESS if source is less than
  391. * target
  392. * @deprecated ICU 2.6 use the overload with UErrorCode &
  393. */
  394. virtual EComparisonResult compare(const UChar* source, int32_t sourceLength,
  395. const UChar* target, int32_t targetLength)
  396. const;
  397. /**
  398. * The comparison function compares the character data stored in two
  399. * different string arrays. Returns information about whether a string array
  400. * is less than, greater than or equal to another string array.
  401. * @param source the source string array to be compared with.
  402. * @param sourceLength the length of the source string array. If this value
  403. * is equal to -1, the string array is null-terminated.
  404. * @param target the string that is to be compared with the source string.
  405. * @param targetLength the length of the target string array. If this value
  406. * is equal to -1, the string array is null-terminated.
  407. * @param status possible error code
  408. * @return Returns an enum value. UCOL_GREATER if source is greater
  409. * than target; UCOL_EQUAL if source is equal to target; UCOL_LESS if source is less
  410. * than target
  411. * @stable ICU 2.6
  412. */
  413. virtual UCollationResult compare(const UChar* source, int32_t sourceLength,
  414. const UChar* target, int32_t targetLength,
  415. UErrorCode &status) const = 0;
  416. /**
  417. * Compares two strings using the Collator.
  418. * Returns whether the first one compares less than/equal to/greater than
  419. * the second one.
  420. * This version takes UCharIterator input.
  421. * @param sIter the first ("source") string iterator
  422. * @param tIter the second ("target") string iterator
  423. * @param status ICU status
  424. * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
  425. * @stable ICU 4.2
  426. */
  427. virtual UCollationResult compare(UCharIterator &sIter,
  428. UCharIterator &tIter,
  429. UErrorCode &status) const;
  430. /**
  431. * Compares two UTF-8 strings using the Collator.
  432. * Returns whether the first one compares less than/equal to/greater than
  433. * the second one.
  434. * This version takes UTF-8 input.
  435. * Note that a StringPiece can be implicitly constructed
  436. * from a std::string or a NUL-terminated const char * string.
  437. * @param source the first UTF-8 string
  438. * @param target the second UTF-8 string
  439. * @param status ICU status
  440. * @return UCOL_LESS, UCOL_EQUAL or UCOL_GREATER
  441. * @stable ICU 4.2
  442. */
  443. virtual UCollationResult compareUTF8(const StringPiece &source,
  444. const StringPiece &target,
  445. UErrorCode &status) const;
  446. /**
  447. * Transforms the string into a series of characters that can be compared
  448. * with CollationKey::compareTo. It is not possible to restore the original
  449. * string from the chars in the sort key.
  450. * <p>Use CollationKey::equals or CollationKey::compare to compare the
  451. * generated sort keys.
  452. * If the source string is null, a null collation key will be returned.
  453. *
  454. * Note that sort keys are often less efficient than simply doing comparison.
  455. * For more details, see the ICU User Guide.
  456. *
  457. * @param source the source string to be transformed into a sort key.
  458. * @param key the collation key to be filled in
  459. * @param status the error code status.
  460. * @return the collation key of the string based on the collation rules.
  461. * @see CollationKey#compare
  462. * @stable ICU 2.0
  463. */
  464. virtual CollationKey& getCollationKey(const UnicodeString& source,
  465. CollationKey& key,
  466. UErrorCode& status) const = 0;
  467. /**
  468. * Transforms the string into a series of characters that can be compared
  469. * with CollationKey::compareTo. It is not possible to restore the original
  470. * string from the chars in the sort key.
  471. * <p>Use CollationKey::equals or CollationKey::compare to compare the
  472. * generated sort keys.
  473. * <p>If the source string is null, a null collation key will be returned.
  474. *
  475. * Note that sort keys are often less efficient than simply doing comparison.
  476. * For more details, see the ICU User Guide.
  477. *
  478. * @param source the source string to be transformed into a sort key.
  479. * @param sourceLength length of the source string
  480. * @param key the collation key to be filled in
  481. * @param status the error code status.
  482. * @return the collation key of the string based on the collation rules.
  483. * @see CollationKey#compare
  484. * @stable ICU 2.0
  485. */
  486. virtual CollationKey& getCollationKey(const UChar*source,
  487. int32_t sourceLength,
  488. CollationKey& key,
  489. UErrorCode& status) const = 0;
  490. /**
  491. * Generates the hash code for the collation object
  492. * @stable ICU 2.0
  493. */
  494. virtual int32_t hashCode(void) const = 0;
  495. /**
  496. * Gets the locale of the Collator
  497. *
  498. * @param type can be either requested, valid or actual locale. For more
  499. * information see the definition of ULocDataLocaleType in
  500. * uloc.h
  501. * @param status the error code status.
  502. * @return locale where the collation data lives. If the collator
  503. * was instantiated from rules, locale is empty.
  504. * @deprecated ICU 2.8 This API is under consideration for revision
  505. * in ICU 3.0.
  506. */
  507. virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
  508. /**
  509. * Convenience method for comparing two strings based on the collation rules.
  510. * @param source the source string to be compared with.
  511. * @param target the target string to be compared with.
  512. * @return true if the first string is greater than the second one,
  513. * according to the collation rules. false, otherwise.
  514. * @see Collator#compare
  515. * @stable ICU 2.0
  516. */
  517. UBool greater(const UnicodeString& source, const UnicodeString& target)
  518. const;
  519. /**
  520. * Convenience method for comparing two strings based on the collation rules.
  521. * @param source the source string to be compared with.
  522. * @param target the target string to be compared with.
  523. * @return true if the first string is greater than or equal to the second
  524. * one, according to the collation rules. false, otherwise.
  525. * @see Collator#compare
  526. * @stable ICU 2.0
  527. */
  528. UBool greaterOrEqual(const UnicodeString& source,
  529. const UnicodeString& target) const;
  530. /**
  531. * Convenience method for comparing two strings based on the collation rules.
  532. * @param source the source string to be compared with.
  533. * @param target the target string to be compared with.
  534. * @return true if the strings are equal according to the collation rules.
  535. * false, otherwise.
  536. * @see Collator#compare
  537. * @stable ICU 2.0
  538. */
  539. UBool equals(const UnicodeString& source, const UnicodeString& target) const;
  540. /**
  541. * Determines the minimum strength that will be used in comparison or
  542. * transformation.
  543. * <p>E.g. with strength == SECONDARY, the tertiary difference is ignored
  544. * <p>E.g. with strength == PRIMARY, the secondary and tertiary difference
  545. * are ignored.
  546. * @return the current comparison level.
  547. * @see Collator#setStrength
  548. * @deprecated ICU 2.6 Use getAttribute(UCOL_STRENGTH...) instead
  549. */
  550. virtual ECollationStrength getStrength(void) const;
  551. /**
  552. * Sets the minimum strength to be used in comparison or transformation.
  553. * <p>Example of use:
  554. * <pre>
  555. * \code
  556. * UErrorCode status = U_ZERO_ERROR;
  557. * Collator*myCollation = Collator::createInstance(Locale::getUS(), status);
  558. * if (U_FAILURE(status)) return;
  559. * myCollation->setStrength(Collator::PRIMARY);
  560. * // result will be "abc" == "ABC"
  561. * // tertiary differences will be ignored
  562. * Collator::EComparisonResult result = myCollation->compare("abc", "ABC");
  563. * \endcode
  564. * </pre>
  565. * @see Collator#getStrength
  566. * @param newStrength the new comparison level.
  567. * @deprecated ICU 2.6 Use setAttribute(UCOL_STRENGTH...) instead
  568. */
  569. virtual void setStrength(ECollationStrength newStrength);
  570. /**
  571. * Retrieves the reordering codes for this collator.
  572. * @param dest The array to fill with the script ordering.
  573. * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function
  574. * will only return the length of the result without writing any codes (pre-flighting).
  575. * @param status A reference to an error code value, which must not indicate
  576. * a failure before the function call.
  577. * @return The length of the script ordering array.
  578. * @see ucol_setReorderCodes
  579. * @see Collator#getEquivalentReorderCodes
  580. * @see Collator#setReorderCodes
  581. * @see UScriptCode
  582. * @see UColReorderCode
  583. * @stable ICU 4.8
  584. */
  585. virtual int32_t getReorderCodes(int32_t *dest,
  586. int32_t destCapacity,
  587. UErrorCode& status) const;
  588. /**
  589. * Sets the ordering of scripts for this collator.
  590. *
  591. * <p>The reordering codes are a combination of script codes and reorder codes.
  592. * @param reorderCodes An array of script codes in the new order. This can be NULL if the
  593. * length is also set to 0. An empty array will clear any reordering codes on the collator.
  594. * @param reorderCodesLength The length of reorderCodes.
  595. * @param status error code
  596. * @see ucol_setReorderCodes
  597. * @see Collator#getReorderCodes
  598. * @see Collator#getEquivalentReorderCodes
  599. * @see UScriptCode
  600. * @see UColReorderCode
  601. * @stable ICU 4.8
  602. */
  603. virtual void setReorderCodes(const int32_t* reorderCodes,
  604. int32_t reorderCodesLength,
  605. UErrorCode& status) ;
  606. /**
  607. * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder
  608. * codes will be grouped and must reorder together.
  609. * Beginning with ICU 55, scripts only reorder together if they are primary-equal,
  610. * for example Hiragana and Katakana.
  611. *
  612. * @param reorderCode The reorder code to determine equivalence for.
  613. * @param dest The array to fill with the script equivalence reordering codes.
  614. * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the
  615. * function will only return the length of the result without writing any codes (pre-flighting).
  616. * @param status A reference to an error code value, which must not indicate
  617. * a failure before the function call.
  618. * @return The length of the reordering code equivalence array.
  619. * @see ucol_setReorderCodes
  620. * @see Collator#getReorderCodes
  621. * @see Collator#setReorderCodes
  622. * @see UScriptCode
  623. * @see UColReorderCode
  624. * @stable ICU 4.8
  625. */
  626. static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
  627. int32_t* dest,
  628. int32_t destCapacity,
  629. UErrorCode& status);
  630. /**
  631. * Get name of the object for the desired Locale, in the desired language
  632. * @param objectLocale must be from getAvailableLocales
  633. * @param displayLocale specifies the desired locale for output
  634. * @param name the fill-in parameter of the return value
  635. * @return display-able name of the object for the object locale in the
  636. * desired language
  637. * @stable ICU 2.0
  638. */
  639. static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
  640. const Locale& displayLocale,
  641. UnicodeString& name);
  642. /**
  643. * Get name of the object for the desired Locale, in the language of the
  644. * default locale.
  645. * @param objectLocale must be from getAvailableLocales
  646. * @param name the fill-in parameter of the return value
  647. * @return name of the object for the desired locale in the default language
  648. * @stable ICU 2.0
  649. */
  650. static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
  651. UnicodeString& name);
  652. /**
  653. * Get the set of Locales for which Collations are installed.
  654. *
  655. * <p>Note this does not include locales supported by registered collators.
  656. * If collators might have been registered, use the overload of getAvailableLocales
  657. * that returns a StringEnumeration.</p>
  658. *
  659. * @param count the output parameter of number of elements in the locale list
  660. * @return the list of available locales for which collations are installed
  661. * @stable ICU 2.0
  662. */
  663. static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
  664. /**
  665. * Return a StringEnumeration over the locales available at the time of the call,
  666. * including registered locales. If a severe error occurs (such as out of memory
  667. * condition) this will return null. If there is no locale data, an empty enumeration
  668. * will be returned.
  669. * @return a StringEnumeration over the locales available at the time of the call
  670. * @stable ICU 2.6
  671. */
  672. static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
  673. /**
  674. * Create a string enumerator of all possible keywords that are relevant to
  675. * collation. At this point, the only recognized keyword for this
  676. * service is "collation".
  677. * @param status input-output error code
  678. * @return a string enumeration over locale strings. The caller is
  679. * responsible for deleting the result.
  680. * @stable ICU 3.0
  681. */
  682. static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
  683. /**
  684. * Given a keyword, create a string enumeration of all values
  685. * for that keyword that are currently in use.
  686. * @param keyword a particular keyword as enumerated by
  687. * ucol_getKeywords. If any other keyword is passed in, status is set
  688. * to U_ILLEGAL_ARGUMENT_ERROR.
  689. * @param status input-output error code
  690. * @return a string enumeration over collation keyword values, or NULL
  691. * upon error. The caller is responsible for deleting the result.
  692. * @stable ICU 3.0
  693. */
  694. static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
  695. /**
  696. * Given a key and a locale, returns an array of string values in a preferred
  697. * order that would make a difference. These are all and only those values where
  698. * the open (creation) of the service with the locale formed from the input locale
  699. * plus input keyword and that value has different behavior than creation with the
  700. * input locale alone.
  701. * @param keyword one of the keys supported by this service. For now, only
  702. * "collation" is supported.
  703. * @param locale the locale
  704. * @param commonlyUsed if set to true it will return only commonly used values
  705. * with the given locale in preferred order. Otherwise,
  706. * it will return all the available values for the locale.
  707. * @param status ICU status
  708. * @return a string enumeration over keyword values for the given key and the locale.
  709. * @stable ICU 4.2
  710. */
  711. static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
  712. UBool commonlyUsed, UErrorCode& status);
  713. /**
  714. * Return the functionally equivalent locale for the given
  715. * requested locale, with respect to given keyword, for the
  716. * collation service. If two locales return the same result, then
  717. * collators instantiated for these locales will behave
  718. * equivalently. The converse is not always true; two collators
  719. * may in fact be equivalent, but return different results, due to
  720. * internal details. The return result has no other meaning than
  721. * that stated above, and implies nothing as to the relationship
  722. * between the two locales. This is intended for use by
  723. * applications that wish to cache collators, or otherwise reuse
  724. * collators when possible. The functional equivalent may change
  725. * over time. For more information, please see the <a
  726. * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services">
  727. * Locales and Services</a> section of the ICU User Guide.
  728. * @param keyword a particular keyword as enumerated by
  729. * ucol_getKeywords.
  730. * @param locale the requested locale
  731. * @param isAvailable reference to a fillin parameter that
  732. * indicates whether the requested locale was 'available' to the
  733. * collation service. A locale is defined as 'available' if it
  734. * physically exists within the collation locale data.
  735. * @param status reference to input-output error code
  736. * @return the functionally equivalent collation locale, or the root
  737. * locale upon error.
  738. * @stable ICU 3.0
  739. */
  740. static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
  741. UBool& isAvailable, UErrorCode& status);
  742. #if !UCONFIG_NO_SERVICE
  743. /**
  744. * Register a new Collator. The collator will be adopted.
  745. * Because ICU may choose to cache collators internally, this must be
  746. * called at application startup, prior to any calls to
  747. * Collator::createInstance to avoid undefined behavior.
  748. * @param toAdopt the Collator instance to be adopted
  749. * @param locale the locale with which the collator will be associated
  750. * @param status the in/out status code, no special meanings are assigned
  751. * @return a registry key that can be used to unregister this collator
  752. * @stable ICU 2.6
  753. */
  754. static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
  755. /**
  756. * Register a new CollatorFactory. The factory will be adopted.
  757. * Because ICU may choose to cache collators internally, this must be
  758. * called at application startup, prior to any calls to
  759. * Collator::createInstance to avoid undefined behavior.
  760. * @param toAdopt the CollatorFactory instance to be adopted
  761. * @param status the in/out status code, no special meanings are assigned
  762. * @return a registry key that can be used to unregister this collator
  763. * @stable ICU 2.6
  764. */
  765. static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
  766. /**
  767. * Unregister a previously-registered Collator or CollatorFactory
  768. * using the key returned from the register call. Key becomes
  769. * invalid after a successful call and should not be used again.
  770. * The object corresponding to the key will be deleted.
  771. * Because ICU may choose to cache collators internally, this should
  772. * be called during application shutdown, after all calls to
  773. * Collator::createInstance to avoid undefined behavior.
  774. * @param key the registry key returned by a previous call to registerInstance
  775. * @param status the in/out status code, no special meanings are assigned
  776. * @return TRUE if the collator for the key was successfully unregistered
  777. * @stable ICU 2.6
  778. */
  779. static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
  780. #endif /* UCONFIG_NO_SERVICE */
  781. /**
  782. * Gets the version information for a Collator.
  783. * @param info the version # information, the result will be filled in
  784. * @stable ICU 2.0
  785. */
  786. virtual void getVersion(UVersionInfo info) const = 0;
  787. /**
  788. * Returns a unique class ID POLYMORPHICALLY. Pure virtual method.
  789. * This method is to implement a simple version of RTTI, since not all C++
  790. * compilers support genuine RTTI. Polymorphic operator==() and clone()
  791. * methods call this method.
  792. * @return The class ID for this object. All objects of a given class have
  793. * the same class ID. Objects of other classes have different class
  794. * IDs.
  795. * @stable ICU 2.0
  796. */
  797. virtual UClassID getDynamicClassID(void) const = 0;
  798. /**
  799. * Universal attribute setter
  800. * @param attr attribute type
  801. * @param value attribute value
  802. * @param status to indicate whether the operation went on smoothly or
  803. * there were errors
  804. * @stable ICU 2.2
  805. */
  806. virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
  807. UErrorCode &status) = 0;
  808. /**
  809. * Universal attribute getter
  810. * @param attr attribute type
  811. * @param status to indicate whether the operation went on smoothly or
  812. * there were errors
  813. * @return attribute value
  814. * @stable ICU 2.2
  815. */
  816. virtual UColAttributeValue getAttribute(UColAttribute attr,
  817. UErrorCode &status) const = 0;
  818. /**
  819. * Sets the variable top to the top of the specified reordering group.
  820. * The variable top determines the highest-sorting character
  821. * which is affected by UCOL_ALTERNATE_HANDLING.
  822. * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect.
  823. *
  824. * The base class implementation sets U_UNSUPPORTED_ERROR.
  825. * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION,
  826. * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY;
  827. * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group
  828. * @param errorCode Standard ICU error code. Its input value must
  829. * pass the U_SUCCESS() test, or else the function returns
  830. * immediately. Check for U_FAILURE() on output or use with
  831. * function chaining. (See User Guide for details.)
  832. * @return *this
  833. * @see getMaxVariable
  834. * @stable ICU 53
  835. */
  836. virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
  837. /**
  838. * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
  839. *
  840. * The base class implementation returns UCOL_REORDER_CODE_PUNCTUATION.
  841. * @return the maximum variable reordering group.
  842. * @see setMaxVariable
  843. * @stable ICU 53
  844. */
  845. virtual UColReorderCode getMaxVariable() const;
  846. /**
  847. * Sets the variable top to the primary weight of the specified string.
  848. *
  849. * Beginning with ICU 53, the variable top is pinned to
  850. * the top of one of the supported reordering groups,
  851. * and it must not be beyond the last of those groups.
  852. * See setMaxVariable().
  853. * @param varTop one or more (if contraction) UChars to which the variable top should be set
  854. * @param len length of variable top string. If -1 it is considered to be zero terminated.
  855. * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
  856. * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
  857. * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
  858. * the last reordering group supported by setMaxVariable()
  859. * @return variable top primary weight
  860. * @deprecated ICU 53 Call setMaxVariable() instead.
  861. */
  862. virtual uint32_t setVariableTop(const UChar *varTop, int32_t len, UErrorCode &status) = 0;
  863. /**
  864. * Sets the variable top to the primary weight of the specified string.
  865. *
  866. * Beginning with ICU 53, the variable top is pinned to
  867. * the top of one of the supported reordering groups,
  868. * and it must not be beyond the last of those groups.
  869. * See setMaxVariable().
  870. * @param varTop a UnicodeString size 1 or more (if contraction) of UChars to which the variable top should be set
  871. * @param status error code. If error code is set, the return value is undefined. Errors set by this function are: <br>
  872. * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br>
  873. * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond
  874. * the last reordering group supported by setMaxVariable()
  875. * @return variable top primary weight
  876. * @deprecated ICU 53 Call setMaxVariable() instead.
  877. */
  878. virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0;
  879. /**
  880. * Sets the variable top to the specified primary weight.
  881. *
  882. * Beginning with ICU 53, the variable top is pinned to
  883. * the top of one of the supported reordering groups,
  884. * and it must not be beyond the last of those groups.
  885. * See setMaxVariable().
  886. * @param varTop primary weight, as returned by setVariableTop or ucol_getVariableTop
  887. * @param status error code
  888. * @deprecated ICU 53 Call setMaxVariable() instead.
  889. */
  890. virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0;
  891. /**
  892. * Gets the variable top value of a Collator.
  893. * @param status error code (not changed by function). If error code is set, the return value is undefined.
  894. * @return the variable top primary weight
  895. * @see getMaxVariable
  896. * @stable ICU 2.0
  897. */
  898. virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
  899. /**
  900. * Get a UnicodeSet that contains all the characters and sequences
  901. * tailored in this collator.
  902. * @param status error code of the operation
  903. * @return a pointer to a UnicodeSet object containing all the
  904. * code points and sequences that may sort differently than
  905. * in the root collator. The object must be disposed of by using delete
  906. * @stable ICU 2.4
  907. */
  908. virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
  909. /**
  910. * Same as clone().
  911. * The base class implementation simply calls clone().
  912. * @return a copy of this object, owned by the caller
  913. * @see clone()
  914. * @deprecated ICU 50 no need to have two methods for cloning
  915. */
  916. virtual Collator* safeClone(void) const;
  917. /**
  918. * Get the sort key as an array of bytes from a UnicodeString.
  919. * Sort key byte arrays are zero-terminated and can be compared using
  920. * strcmp().
  921. *
  922. * Note that sort keys are often less efficient than simply doing comparison.
  923. * For more details, see the ICU User Guide.
  924. *
  925. * @param source string to be processed.
  926. * @param result buffer to store result in. If NULL, number of bytes needed
  927. * will be returned.
  928. * @param resultLength length of the result buffer. If it is not large enough, the
  929. * buffer will be filled to capacity.
  930. * @return Number of bytes needed for storing the sort key
  931. * @stable ICU 2.2
  932. */
  933. virtual int32_t getSortKey(const UnicodeString& source,
  934. uint8_t* result,
  935. int32_t resultLength) const = 0;
  936. /**
  937. * Get the sort key as an array of bytes from a UChar buffer.
  938. * Sort key byte arrays are zero-terminated and can be compared using
  939. * strcmp().
  940. *
  941. * Note that sort keys are often less efficient than simply doing comparison.
  942. * For more details, see the ICU User Guide.
  943. *
  944. * @param source string to be processed.
  945. * @param sourceLength length of string to be processed.
  946. * If -1, the string is 0 terminated and length will be decided by the
  947. * function.
  948. * @param result buffer to store result in. If NULL, number of bytes needed
  949. * will be returned.
  950. * @param resultLength length of the result buffer. If it is not large enough, the
  951. * buffer will be filled to capacity.
  952. * @return Number of bytes needed for storing the sort key
  953. * @stable ICU 2.2
  954. */
  955. virtual int32_t getSortKey(const UChar*source, int32_t sourceLength,
  956. uint8_t*result, int32_t resultLength) const = 0;
  957. /**
  958. * Produce a bound for a given sortkey and a number of levels.
  959. * Return value is always the number of bytes needed, regardless of
  960. * whether the result buffer was big enough or even valid.<br>
  961. * Resulting bounds can be used to produce a range of strings that are
  962. * between upper and lower bounds. For example, if bounds are produced
  963. * for a sortkey of string "smith", strings between upper and lower
  964. * bounds with one level would include "Smith", "SMITH", "sMiTh".<br>
  965. * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER
  966. * is produced, strings matched would be as above. However, if bound
  967. * produced using UCOL_BOUND_UPPER_LONG is used, the above example will
  968. * also match "Smithsonian" and similar.<br>
  969. * For more on usage, see example in cintltst/capitst.c in procedure
  970. * TestBounds.
  971. * Sort keys may be compared using <TT>strcmp</TT>.
  972. * @param source The source sortkey.
  973. * @param sourceLength The length of source, or -1 if null-terminated.
  974. * (If an unmodified sortkey is passed, it is always null
  975. * terminated).
  976. * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which
  977. * produces a lower inclusive bound, UCOL_BOUND_UPPER, that
  978. * produces upper bound that matches strings of the same length
  979. * or UCOL_BOUND_UPPER_LONG that matches strings that have the
  980. * same starting substring as the source string.
  981. * @param noOfLevels Number of levels required in the resulting bound (for most
  982. * uses, the recommended value is 1). See users guide for
  983. * explanation on number of levels a sortkey can have.
  984. * @param result A pointer to a buffer to receive the resulting sortkey.
  985. * @param resultLength The maximum size of result.
  986. * @param status Used for returning error code if something went wrong. If the
  987. * number of levels requested is higher than the number of levels
  988. * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is
  989. * issued.
  990. * @return The size needed to fully store the bound.
  991. * @see ucol_keyHashCode
  992. * @stable ICU 2.1
  993. */
  994. static int32_t U_EXPORT2 getBound(const uint8_t *source,
  995. int32_t sourceLength,
  996. UColBoundMode boundType,
  997. uint32_t noOfLevels,
  998. uint8_t *result,
  999. int32_t resultLength,
  1000. UErrorCode &status);
  1001. protected:
  1002. // Collator protected constructors -------------------------------------
  1003. /**
  1004. * Default constructor.
  1005. * Constructor is different from the old default Collator constructor.
  1006. * The task for determining the default collation strength and normalization
  1007. * mode is left to the child class.
  1008. * @stable ICU 2.0
  1009. */
  1010. Collator();
  1011. #ifndef U_HIDE_DEPRECATED_API
  1012. /**
  1013. * Constructor.
  1014. * Empty constructor, does not handle the arguments.
  1015. * This constructor is done for backward compatibility with 1.7 and 1.8.
  1016. * The task for handling the argument collation strength and normalization
  1017. * mode is left to the child class.
  1018. * @param collationStrength collation strength
  1019. * @param decompositionMode normalization mode (not handled by this constructor)
  1020. * @deprecated ICU 2.4. Subclasses should use the default constructor
  1021. * instead and handle the strength and normalization mode themselves.
  1022. */
  1023. Collator(UCollationStrength collationStrength,
  1024. UNormalizationMode decompositionMode);
  1025. #endif /* U_HIDE_DEPRECATED_API */
  1026. /**
  1027. * Copy constructor.
  1028. * @param other Collator object to be copied from
  1029. * @stable ICU 2.0
  1030. */
  1031. Collator(const Collator& other);
  1032. public:
  1033. /**
  1034. * Used internally by registration to define the requested and valid locales.
  1035. * @param requestedLocale the requested locale
  1036. * @param validLocale the valid locale
  1037. * @param actualLocale the actual locale
  1038. * @internal
  1039. */
  1040. virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
  1041. /** Get the short definition string for a collator. This internal API harvests the collator's
  1042. * locale and the attribute set and produces a string that can be used for opening
  1043. * a collator with the same attributes using the ucol_openFromShortString API.
  1044. * This string will be normalized.
  1045. * The structure and the syntax of the string is defined in the "Naming collators"
  1046. * section of the users guide:
  1047. * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme
  1048. * This function supports preflighting.
  1049. *
  1050. * This is internal, and intended to be used with delegate converters.
  1051. *
  1052. * @param locale a locale that will appear as the collator's locale in the resulting
  1053. * short string definition. If NULL, the locale will be harvested
  1054. * from the collator.
  1055. * @param buffer space to hold the resulting string
  1056. * @param capacity capacity of the buffer
  1057. * @param status for returning errors. All the preflighting errors are featured
  1058. * @return length of the resulting string
  1059. * @see ucol_openFromShortString
  1060. * @see ucol_normalizeShortDefinitionString
  1061. * @see ucol_getShortDefinitionString
  1062. * @internal
  1063. */
  1064. virtual int32_t internalGetShortDefinitionString(const char *locale,
  1065. char *buffer,
  1066. int32_t capacity,
  1067. UErrorCode &status) const;
  1068. /**
  1069. * Implements ucol_strcollUTF8().
  1070. * @internal
  1071. */
  1072. virtual UCollationResult internalCompareUTF8(
  1073. const char *left, int32_t leftLength,
  1074. const char *right, int32_t rightLength,
  1075. UErrorCode &errorCode) const;
  1076. /**
  1077. * Implements ucol_nextSortKeyPart().
  1078. * @internal
  1079. */
  1080. virtual int32_t
  1081. internalNextSortKeyPart(
  1082. UCharIterator *iter, uint32_t state[2],
  1083. uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
  1084. #ifndef U_HIDE_INTERNAL_API
  1085. /** Reinterprets a UCollator pointer as a Collator pointer (plain reinterpret_cast; the pointee is not checked or converted). @internal */
  1086. static inline Collator *fromUCollator(UCollator *uc) {
  1087. return reinterpret_cast<Collator *>(uc);
  1088. }
  1089. /** Const overload: reinterprets a const UCollator pointer as a const Collator pointer (plain reinterpret_cast; the pointee is not checked or converted). @internal */
  1090. static inline const Collator *fromUCollator(const UCollator *uc) {
  1091. return reinterpret_cast<const Collator *>(uc);
  1092. }
  1093. /** Returns this object's address reinterpreted as a UCollator pointer (plain reinterpret_cast; nothing is copied). @internal */
  1094. inline UCollator *toUCollator() {
  1095. return reinterpret_cast<UCollator *>(this);
  1096. }
  1097. /** Const overload: returns this object's address reinterpreted as a const UCollator pointer (plain reinterpret_cast; nothing is copied). @internal */
  1098. inline const UCollator *toUCollator() const {
  1099. return reinterpret_cast<const UCollator *>(this);
  1100. }
  1101. #endif // U_HIDE_INTERNAL_API
  1102. private:
  1103. /**
  1104. * Assignment operator. Private for now, so it is accessible only to Collator itself and its friend classes.
  1105. */
  1106. Collator& operator=(const Collator& other);
  1107. friend class CFactory;
  1108. friend class SimpleCFactory;
  1109. friend class ICUCollatorFactory;
  1110. friend class ICUCollatorService;
/**
 * Private static helper that returns a Collator pointer for desiredLocale and
 * reports errors through status. Being private, it is callable only from
 * Collator itself and from the friend classes CFactory, SimpleCFactory,
 * ICUCollatorFactory and ICUCollatorService.
 * NOTE(review): ownership of the returned pointer is not stated in this
 * header -- presumably caller-owned; confirm against the implementation.
 */
  1111. static Collator* makeInstance(const Locale& desiredLocale,
  1112. UErrorCode& status);
  1113. };
  1114. #if !UCONFIG_NO_SERVICE
  1115. /**
  1116. * A factory, used with registerFactory, that creates multiple collators and provides
  1117. * display names for them. A factory supports some number of locales-- these are the
  1118. * locales for which it can create collators. The factory can be visible, in which
  1119. * case the supported locales will be enumerated by getAvailableLocales, or invisible,
  1120. * in which case they are not. Invisible locales are still supported, they are just not
  1121. * listed by getAvailableLocales.
  1122. * <p>
  1123. * If standard locale display names are sufficient, Collator instances can
  1124. * be registered using registerInstance instead.</p>
  1125. * <p>
  1126. * Note: if the collators are to be used from C APIs, they must be instances
  1127. * of RuleBasedCollator.</p>
  1128. *
  1129. * @stable ICU 2.6
  1130. */
  1131. class U_I18N_API CollatorFactory : public UObject {
  1132. public:
  1133. /**
  1134. * Destructor. Declared virtual, so a factory can be destroyed through a CollatorFactory pointer.
  1135. * @stable ICU 3.0
  1136. */
  1137. virtual ~CollatorFactory();
  1138. /**
  1139. * Return true if this factory is visible. Default is true.
  1140. * If not visible, the locales supported by this factory will not
  1141. * be listed by getAvailableLocales.
  1142. * @return true if the factory is visible.
  1143. * @stable ICU 2.6
  1144. */
  1145. virtual UBool visible(void) const;
  1146. /**
  1147. * Return a collator for the provided locale. If the locale
  1148. * is not supported, return NULL. Pure virtual: subclasses must implement it.
  1149. * @param loc the locale identifying the collator to be created.
  1150. * @return a new collator if the locale is supported, otherwise NULL.
  1151. * @stable ICU 2.6
  1152. */
  1153. virtual Collator* createCollator(const Locale& loc) = 0;
  1154. /**
  1155. * Return the name of the collator for the objectLocale, localized for the displayLocale.
  1156. * If objectLocale is not supported, or the factory is not visible, set the result string
  1157. * to bogus.
  1158. * @param objectLocale the locale identifying the collator
  1159. * @param displayLocale the locale for which the display name of the collator should be localized
  1160. * @param result an output parameter for the display name, set to bogus if not supported.
  1161. * @return the display name
  1162. * @stable ICU 2.6
  1163. */
  1164. virtual UnicodeString& getDisplayName(const Locale& objectLocale,
  1165. const Locale& displayLocale,
  1166. UnicodeString& result);
  1167. /**
  1168. * Return an array of all the locale names directly supported by this factory.
  1169. * The number of names is returned in count. This array is owned by the factory.
  1170. * Its contents must never change. Pure virtual: subclasses must implement it.
  1171. * @param count output parameter for the number of locales supported by the factory
  1172. * @param status the in/out error code
  1173. * @return a pointer to an array of count UnicodeStrings.
  1174. * @stable ICU 2.6
  1175. */
  1176. virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
  1177. };
  1178. #endif /* UCONFIG_NO_SERVICE */
  1179. // Collator inline methods -----------------------------------------------
  1180. U_NAMESPACE_END
  1181. #endif /* #if !UCONFIG_NO_COLLATION */
  1182. #endif