translit.h 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1999-2014, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. * Date Name Description
  9. * 11/17/99 aliu Creation.
  10. **********************************************************************
  11. */
  12. #ifndef TRANSLIT_H
  13. #define TRANSLIT_H
  14. #include "unicode/utypes.h"
  15. /**
  16. * \file
  17. * \brief C++ API: Transforms text from one format to another.
  18. */
  19. #if !UCONFIG_NO_TRANSLITERATION
  20. #include "unicode/uobject.h"
  21. #include "unicode/unistr.h"
  22. #include "unicode/parseerr.h"
  23. #include "unicode/utrans.h" // UTransPosition, UTransDirection
  24. #include "unicode/strenum.h"
  25. U_NAMESPACE_BEGIN
  26. class UnicodeFilter;
  27. class UnicodeSet;
  28. class CompoundTransliterator;
  29. class TransliteratorParser;
  30. class NormalizationTransliterator;
  31. class TransliteratorIDParser;
  32. /**
  33. *
  34. * <code>Transliterator</code> is an abstract class that
  35. * transliterates text from one format to another. The most common
  36. * kind of transliterator is a script, or alphabet, transliterator.
  37. * For example, a Russian to Latin transliterator changes Russian text
  38. * written in Cyrillic characters to phonetically equivalent Latin
  39. * characters. It does not <em>translate</em> Russian to English!
  40. * Transliteration, unlike translation, operates on characters, without
  41. * reference to the meanings of words and sentences.
  42. *
  43. * <p>Although script conversion is its most common use, a
  44. * transliterator can actually perform a more general class of tasks.
  45. * In fact, <code>Transliterator</code> defines a very general API
  46. * which specifies only that a segment of the input text is replaced
  47. * by new text. The particulars of this conversion are determined
  48. * entirely by subclasses of <code>Transliterator</code>.
  49. *
  50. * <p><b>Transliterators are stateless</b>
  51. *
  52. * <p><code>Transliterator</code> objects are <em>stateless</em>; they
  53. * retain no information between calls to
  54. * <code>transliterate()</code>. (However, this does <em>not</em>
  55. * mean that threads may share transliterators without synchronizing
  56. * them. Transliterators are not immutable, so they must be
  57. * synchronized when shared between threads.) This might seem to
  58. * limit the complexity of the transliteration operation. In
  59. * practice, subclasses perform complex transliterations by delaying
  60. * the replacement of text until it is known that no other
  61. * replacements are possible. In other words, although the
  62. * <code>Transliterator</code> objects are stateless, the source text
  63. * itself embodies all the needed information, and delayed operation
  64. * allows arbitrary complexity.
  65. *
  66. * <p><b>Batch transliteration</b>
  67. *
  68. * <p>The simplest way to perform transliteration is all at once, on a
  69. * string of existing text. This is referred to as <em>batch</em>
  70. * transliteration. For example, given a string <code>input</code>
  71. * and a transliterator <code>t</code>, the call
  72. *
  73. * \htmlonly<blockquote>\endhtmlonly<code>String result = t.transliterate(input);
  74. * </code>\htmlonly</blockquote>\endhtmlonly
  75. *
  76. * will transliterate it and return the result. Other methods allow
  77. * the client to specify a substring to be transliterated and to use
  78. * {@link Replaceable } objects instead of strings, in order to
  79. * preserve out-of-band information (such as text styles).
  80. *
  81. * <p><b>Keyboard transliteration</b>
  82. *
  83. * <p>Somewhat more involved is <em>keyboard</em>, or incremental
  84. * transliteration. This is the transliteration of text that is
  85. * arriving from some source (typically the user's keyboard) one
  86. * character at a time, or in some other piecemeal fashion.
  87. *
  88. * <p>In keyboard transliteration, a <code>Replaceable</code> buffer
  89. * stores the text. As text is inserted, as much as possible is
  90. * transliterated on the fly. This means a GUI that displays the
  91. * contents of the buffer may show text being modified as each new
  92. * character arrives.
  93. *
  94. * <p>Consider the simple <code>RuleBasedTransliterator</code>:
  95. *
  96. * \htmlonly<blockquote>\endhtmlonly<code>
  97. * th&gt;{theta}<br>
  98. * t&gt;{tau}
  99. * </code>\htmlonly</blockquote>\endhtmlonly
  100. *
  101. * When the user types 't', nothing will happen, since the
  102. * transliterator is waiting to see if the next character is 'h'. To
  103. * remedy this, we introduce the notion of a cursor, marked by a '|'
  104. * in the output string:
  105. *
  106. * \htmlonly<blockquote>\endhtmlonly<code>
  107. * t&gt;|{tau}<br>
  108. * {tau}h&gt;{theta}
  109. * </code>\htmlonly</blockquote>\endhtmlonly
  110. *
  111. * Now when the user types 't', tau appears, and if the next character
  112. * is 'h', the tau changes to a theta. This is accomplished by
  113. * maintaining a cursor position (independent of the insertion point,
  114. * and invisible in the GUI) across calls to
  115. * <code>transliterate()</code>. Typically, the cursor will
  116. * be coincident with the insertion point, but in a case like the one
  117. * above, it will precede the insertion point.
  118. *
  119. * <p>Keyboard transliteration methods maintain a set of three indices
  120. * that are updated with each call to
  121. * <code>transliterate()</code>, including the cursor, start,
  122. * and limit. Since these indices are changed by the method, they are
  123. * passed in an <code>int[]</code> array. The <code>START</code> index
  124. * marks the beginning of the substring that the transliterator will
  125. * look at. It is advanced as text becomes committed (but it is not
  126. * the committed index; that's the <code>CURSOR</code>). The
  127. * <code>CURSOR</code> index, described above, marks the point at
  128. * which the transliterator last stopped, either because it reached
  129. * the end, or because it required more characters to disambiguate
  130. * between possible inputs. The <code>CURSOR</code> can also be
  131. * explicitly set by rules in a <code>RuleBasedTransliterator</code>.
  132. * Any characters before the <code>CURSOR</code> index are frozen;
  133. * future keyboard transliteration calls within this input sequence
  134. * will not change them. New text is inserted at the
  135. * <code>LIMIT</code> index, which marks the end of the substring that
  136. * the transliterator looks at.
  137. *
  138. * <p>Because keyboard transliteration assumes that more characters
  139. * are to arrive, it is conservative in its operation. It only
  140. * transliterates when it can do so unambiguously. Otherwise it waits
  141. * for more characters to arrive. When the client code knows that no
  142. * more characters are forthcoming, perhaps because the user has
  143. * performed some input termination operation, then it should call
  144. * <code>finishTransliteration()</code> to complete any
  145. * pending transliterations.
  146. *
  147. * <p><b>Inverses</b>
  148. *
  149. * <p>Pairs of transliterators may be inverses of one another. For
  150. * example, if transliterator <b>A</b> transliterates characters by
  151. * incrementing their Unicode value (so "abc" -> "def"), and
  152. * transliterator <b>B</b> decrements character values, then <b>A</b>
  153. * is an inverse of <b>B</b> and vice versa. If we compose <b>A</b>
  154. * with <b>B</b> in a compound transliterator, the result is the
  155. * identity transliterator, that is, a transliterator that does not
  156. * change its input text.
  157. *
  158. * The <code>Transliterator</code> method <code>getInverse()</code>
  159. * returns a transliterator's inverse, if one exists, or
  160. * <code>null</code> otherwise. However, the result of
  161. * <code>getInverse()</code> usually will <em>not</em> be a true
  162. * mathematical inverse. This is because true inverse transliterators
  163. * are difficult to formulate. For example, consider two
  164. * transliterators: <b>AB</b>, which transliterates the character 'A'
  165. * to 'B', and <b>BA</b>, which transliterates 'B' to 'A'. It might
  166. * seem that these are exact inverses, since
  167. *
  168. * \htmlonly<blockquote>\endhtmlonly"A" x <b>AB</b> -> "B"<br>
  169. * "B" x <b>BA</b> -> "A"\htmlonly</blockquote>\endhtmlonly
  170. *
  171. * where 'x' represents transliteration. However,
  172. *
  173. * \htmlonly<blockquote>\endhtmlonly"ABCD" x <b>AB</b> -> "BBCD"<br>
  174. * "BBCD" x <b>BA</b> -> "AACD"\htmlonly</blockquote>\endhtmlonly
  175. *
  176. * so <b>AB</b> composed with <b>BA</b> is not the
  177. * identity. Nonetheless, <b>BA</b> may be usefully considered to be
  178. * <b>AB</b>'s inverse, and it is on this basis that
  179. * <b>AB</b><code>.getInverse()</code> could legitimately return
  180. * <b>BA</b>.
  181. *
  182. * <p><b>IDs and display names</b>
  183. *
  184. * <p>A transliterator is designated by a short identifier string or
  185. * <em>ID</em>. IDs follow the format <em>source-destination</em>,
  186. * where <em>source</em> describes the entity being replaced, and
  187. * <em>destination</em> describes the entity replacing
  188. * <em>source</em>. The entities may be the names of scripts,
  189. * particular sequences of characters, or whatever else it is that the
  190. * transliterator converts to or from. For example, a transliterator
  191. * from Russian to Latin might be named "Russian-Latin". A
  192. * transliterator from keyboard escape sequences to Latin-1 characters
  193. * might be named "KeyboardEscape-Latin1". By convention, system
  194. * entity names are in English, with the initial letters of words
  195. * capitalized; user entity names may follow any format so long as
  196. * they do not contain dashes.
  197. *
  198. * <p>In addition to programmatic IDs, transliterator objects have
  199. * display names for presentation in user interfaces, returned by
  200. * {@link #getDisplayName }.
  201. *
  202. * <p><b>Factory methods and registration</b>
  203. *
  204. * <p>In general, client code should use the factory method
  205. * {@link #createInstance } to obtain an instance of a
  206. * transliterator given its ID. Valid IDs may be enumerated using
  207. * <code>getAvailableIDs()</code>. Since transliterators are mutable,
  208. * multiple calls to {@link #createInstance } with the same ID will
  209. * return distinct objects.
  210. *
  211. * <p>In addition to the system transliterators registered at startup,
  212. * user transliterators may be registered by calling
  213. * <code>registerInstance()</code> at run time. A registered instance
  214. * acts as a template; future calls to {@link #createInstance } with the ID
  215. * of the registered object return clones of that object. Thus any
  216. * object passed to <tt>registerInstance()</tt> must implement
  217. * <tt>clone()</tt> properly. To register a transliterator subclass
  218. * without instantiating it (until it is needed), users may call
  219. * {@link #registerFactory }. In this case, the objects are
  220. * instantiated by invoking the zero-argument public constructor of
  221. * the class.
  222. *
  223. * <p><b>Subclassing</b>
  224. *
  225. * Subclasses must implement the abstract method
  226. * <code>handleTransliterate()</code>. <p>Subclasses should override
  227. * the <code>transliterate()</code> method taking a
  228. * <code>Replaceable</code> and the <code>transliterate()</code>
  229. * method taking a <code>String</code> and <code>StringBuffer</code>
  230. * if the performance of these methods can be improved over the
  231. * performance obtained by the default implementations in this class.
  232. *
  233. * @author Alan Liu
  234. * @stable ICU 2.0
  235. */
  236. class U_I18N_API Transliterator : public UObject {
  237. private:
  238. /**
  239. * Programmatic name, e.g., "Latin-Arabic".
  240. */
  241. UnicodeString ID;
  242. /**
  243. * This transliterator's filter. Any character for which
  244. * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
  245. * altered by this transliterator. If <tt>filter</tt> is
  246. * <tt>null</tt> then no filtering is applied.
  247. */
  248. UnicodeFilter* filter;
  249. int32_t maximumContextLength;
  250. public:
  251. /**
  252. * A context integer or pointer for a factory function, passed by
  253. * value. Being a union, the two members below share the same storage.
  254. * @stable ICU 2.4
  255. */
  256. union Token {
  257. /**
  258. * This token, interpreted as a 32-bit integer.
  259. * @stable ICU 2.4
  260. */
  261. int32_t integer;
  262. /**
  263. * This token, interpreted as a native pointer.
  264. * @stable ICU 2.4
  265. */
  266. void* pointer;
  267. };
  268. #ifndef U_HIDE_INTERNAL_API
  269. /**
  270. * Return a token containing an integer.
  271. * @return a token containing an integer.
  272. * @internal
  273. */
  274. inline static Token integerToken(int32_t);
  275. /**
  276. * Return a token containing a pointer.
  277. * @return a token containing a pointer.
  278. * @internal
  279. */
  280. inline static Token pointerToken(void*);
  281. #endif /* U_HIDE_INTERNAL_API */
  282. /**
  283. * A function that creates and returns a Transliterator. When
  284. * invoked, it will be passed the ID string that is being
  285. * instantiated, together with the context pointer that was passed
  286. * in when the factory function was first registered. Many
  287. * factory functions will ignore both parameters, however,
  288. * functions that are registered to more than one ID may use the
  289. * ID or the context parameter to parameterize the transliterator
  290. * they create.
  291. * @param ID the string identifier for this transliterator
  292. * @param context a context pointer that will be stored and
  293. * later passed to the factory function when an ID matching
  294. * the registration ID is being instantiated with this factory.
  295. * @stable ICU 2.4
  296. */
  297. typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context);
  298. protected:
  299. /**
  300. * Default constructor.
  301. * @param ID the string identifier for this transliterator
  302. * @param adoptedFilter the filter. Any character for which
  303. * <tt>filter.contains()</tt> returns <tt>false</tt> will not be
  304. * altered by this transliterator. If <tt>filter</tt> is
  305. * <tt>null</tt> then no filtering is applied.
  306. * @stable ICU 2.4
  307. */
  308. Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
  309. /**
  310. * Copy constructor.
  311. * @stable ICU 2.4
  312. */
  313. Transliterator(const Transliterator&);
  314. /**
  315. * Assignment operator.
  316. * @stable ICU 2.4
  317. */
  318. Transliterator& operator=(const Transliterator&);
  319. /**
  320. * Create a transliterator from a basic ID. This is an ID
  321. * containing only the forward direction source, target, and
  322. * variant.
  323. * @param id a basic ID of the form S-T or S-T/V.
  324. * @param canon canonical ID to assign to the object, or
  325. * NULL to leave the ID unchanged
  326. * @return a newly created Transliterator or null if the ID is
  327. * invalid.
  328. * @stable ICU 2.4
  329. */
  330. static Transliterator* createBasicInstance(const UnicodeString& id,
  331. const UnicodeString* canon);
  332. friend class TransliteratorParser; // for parseID()
  333. friend class TransliteratorIDParser; // for createBasicInstance()
  334. friend class TransliteratorAlias; // for setID()
  335. public:
  336. /**
  337. * Destructor.
  338. * @stable ICU 2.0
  339. */
  340. virtual ~Transliterator();
  341. /**
  342. * Implements Cloneable.
  343. * All subclasses are encouraged to implement this method if it is
  344. * possible and reasonable to do so. Subclasses that are to be
  345. * registered with the system using <tt>registerInstance()</tt>
  346. * are required to implement this method. If a subclass does not
  347. * implement clone() properly and is registered with the system
  348. * using registerInstance(), then the default clone() implementation
  349. * will return null, and calls to createInstance() will fail.
  350. *
  351. * @return a copy of the object.
  352. * @see #registerInstance
  353. * @stable ICU 2.0
  354. */
  355. virtual Transliterator* clone() const;
  356. /**
  357. * Transliterates a segment of a string, with optional filtering.
  358. *
  359. * @param text the string to be transliterated
  360. * @param start the beginning index, inclusive; <code>0 <= start
  361. * <= limit</code>.
  362. * @param limit the ending index, exclusive; <code>start <= limit
  363. * <= text.length()</code>.
  364. * @return The new limit index. The text previously occupying <code>[start,
  365. * limit)</code> has been transliterated, possibly to a string of a different
  366. * length, at <code>[start, </code><em>new-limit</em><code>)</code>, where
  367. * <em>new-limit</em> is the return value. If the input offsets are out of bounds,
  368. * the returned value is -1 and the input string remains unchanged.
  369. * @stable ICU 2.0
  370. */
  371. virtual int32_t transliterate(Replaceable& text,
  372. int32_t start, int32_t limit) const;
  373. /**
  374. * Transliterates an entire string in place. Convenience method.
  375. * @param text the string to be transliterated
  376. * @stable ICU 2.0
  377. */
  378. virtual void transliterate(Replaceable& text) const;
  379. /**
  380. * Transliterates the portion of the text buffer that can be
  381. * transliterated unambiguously after new text has been inserted,
  382. * typically as a result of a keyboard event. The new text in
  383. * <code>insertion</code> will be inserted into <code>text</code>
  384. * at <code>index.limit</code>, advancing
  385. * <code>index.limit</code> by <code>insertion.length()</code>.
  386. * Then the transliterator will try to transliterate characters of
  387. * <code>text</code> between <code>index.cursor</code> and
  388. * <code>index.limit</code>. Characters before
  389. * <code>index.cursor</code> will not be changed.
  390. *
  391. * <p>Upon return, values in <code>index</code> will be updated.
  392. * <code>index.start</code> will be advanced to the first
  393. * character that future calls to this method will read.
  394. * <code>index.cursor</code> and <code>index.limit</code> will
  395. * be adjusted to delimit the range of text that future calls to
  396. * this method may change.
  397. *
  398. * <p>Typical usage of this method begins with an initial call
  399. * with <code>index.start</code> and <code>index.limit</code>
  400. * set to indicate the portion of <code>text</code> to be
  401. * transliterated, and <code>index.cursor == index.start</code>.
  402. * Thereafter, <code>index</code> can be used without
  403. * modification in future calls, provided that all changes to
  404. * <code>text</code> are made via this method.
  405. *
  406. * <p>This method assumes that future calls may be made that will
  407. * insert new text into the buffer. As a result, it only performs
  408. * unambiguous transliterations. After the last call to this
  409. * method, there may be untransliterated text that is waiting for
  410. * more input to resolve an ambiguity. In order to perform these
  411. * pending transliterations, clients should call {@link
  412. * #finishTransliteration } after the last call to this
  413. * method has been made.
  414. *
  415. * @param text the buffer holding transliterated and untransliterated text
  416. * @param index an array of three integers.
  417. *
  418. * <ul><li><code>index.start</code>: the beginning index,
  419. * inclusive; <code>0 <= index.start <= index.limit</code>.
  420. *
  421. * <li><code>index.limit</code>: the ending index, exclusive;
  422. * <code>index.start <= index.limit <= text.length()</code>.
  423. * <code>insertion</code> is inserted at
  424. * <code>index.limit</code>.
  425. *
  426. * <li><code>index.cursor</code>: the next character to be
  427. * considered for transliteration; <code>index.start <=
  428. * index.cursor <= index.limit</code>. Characters before
  429. * <code>index.cursor</code> will not be changed by future calls
  430. * to this method.</ul>
  431. *
  432. * @param insertion text to be inserted and possibly
  433. * transliterated into the translation buffer at
  434. * <code>index.limit</code>. If <code>null</code> then no text
  435. * is inserted.
  436. * @param status Output param to be filled in with a success or an error.
  437. * @see #handleTransliterate
  438. * @exception IllegalArgumentException if <code>index</code>
  439. * is invalid
  440. * @see UTransPosition
  441. * @stable ICU 2.0
  442. */
  443. virtual void transliterate(Replaceable& text, UTransPosition& index,
  444. const UnicodeString& insertion,
  445. UErrorCode& status) const;
  446. /**
  447. * Transliterates the portion of the text buffer that can be
  448. * transliterated unambiguously after a new character has been
  449. * inserted, typically as a result of a keyboard event. This is a
  450. * convenience method.
  451. * @param text the buffer holding transliterated and
  452. * untransliterated text
  453. * @param index an array of three integers.
  454. * @param insertion text to be inserted and possibly
  455. * transliterated into the translation buffer at
  456. * <code>index.limit</code>.
  457. * @param status Output param to be filled in with a success or an error.
  458. * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
  459. * @stable ICU 2.0
  460. */
  461. virtual void transliterate(Replaceable& text, UTransPosition& index,
  462. UChar32 insertion,
  463. UErrorCode& status) const;
  464. /**
  465. * Transliterates the portion of the text buffer that can be
  466. * transliterated unambiguously. This is a convenience method; see
  467. * {@link
  468. * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }
  469. * for details.
  470. * @param text the buffer holding transliterated and
  471. * untransliterated text
  472. * @param index an array of three integers. See {@link #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
  473. * @param status Output param to be filled in with a success or an error.
  474. * @see #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const
  475. * @stable ICU 2.0
  476. */
  477. virtual void transliterate(Replaceable& text, UTransPosition& index,
  478. UErrorCode& status) const;
  479. /**
  480. * Finishes any pending transliterations that were waiting for
  481. * more characters. Clients should call this method as the last
  482. * call after a sequence of one or more calls to
  483. * <code>transliterate()</code>.
  484. * @param text the buffer holding transliterated and
  485. * untransliterated text.
  486. * @param index the array of indices previously passed to {@link
  487. * #transliterate }
  488. * @stable ICU 2.0
  489. */
  490. virtual void finishTransliteration(Replaceable& text,
  491. UTransPosition& index) const;
  492. private:
  493. /**
  494. * This internal method does incremental transliteration. If the
  495. * 'insertion' is non-null then we append it to 'text' before
  496. * proceeding. This method calls through to the pure virtual
  497. * framework method handleTransliterate() to do the actual
  498. * work.
  499. * @param text the buffer holding transliterated and
  500. * untransliterated text
  501. * @param index an array of three integers. See {@link
  502. * #transliterate(Replaceable&, UTransPosition&, const UnicodeString&, UErrorCode&) const }.
  503. * @param insertion text to be inserted and possibly
  504. * transliterated into the translation buffer at
  505. * <code>index.limit</code>.
  506. * @param status Output param to be filled in with a success or an error.
  507. */
  508. void _transliterate(Replaceable& text,
  509. UTransPosition& index,
  510. const UnicodeString* insertion,
  511. UErrorCode &status) const;
  512. protected:
  513. /**
  514. * Abstract method that concrete subclasses define to implement
  515. * their transliteration algorithm. This method handles both
  516. * incremental and non-incremental transliteration. Let
  517. * <code>originalStart</code> refer to the value of
  518. * <code>pos.start</code> upon entry.
  519. *
  520. * <ul>
  521. * <li>If <code>incremental</code> is false, then this method
  522. * should transliterate all characters between
  523. * <code>pos.start</code> and <code>pos.limit</code>. Upon return
  524. * <code>pos.start</code> must == <code> pos.limit</code>.</li>
  525. *
  526. * <li>If <code>incremental</code> is true, then this method
  527. * should transliterate all characters between
  528. * <code>pos.start</code> and <code>pos.limit</code> that can be
  529. * unambiguously transliterated, regardless of future insertions
  530. * of text at <code>pos.limit</code>. Upon return,
  531. * <code>pos.start</code> should be in the range
  532. * [<code>originalStart</code>, <code>pos.limit</code>).
  533. * <code>pos.start</code> should be positioned such that
  534. * characters [<code>originalStart</code>, <code>
  535. * pos.start</code>) will not be changed in the future by this
  536. * transliterator and characters [<code>pos.start</code>,
  537. * <code>pos.limit</code>) are unchanged.</li>
  538. * </ul>
  539. *
  540. * <p>Implementations of this method should also obey the
  541. * following invariants:</p>
  542. *
  543. * <ul>
  544. * <li> <code>pos.limit</code> and <code>pos.contextLimit</code>
  545. * should be updated to reflect changes in length of the text
  546. * between <code>pos.start</code> and <code>pos.limit</code>. The
  547. * difference <code> pos.contextLimit - pos.limit</code> should
  548. * not change.</li>
  549. *
  550. * <li><code>pos.contextStart</code> should not change.</li>
  551. *
  552. * <li>Upon return, neither <code>pos.start</code> nor
  553. * <code>pos.limit</code> should be less than
  554. * <code>originalStart</code>.</li>
  555. *
  556. * <li>Text before <code>originalStart</code> and text after
  557. * <code>pos.limit</code> should not change.</li>
  558. *
  559. * <li>Text before <code>pos.contextStart</code> and text after
  560. * <code> pos.contextLimit</code> should be ignored.</li>
  561. * </ul>
  562. *
  563. * <p>Subclasses may safely assume that all characters in
  564. * [<code>pos.start</code>, <code>pos.limit</code>) are filtered.
  565. * In other words, the filter has already been applied by the time
  566. * this method is called. See
  567. * <code>filteredTransliterate()</code>.
  568. *
  569. * <p>This method is <b>not</b> for public consumption. Calling
  570. * this method directly will transliterate
  571. * [<code>pos.start</code>, <code>pos.limit</code>) without
  572. * applying the filter. End user code should call <code>
  573. * transliterate()</code> instead of this method. Subclass code
  574. * and wrapping transliterators should call
  575. * <code>filteredTransliterate()</code> instead of this method.<p>
  576. *
  577. * @param text the buffer holding transliterated and
  578. * untransliterated text
  579. *
  580. * @param pos the indices indicating the start, limit, context
  581. * start, and context limit of the text.
  582. *
  583. * @param incremental if true, assume more text may be inserted at
  584. * <code>pos.limit</code> and act accordingly. Otherwise,
  585. * transliterate all text between <code>pos.start</code> and
  586. * <code>pos.limit</code> and move <code>pos.start</code> up to
  587. * <code>pos.limit</code>.
  588. *
  589. * @see #transliterate
  590. * @stable ICU 2.4
  591. */
  592. virtual void handleTransliterate(Replaceable& text,
  593. UTransPosition& pos,
  594. UBool incremental) const = 0;
  595. public:
  596. /**
  597. * Transliterate a substring of text, as specified by index, taking filters
  598. * into account. This method is for subclasses that need to delegate to
  599. * another transliterator, such as CompoundTransliterator.
  600. * @param text the text to be transliterated
  601. * @param index the position indices
  602. * @param incremental if TRUE, then assume more characters may be inserted
  603. * at index.limit, and postpone processing to accommodate future incoming
  604. * characters
  605. * @stable ICU 2.4
  606. */
  607. virtual void filteredTransliterate(Replaceable& text,
  608. UTransPosition& index,
  609. UBool incremental) const;
  610. private:
  611. /**
  612. * Top-level transliteration method, handling filtering, incremental and
  613. * non-incremental transliteration, and rollback. All transliteration
  614. * public API methods eventually call this method with a rollback argument
  615. * of TRUE. Other entities may call this method but rollback should be
  616. * FALSE.
  617. *
  618. * <p>If this transliterator has a filter, break up the input text into runs
  619. * of unfiltered characters. Pass each run to
  620. * subclass.handleTransliterate().
  621. *
  622. * <p>In incremental mode, if rollback is TRUE, perform a special
  623. * incremental procedure in which several passes are made over the input
  624. * text, adding one character at a time, and committing successful
  625. * transliterations as they occur. Unsuccessful transliterations are rolled
  626. * back and retried with additional characters to give correct results.
  627. *
  628. * @param text the text to be transliterated
  629. * @param index the position indices
  630. * @param incremental if TRUE, then assume more characters may be inserted
  631. * at index.limit, and postpone processing to accommodate future incoming
  632. * characters
  633. * @param rollback if TRUE and if incremental is TRUE, then perform special
  634. * incremental processing, as described above, and undo partial
  635. * transliterations where necessary. If incremental is FALSE then this
  636. * parameter is ignored.
  637. */
  638. virtual void filteredTransliterate(Replaceable& text,
  639. UTransPosition& index,
  640. UBool incremental,
  641. UBool rollback) const;
  642. public:
  643. /**
  644. * Returns the length of the longest context required by this transliterator.
  645. * This is <em>preceding</em> context. The default implementation supplied
  646. * by <code>Transliterator</code> returns zero; subclasses
  647. * that use preceding context should override this method to return the
  648. * correct value. For example, if a transliterator translates "ddd" (where
  649. * d is any digit) to "555" when preceded by "(ddd)", then the preceding
  650. * context length is 5, the length of "(ddd)".
  651. *
  652. * @return The maximum number of preceding context characters this
  653. * transliterator needs to examine
  654. * @stable ICU 2.0
  655. */
  656. int32_t getMaximumContextLength(void) const;
  657. protected:
  658. /**
  659. * Method for subclasses to use to set the maximum context length.
  660. * @param maxContextLength the new value to be set.
  661. * @see #getMaximumContextLength
  662. * @stable ICU 2.4
  663. */
  664. void setMaximumContextLength(int32_t maxContextLength);
  665. public:
  666. /**
  667. * Returns a programmatic identifier for this transliterator.
  668. * If this identifier is passed to <code>createInstance()</code>, it
  669. * will return this object, if it has been registered.
  670. * @return a programmatic identifier for this transliterator.
  671. * @see #registerInstance
  672. * @see #registerFactory
  673. * @see #getAvailableIDs
  674. * @stable ICU 2.0
  675. */
  676. virtual const UnicodeString& getID(void) const;
  677. /**
  678. * Returns a name for this transliterator that is appropriate for
  679. * display to the user in the default locale. See {@link
  680. * #getDisplayName } for details.
  681. * @param ID the string identifier for this transliterator
  682. * @param result Output param to receive the display name
  683. * @return A reference to 'result'.
  684. * @stable ICU 2.0
  685. */
  686. static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
  687. UnicodeString& result);
  688. /**
  689. * Returns a name for this transliterator that is appropriate for
  690. * display to the user in the given locale. This name is taken
  691. * from the locale resource data in the standard manner of the
  692. * <code>java.text</code> package.
  693. *
  694. * <p>If no localized names exist in the system resource bundles,
  695. * a name is synthesized using a localized
  696. * <code>MessageFormat</code> pattern from the resource data. The
  697. * arguments to this pattern are an integer followed by one or two
  698. * strings. The integer is the number of strings, either 1 or 2.
  699. * The strings are formed by splitting the ID for this
  700. * transliterator at the first '-'. If there is no '-', then the
  701. * entire ID forms the only string.
  702. * @param ID the string identifier for this transliterator
  703. * @param inLocale the Locale in which the display name should be
  704. * localized.
  705. * @param result Output param to receive the display name
  706. * @return A reference to 'result'.
  707. * @stable ICU 2.0
  708. */
  709. static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
  710. const Locale& inLocale,
  711. UnicodeString& result);
  712. /**
  713. * Returns the filter used by this transliterator, or <tt>NULL</tt>
  714. * if this transliterator uses no filter.
  715. * @return the filter used by this transliterator, or <tt>NULL</tt>
  716. * if this transliterator uses no filter.
  717. * @stable ICU 2.0
  718. */
  719. const UnicodeFilter* getFilter(void) const;
  720. /**
  721. * Returns the filter used by this transliterator, or <tt>NULL</tt> if this
  722. * transliterator uses no filter. The caller must eventually delete the
  723. * result. After this call, this transliterator's filter is set to
  724. * <tt>NULL</tt>.
  725. * @return the filter used by this transliterator, or <tt>NULL</tt> if this
  726. * transliterator uses no filter.
  727. * @stable ICU 2.4
  728. */
  729. UnicodeFilter* orphanFilter(void);
  730. /**
  731. * Changes the filter used by this transliterator. If the filter
  732. * is set to <tt>null</tt> then no filtering will occur.
  733. *
  734. * <p>Callers must take care if a transliterator is in use by
  735. * multiple threads. The filter should not be changed by one
  736. * thread while another thread may be transliterating.
  737. * @param adoptedFilter the new filter to be adopted.
  738. * @stable ICU 2.0
  739. */
  740. void adoptFilter(UnicodeFilter* adoptedFilter);
  741. /**
  742. * Returns this transliterator's inverse. See the class
  743. * documentation for details. This implementation simply inverts
  744. * the two entities in the ID and attempts to retrieve the
  745. * resulting transliterator. That is, if <code>getID()</code>
  746. * returns "A-B", then this method will return the result of
  747. * <code>createInstance("B-A")</code>, or <code>null</code> if that
  748. * call fails.
  749. *
  750. * <p>Subclasses with knowledge of their inverse may wish to
  751. * override this method.
  752. *
  753. * @param status Output param to be filled in with a success or an error.
  754. * @return a transliterator that is an inverse, not necessarily
  755. * exact, of this transliterator, or <code>null</code> if no such
  756. * transliterator is registered.
  757. * @see #registerInstance
  758. * @stable ICU 2.0
  759. */
  760. Transliterator* createInverse(UErrorCode& status) const;
  761. /**
  762. * Returns a <code>Transliterator</code> object given its ID.
  763. * The ID must be either a system transliterator ID or an ID registered
  764. * using <code>registerInstance()</code>.
  765. *
  766. * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
  767. * @param dir either FORWARD or REVERSE.
  768. * @param parseError Struct to receive information on position
  769. * of error if an error is encountered
  770. * @param status Output param to be filled in with a success or an error.
  771. * @return A <code>Transliterator</code> object with the given ID
  772. * @see #registerInstance
  773. * @see #getAvailableIDs
  774. * @see #getID
  775. * @stable ICU 2.0
  776. */
  777. static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
  778. UTransDirection dir,
  779. UParseError& parseError,
  780. UErrorCode& status);
  781. /**
  782. * Returns a <code>Transliterator</code> object given its ID.
  783. * The ID must be either a system transliterator ID or an ID registered
  784. * using <code>registerInstance()</code>.
  785. * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code>
  786. * @param dir either FORWARD or REVERSE.
  787. * @param status Output param to be filled in with a success or an error.
  788. * @return A <code>Transliterator</code> object with the given ID
  789. * @stable ICU 2.0
  790. */
  791. static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
  792. UTransDirection dir,
  793. UErrorCode& status);
  794. /**
  795. * Returns a <code>Transliterator</code> object constructed from
  796. * the given rule string. This will be a RuleBasedTransliterator,
  797. * if the rule string contains only rules, or a
  798. * CompoundTransliterator, if it contains ID blocks, or a
  799. * NullTransliterator, if it contains ID blocks which parse as
  800. * empty for the given direction.
  801. * @param ID the id for the transliterator.
  802. * @param rules rules, separated by ';'
  803. * @param dir either FORWARD or REVERSE.
  804. * @param parseError Struct to receive information on position
  805. * of error if an error is encountered
  806. * @param status Output param set to success/failure code.
  807. * @stable ICU 2.0
  808. */
  809. static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
  810. const UnicodeString& rules,
  811. UTransDirection dir,
  812. UParseError& parseError,
  813. UErrorCode& status);
  814. /**
  815. * Create a rule string that can be passed to createFromRules()
  816. * to recreate this transliterator.
  817. * @param result the string to receive the rules. Previous
  818. * contents will be deleted.
  819. * @param escapeUnprintable if TRUE then convert unprintable
  820. * characters to their hex escape representations, \\uxxxx or
  821. * \\Uxxxxxxxx. Unprintable characters are those other than
  822. * U+000A, U+0020..U+007E.
  823. * @stable ICU 2.0
  824. */
  825. virtual UnicodeString& toRules(UnicodeString& result,
  826. UBool escapeUnprintable) const;
  827. /**
  828. * Return the number of elements that make up this transliterator.
  829. * For example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
  830. * were created, the return value of this method would be 3.
  831. *
  832. * <p>If this transliterator is not composed of other
  833. * transliterators, then this method returns 1.
  834. * @return the number of transliterators that compose this
  835. * transliterator, or 1 if this transliterator is not composed of
  836. * multiple transliterators
  837. * @stable ICU 3.0
  838. */
  839. int32_t countElements() const;
  840. /**
  841. * Return an element that makes up this transliterator. For
  842. * example, if the transliterator "NFD;Jamo-Latin;Latin-Greek"
  843. * were created, the return value of this method would be one
  844. * of the three transliterator objects that make up that
  845. * transliterator: [NFD, Jamo-Latin, Latin-Greek].
  846. *
  847. * <p>If this transliterator is not composed of other
  848. * transliterators, then this method will return a reference to
  849. * this transliterator when given the index 0.
  850. * @param index a value from 0..countElements()-1 indicating the
  851. * transliterator to return
  852. * @param ec input-output error code
  853. * @return one of the transliterators that makes up this
  854. * transliterator, if this transliterator is made up of multiple
  855. * transliterators, otherwise a reference to this object if given
  856. * an index of 0
  857. * @stable ICU 3.0
  858. */
  859. const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
  860. /**
  861. * Returns the set of all characters that may be modified in the
  862. * input text by this Transliterator. This incorporates this
  863. * object's current filter; if the filter is changed, the return
  864. * value of this function will change. The default implementation
  865. * returns an empty set. Some subclasses may override {@link
  866. * #handleGetSourceSet } to return a more precise result. The
  867. * return result is approximate in any case and is intended for
  868. * use by tests, tools, or utilities.
  869. * @param result receives result set; previous contents lost
  870. * @return a reference to result
  871. * @see #getTargetSet
  872. * @see #handleGetSourceSet
  873. * @stable ICU 2.4
  874. */
  875. UnicodeSet& getSourceSet(UnicodeSet& result) const;
  876. /**
  877. * Framework method that returns the set of all characters that
  878. * may be modified in the input text by this Transliterator,
  879. * ignoring the effect of this object's filter. The base class
  880. * implementation returns the empty set. Subclasses that wish to
  881. * implement this should override this method.
  882. * @return the set of characters that this transliterator may
  883. * modify. The set may be modified, so subclasses should return a
  884. * newly-created object.
  885. * @param result receives result set; previous contents lost
  886. * @see #getSourceSet
  887. * @see #getTargetSet
  888. * @stable ICU 2.4
  889. */
  890. virtual void handleGetSourceSet(UnicodeSet& result) const;
  891. /**
  892. * Returns the set of all characters that may be generated as
  893. * replacement text by this transliterator. The default
  894. * implementation returns the empty set. Some subclasses may
  895. * override this method to return a more precise result. The
  896. * return result is approximate in any case and is intended for
  897. * use by tests, tools, or utilities requiring such
  898. * meta-information.
  899. * @param result receives result set; previous contents lost
  900. * @return a reference to result
  901. * @see #getTargetSet
  902. * @stable ICU 2.4
  903. */
  904. virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
  905. public:
  906. /**
  907. * Registers a factory function that creates transliterators of
  908. * a given ID.
  909. *
  910. * Because ICU may choose to cache Transliterators internally, this must
  911. * be called at application startup, prior to any calls to
  912. * Transliterator::createXXX to avoid undefined behavior.
  913. *
  914. * @param id the ID being registered
  915. * @param factory a function pointer that will be copied and
  916. * called later when the given ID is passed to createInstance()
  917. * @param context a context pointer that will be stored and
  918. * later passed to the factory function when an ID matching
  919. * the registration ID is being instantiated with this factory.
  920. * @stable ICU 2.0
  921. */
  922. static void U_EXPORT2 registerFactory(const UnicodeString& id,
  923. Factory factory,
  924. Token context);
  925. /**
  926. * Registers an instance <tt>obj</tt> of a subclass of
  927. * <code>Transliterator</code> with the system. When
  928. * <tt>createInstance()</tt> is called with an ID string that is
  929. * equal to <tt>obj->getID()</tt>, then <tt>obj->clone()</tt> is
  930. * returned.
  931. *
  932. * After this call the Transliterator class owns the adoptedObj
  933. * and will delete it.
  934. *
  935. * Because ICU may choose to cache Transliterators internally, this must
  936. * be called at application startup, prior to any calls to
  937. * Transliterator::createXXX to avoid undefined behavior.
  938. *
  939. * @param adoptedObj an instance of subclass of
  940. * <code>Transliterator</code> that defines <tt>clone()</tt>
  941. * @see #createInstance
  942. * @see #registerFactory
  943. * @see #unregister
  944. * @stable ICU 2.0
  945. */
  946. static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
  947. /**
  948. * Registers an ID string as an alias of another ID string.
  949. * That is, after calling this function, <tt>createInstance(aliasID)</tt>
  950. * will return the same thing as <tt>createInstance(realID)</tt>.
  951. * This is generally used to create shorter, more mnemonic aliases
  952. * for long compound IDs.
  953. *
  954. * @param aliasID The new ID being registered.
  955. * @param realID The ID that the new ID is to be an alias for.
  956. * This can be a compound ID and can include filters and should
  957. * refer to transliterators that have already been registered with
  958. * the framework, although this isn't checked.
  959. * @stable ICU 3.6
  960. */
  961. static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
  962. const UnicodeString& realID);
  963. protected:
  964. #ifndef U_HIDE_INTERNAL_API
  965. /**
  966. * @param id the ID being registered
  967. * @param factory a function pointer that will be copied and
  968. * called later when the given ID is passed to createInstance()
  969. * @param context a context pointer that will be stored and
  970. * later passed to the factory function when an ID matching
  971. * the registration ID is being instantiated with this factory.
  972. * @internal
  973. */
  974. static void _registerFactory(const UnicodeString& id,
  975. Factory factory,
  976. Token context);
  977. /**
  978. * @internal
  979. */
  980. static void _registerInstance(Transliterator* adoptedObj);
  981. /**
  982. * @internal
  983. */
  984. static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
  985. /**
  986. * Register two targets as being inverses of one another. For
  987. * example, calling registerSpecialInverse("NFC", "NFD", true) causes
  988. * Transliterator to form the following inverse relationships:
  989. *
  990. * <pre>NFC => NFD
  991. * Any-NFC => Any-NFD
  992. * NFD => NFC
  993. * Any-NFD => Any-NFC</pre>
  994. *
  995. * (Without the special inverse registration, the inverse of NFC
  996. * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but
  997. * that the presence or absence of "Any-" is preserved.
  998. *
  999. * <p>The relationship is symmetrical; registering (a, b) is
  1000. * equivalent to registering (b, a).
  1001. *
  1002. * <p>The relevant IDs must still be registered separately as
  1003. * factories or classes.
  1004. *
  1005. * <p>Only the targets are specified. Special inverses always
  1006. * have the form Any-Target1 <=> Any-Target2. The target should
  1007. * have canonical casing (the casing desired to be produced when
  1008. * an inverse is formed) and should contain no whitespace or other
  1009. * extraneous characters.
  1010. *
  1011. * @param target the target against which to register the inverse
  1012. * @param inverseTarget the inverse of target, that is
  1013. * Any-target.getInverse() => Any-inverseTarget
  1014. * @param bidirectional if true, register the reverse relation
  1015. * as well, that is, Any-inverseTarget.getInverse() => Any-target
  1016. * @internal
  1017. */
  1018. static void _registerSpecialInverse(const UnicodeString& target,
  1019. const UnicodeString& inverseTarget,
  1020. UBool bidirectional);
  1021. #endif /* U_HIDE_INTERNAL_API */
  1022. public:
  1023. /**
  1024. * Unregisters a transliterator or class. This may be either
  1025. * a system transliterator or a user transliterator or class.
  1026. * Any attempt to construct an unregistered transliterator based
  1027. * on its ID will fail.
  1028. *
  1029. * Because ICU may choose to cache Transliterators internally, this should
  1030. * be called during application shutdown, after all calls to
  1031. * Transliterator::createXXX to avoid undefined behavior.
  1032. *
  1033. * @param ID the ID of the transliterator or class
  1034. * @return the <code>Object</code> that was registered with
  1035. * <code>ID</code>, or <code>null</code> if none was
  1036. * @see #registerInstance
  1037. * @see #registerFactory
  1038. * @stable ICU 2.0
  1039. */
  1040. static void U_EXPORT2 unregister(const UnicodeString& ID);
  1041. public:
  1042. /**
  1043. * Return a StringEnumeration over the IDs available at the time of the
  1044. * call, including user-registered IDs.
  1045. * @param ec input-output error code
  1046. * @return a newly-created StringEnumeration over the transliterators
  1047. * available at the time of the call. The caller should delete this object
  1048. * when done using it.
  1049. * @stable ICU 3.0
  1050. */
  1051. static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
  1052. /**
  1053. * Return the number of registered source specifiers.
  1054. * @return the number of registered source specifiers.
  1055. * @stable ICU 2.0
  1056. */
  1057. static int32_t U_EXPORT2 countAvailableSources(void);
  1058. /**
  1059. * Return a registered source specifier.
  1060. * @param index which specifier to return, from 0 to n-1, where
  1061. * n = countAvailableSources()
  1062. * @param result fill-in parameter to receive the source specifier.
  1063. * If index is out of range, result will be empty.
  1064. * @return reference to result
  1065. * @stable ICU 2.0
  1066. */
  1067. static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
  1068. UnicodeString& result);
  1069. /**
  1070. * Return the number of registered target specifiers for a given
  1071. * source specifier.
  1072. * @param source the given source specifier.
  1073. * @return the number of registered target specifiers for a given
  1074. * source specifier.
  1075. * @stable ICU 2.0
  1076. */
  1077. static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
  1078. /**
  1079. * Return a registered target specifier for a given source.
  1080. * @param index which specifier to return, from 0 to n-1, where
  1081. * n = countAvailableTargets(source)
  1082. * @param source the source specifier
  1083. * @param result fill-in parameter to receive the target specifier.
  1084. * If source is invalid or if index is out of range, result will
  1085. * be empty.
  1086. * @return reference to result
  1087. * @stable ICU 2.0
  1088. */
  1089. static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
  1090. const UnicodeString& source,
  1091. UnicodeString& result);
  1092. /**
  1093. * Return the number of registered variant specifiers for a given
  1094. * source-target pair.
  1095. * @param source the source specifiers.
  1096. * @param target the target specifiers.
  1097. * @stable ICU 2.0
  1098. */
  1099. static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
  1100. const UnicodeString& target);
  1101. /**
  1102. * Return a registered variant specifier for a given source-target
  1103. * pair.
  1104. * @param index which specifier to return, from 0 to n-1, where
  1105. * n = countAvailableVariants(source, target)
  1106. * @param source the source specifier
  1107. * @param target the target specifier
  1108. * @param result fill-in parameter to receive the variant
  1109. * specifier. If source is invalid or if target is invalid or if
  1110. * index is out of range, result will be empty.
  1111. * @return reference to result
  1112. * @stable ICU 2.0
  1113. */
  1114. static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
  1115. const UnicodeString& source,
  1116. const UnicodeString& target,
  1117. UnicodeString& result);
  1118. protected:
  1119. #ifndef U_HIDE_INTERNAL_API
  1120. /**
  1121. * Non-mutexed internal method
  1122. * @internal
  1123. */
  1124. static int32_t _countAvailableSources(void);
  1125. /**
  1126. * Non-mutexed internal method
  1127. * @internal
  1128. */
  1129. static UnicodeString& _getAvailableSource(int32_t index,
  1130. UnicodeString& result);
  1131. /**
  1132. * Non-mutexed internal method
  1133. * @internal
  1134. */
  1135. static int32_t _countAvailableTargets(const UnicodeString& source);
  1136. /**
  1137. * Non-mutexed internal method
  1138. * @internal
  1139. */
  1140. static UnicodeString& _getAvailableTarget(int32_t index,
  1141. const UnicodeString& source,
  1142. UnicodeString& result);
  1143. /**
  1144. * Non-mutexed internal method
  1145. * @internal
  1146. */
  1147. static int32_t _countAvailableVariants(const UnicodeString& source,
  1148. const UnicodeString& target);
  1149. /**
  1150. * Non-mutexed internal method
  1151. * @internal
  1152. */
  1153. static UnicodeString& _getAvailableVariant(int32_t index,
  1154. const UnicodeString& source,
  1155. const UnicodeString& target,
  1156. UnicodeString& result);
  1157. #endif /* U_HIDE_INTERNAL_API */
  1158. protected:
  1159. /**
  1160. * Set the ID of this transliterator. Subclasses shouldn't do
  1161. * this, unless the underlying script behavior has changed.
  1162. * @param id the new id to be set.
  1163. * @stable ICU 2.4
  1164. */
  1165. void setID(const UnicodeString& id);
  1166. public:
  1167. /**
  1168. * Return the class ID for this class. This is useful only for
  1169. * comparing to a return value from getDynamicClassID().
  1170. * Note that Transliterator is an abstract base class, and therefore
  1171. * no fully constructed object will have a dynamic
  1172. * UClassID that equals the UClassID returned from
  1173. * Transliterator::getStaticClassID().
  1174. * @return The class ID for class Transliterator.
  1175. * @stable ICU 2.0
  1176. */
  1177. static UClassID U_EXPORT2 getStaticClassID(void);
  1178. /**
  1179. * Returns a unique class ID <b>polymorphically</b>. This method
  1180. * is to implement a simple version of RTTI, since not all C++
  1181. * compilers support genuine RTTI. Polymorphic operator==() and
  1182. * clone() methods call this method.
  1183. *
  1184. * <p>Concrete subclasses of Transliterator must use the
  1185. * UOBJECT_DEFINE_RTTI_IMPLEMENTATION macro from
  1186. * uobject.h to provide the RTTI functions.
  1187. *
  1188. * @return The class ID for this object. All objects of a given
  1189. * class have the same class ID. Objects of other classes have
  1190. * different class IDs.
  1191. * @stable ICU 2.0
  1192. */
  1193. virtual UClassID getDynamicClassID(void) const = 0;
  1194. private:
  1195. static UBool initializeRegistry(UErrorCode &status);
  1196. public:
  1197. #ifndef U_HIDE_OBSOLETE_API
  1198. /**
  1199. * Return the number of IDs currently registered with the system.
  1200. * To retrieve the actual IDs, call getAvailableID(i) with
  1201. * i from 0 to countAvailableIDs() - 1.
  1202. * @return the number of IDs currently registered with the system.
  1203. * @obsolete ICU 3.4 use getAvailableIDs() instead
  1204. */
  1205. static int32_t U_EXPORT2 countAvailableIDs(void);
  1206. /**
  1207. * Return the index-th available ID. index must be between 0
  1208. * and countAvailableIDs() - 1, inclusive. If index is out of
  1209. * range, the result of getAvailableID(0) is returned.
  1210. * @param index the given ID index.
  1211. * @return the index-th available ID. index must be between 0
  1212. * and countAvailableIDs() - 1, inclusive. If index is out of
  1213. * range, the result of getAvailableID(0) is returned.
  1214. * @obsolete ICU 3.4 use getAvailableIDs() instead; this function
  1215. * is not thread safe, since it returns a reference to storage that
  1216. * may become invalid if another thread calls unregister
  1217. */
  1218. static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
  1219. #endif /* U_HIDE_OBSOLETE_API */
  1220. };
  1221. inline int32_t Transliterator::getMaximumContextLength(void) const {
  1222. return maximumContextLength;
  1223. }
  1224. inline void Transliterator::setID(const UnicodeString& id) {
  1225. ID = id;
  1226. // NUL-terminate the ID string, which is a non-aliased copy.
  1227. ID.append((UChar)0);
  1228. ID.truncate(ID.length()-1);
  1229. }
  1230. #ifndef U_HIDE_INTERNAL_API
  1231. inline Transliterator::Token Transliterator::integerToken(int32_t i) {
  1232. Token t;
  1233. t.integer = i;
  1234. return t;
  1235. }
  1236. inline Transliterator::Token Transliterator::pointerToken(void* p) {
  1237. Token t;
  1238. t.pointer = p;
  1239. return t;
  1240. }
  1241. #endif /* U_HIDE_INTERNAL_API */
  1242. U_NAMESPACE_END
  1243. #endif /* #if !UCONFIG_NO_TRANSLITERATION */
  1244. #endif