utext.h 58 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2004-2012, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: utext.h
  11. * encoding: US-ASCII
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2004oct06
  16. * created by: Markus W. Scherer
  17. */
  18. #ifndef __UTEXT_H__
  19. #define __UTEXT_H__
  20. /**
  21. * \file
  22. * \brief C API: Abstract Unicode Text API
  23. *
  24. * The Text Access API provides a means to allow text that is stored in alternative
  25. * formats to work with ICU services. ICU normally operates on text that is
  26. * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
  27. * UnicodeString for C++ APIs.
  28. *
  29. * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
  30. * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
  31. *
  32. * There are three general classes of usage for UText:
  33. *
  34. * Application Level Use. This is the simplest usage - applications would
  35. * use one of the utext_open() functions on their input text, and pass
  36. * the resulting UText to the desired ICU service.
  37. *
  38. * Second is usage in ICU Services, such as break iteration, that will need to
  39. * operate on input presented to them as a UText. These implementations
  40. * will need to use the iteration and related UText functions to gain
  41. * access to the actual text.
  42. *
  43. * The third class of UText users are "text providers." These are the
  44. * UText implementations for the various text storage formats. An application
  45. * or system with a unique text storage format can implement a set of
  46. * UText provider functions for that format, which will then allow
  47. * ICU services to operate on that format.
  48. *
  49. *
  50. * <em>Iterating over text</em>
  51. *
  52. * Here is sample code for a forward iteration over the contents of a UText
  53. *
  54. * \code
  55. * UChar32 c;
  56. * UText *ut = whatever();
  57. *
  58. * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
  59. * // do whatever with the codepoint c here.
  60. * }
  61. * \endcode
  62. *
  63. * And here is similar code to iterate in the reverse direction, from the end
  64. * of the text towards the beginning.
  65. *
  66. * \code
  67. * UChar32 c;
  68. * UText *ut = whatever();
  69. * int64_t textLength = utext_nativeLength(ut);
  70. * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
  71. * // do whatever with the codepoint c here.
  72. * }
  73. * \endcode
  74. *
  75. * <em>Characters and Indexing</em>
  76. *
  77. * Indexing into text by UText functions is nearly always in terms of the native
  78. * indexing of the underlying text storage. The storage format could be UTF-8
  79. * or UTF-32, for example. When coding to the UText access API, no assumptions
  80. * can be made regarding the size of characters, or how far an index
  81. * may move when iterating between characters.
  82. *
  83. * All indices supplied to UText functions are pinned to the length of the
  84. * text. An out-of-bounds index is not considered to be an error, but is
  85. * adjusted to be in the range 0 <= index <= length of input text.
  86. *
  87. *
  88. * When an index position is returned from a UText function, it will be
  89. * a native index to the underlying text. In the case of multi-unit characters,
  90. * it will always refer to the first position of the character,
  91. * never to the interior. This is essentially the same thing as saying that
  92. * a returned index will always point to a boundary between characters.
  93. *
  94. * When a native index is supplied to a UText function, all indices that
  95. * refer to any part of a multi-unit character representation are considered
  96. * to be equivalent. In the case of multi-unit characters, an incoming index
  97. * will be logically normalized to refer to the start of the character.
  98. *
  99. * It is possible to test whether a native index is on a code point boundary
  100. * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
  101. * If the index is returned unchanged, it was on a code point boundary. If
  102. * an adjusted index is returned, the original index referred to the
  103. * interior of a character.
  104. *
  105. * <em>Conventions for calling UText functions</em>
  106. *
  107. * Most UText access functions have as their first parameter a (UText *) pointer,
  108. * which specifies the UText to be used. Unless otherwise noted, the
  109. * pointer must refer to a valid, open UText. Attempting to
  110. * use a closed UText or passing a NULL pointer is a programming error and
  111. * will produce undefined results or NULL pointer exceptions.
  112. *
  113. * The utext_open family of functions can either open an existing (closed)
  114. * UText, or heap allocate a new UText. Here is sample code for creating
  115. * a stack-allocated UText.
  116. *
  117. * \code
  118. * char *s = whatever(); // A utf-8 string
  119. * UErrorCode status = U_ZERO_ERROR;
  120. * UText ut = UTEXT_INITIALIZER;
  121. * utext_openUTF8(&ut, s, -1, &status);
  122. * if (U_FAILURE(status)) {
  123. * // error handling
  124. * } else {
  125. * // work with the UText
  126. * }
  127. * \endcode
  128. *
  129. * Any existing UText passed to an open function _must_ have been initialized,
  130. * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
  131. * by an open function. Passing NULL will cause the open function to
  132. * heap-allocate and fully initialize a new UText.
  133. *
  134. */
  135. #include "unicode/utypes.h"
  136. #include "unicode/uchar.h"
  137. #if U_SHOW_CPLUSPLUS_API
  138. #include "unicode/localpointer.h"
  139. #include "unicode/rep.h"
  140. #include "unicode/unistr.h"
  141. #include "unicode/chariter.h"
  142. #endif
  143. U_CDECL_BEGIN
  144. struct UText;
  145. typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
  146. /***************************************************************************************
  147. *
  148. * C Functions for creating UText wrappers around various kinds of text strings.
  149. *
  150. ****************************************************************************************/
  151. /**
  152. * Close function for UText instances.
  153. * Cleans up and releases any resources being held by an open UText.
  154. * <p>
  155. * If the UText was originally allocated by one of the utext_open functions,
  156. * the storage associated with the UText will also be freed.
  157. * If the UText storage originated with the application, as it would with
  158. * a local or static instance, the storage will not be deleted.
  159. *
  160. * An open UText can be reset to refer to a new string by using one of the utext_open()
  161. * functions without first closing the UText.
  162. *
  163. * @param ut The UText to be closed.
  164. * @return NULL if the UText struct was deleted by the close. If the UText struct
  165. * was originally provided by the caller to the open function, it is
  166. * returned by this function, and may be safely used again in
  167. * a subsequent utext_open() call.
  168. *
  169. * @stable ICU 3.4
  170. */
  171. U_STABLE UText * U_EXPORT2
  172. utext_close(UText *ut);
  173. #if U_SHOW_CPLUSPLUS_API
  174. U_NAMESPACE_BEGIN
  175. /**
  176. * \class LocalUTextPointer
  177. * "Smart pointer" class, closes a UText via utext_close().
  178. * For most methods see the LocalPointerBase base class.
  179. * Declared only for C++ clients, i.e. when U_SHOW_CPLUSPLUS_API is non-zero.
  180. * @see LocalPointerBase
  181. * @see LocalPointer
  182. * @stable ICU 4.4
  183. */
  184. U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
  185. U_NAMESPACE_END
  186. #endif
  187. /**
  188. * Open a read-only UText implementation for UTF-8 strings.
  189. *
  190. * \htmlonly
  191. * Any invalid UTF-8 in the input will be handled in this way:
  192. * a sequence of bytes that has the form of a truncated, but otherwise valid,
  193. * UTF-8 sequence will be replaced by a single Unicode replacement character, \uFFFD.
  194. * Any other illegal bytes will each be replaced by a \uFFFD.
  195. * \endhtmlonly
  196. *
  197. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  198. * If non-NULL, must refer to an initialized UText struct, which will then
  199. * be reset to reference the specified UTF-8 string.
  200. * @param s A UTF-8 string. Must not be NULL.
  201. * @param length The length of the UTF-8 string in bytes, or -1 if the string is
  202. * NUL-terminated.
  203. * @param status Errors are returned here.
  204. * @return A pointer to the UText. If a pre-allocated UText was provided, it
  205. * will always be used and returned.
  206. * @stable ICU 3.4
  207. */
  208. U_STABLE UText * U_EXPORT2
  209. utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
  210. /**
  211. * Open a read-only UText for a UChar * (UTF-16) string.
  212. *
  213. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  214. * If non-NULL, must refer to an initialized UText struct, which will then
  215. * be reset to reference the specified UChar string.
  216. * @param s A UChar (UTF-16) string.
  217. * @param length The number of UChars in the input string, or -1 if the string is
  218. * NUL-terminated.
  219. * @param status Errors are returned here.
  220. * @return A pointer to the UText. If a pre-allocated UText was provided, it
  221. * will always be used and returned.
  222. * @stable ICU 3.4
  223. */
  224. U_STABLE UText * U_EXPORT2
  225. utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
  226. #if U_SHOW_CPLUSPLUS_API
  227. /**
  228. * Open a writable UText for a non-const UnicodeString.
  229. *
  230. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  231. * If non-NULL, must refer to an initialized UText struct, which will then
  232. * be reset to reference the specified input string.
  233. * @param s A non-const UnicodeString to be wrapped.
  234. * @param status Errors are returned here.
  235. * @return Pointer to the UText. If a UText was supplied as input, it
  236. * will always be used and returned.
  237. * @stable ICU 3.4
  238. */
  239. U_STABLE UText * U_EXPORT2
  240. utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
  241. /**
  242. * Open a UText for a const UnicodeString. The resulting UText will not be writable.
  243. *
  244. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  245. * If non-NULL, must refer to an initialized UText struct, which will then
  246. * be reset to reference the specified input string.
  247. * @param s A const UnicodeString to be wrapped.
  248. * @param status Errors are returned here.
  249. * @return Pointer to the UText. If a UText was supplied as input, it
  250. * will always be used and returned.
  251. * @stable ICU 3.4
  252. */
  253. U_STABLE UText * U_EXPORT2
  254. utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
  255. /**
  256. * Open a writable UText implementation for an ICU Replaceable object.
  257. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  258. * If non-NULL, must refer to an already existing UText, which will then
  259. * be reset to reference the specified Replaceable text.
  260. * @param rep A Replaceable text object.
  261. * @param status Errors are returned here.
  262. * @return Pointer to the UText. If a UText was supplied as input, it
  263. * will always be used and returned.
  264. * @see Replaceable
  265. * @stable ICU 3.4
  266. */
  267. U_STABLE UText * U_EXPORT2
  268. utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
  269. /**
  270. * Open a UText implementation over an ICU CharacterIterator.
  271. * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
  272. * If non-NULL, must refer to an already existing UText, which will then
  273. * be reset to reference the specified character iterator.
  274. * @param ci A CharacterIterator.
  275. * @param status Errors are returned here.
  276. * @return Pointer to the UText. If a UText was supplied as input, it
  277. * will always be used and returned.
  278. * @see CharacterIterator
  279. * @stable ICU 3.4
  280. */
  281. U_STABLE UText * U_EXPORT2
  282. utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
  283. #endif
  284. /**
  285. * Clone a UText. This is much like opening a UText where the source text is itself
  286. * another UText.
  287. *
  288. * A deep clone will copy both the UText data structures and the underlying text.
  289. * The original and cloned UText will operate completely independently; modifications
  290. * made to the text in one will not affect the other. Text providers are not
  291. * required to support deep clones. The user of clone() must check the status return
  292. * and be prepared to handle failures.
  293. *
  294. * The standard UText implementations for UTF8, UChar *, UnicodeString and
  295. * Replaceable all support deep cloning.
  296. *
  297. * The UText returned from a deep clone will be writable, assuming that the text
  298. * provider is able to support writing, even if the source UText had been made
  299. * non-writable by means of utext_freeze().
  300. *
  301. * A shallow clone replicates only the UText data structures; it does not make
  302. * a copy of the underlying text. Shallow clones can be used as an efficient way to
  303. * have multiple iterators active in a single text string that is not being
  304. * modified.
  305. *
  306. * A shallow clone operation will not fail, barring truly exceptional conditions such
  307. * as memory allocation failures.
  308. *
  309. * Shallow UText clones should be avoided if the UText functions that modify the
  310. * text are expected to be used, either on the original or the cloned UText.
  311. * Any such modifications can cause unpredictable behavior. Read-only
  312. * shallow clones provide some protection against errors of this type by
  313. * disabling text modification via the cloned UText.
  314. *
  315. * A shallow clone made with the readOnly parameter == FALSE will preserve the
  316. * utext_isWritable() state of the source object. Note, however, that
  317. * write operations must be avoided while more than one UText exists that refers
  318. * to the same underlying text.
  319. *
  320. * A UText and its clone may be safely concurrently accessed by separate threads.
  321. * This is true for read access only with shallow clones, and for both read and
  322. * write access with deep clones.
  323. * It is the responsibility of the Text Provider to ensure that this thread safety
  324. * constraint is met.
  325. *
  326. * @param dest A UText struct to be filled in with the result of the clone operation,
  327. * or NULL if the clone function should heap-allocate a new UText struct.
  328. * If non-NULL, must refer to an already existing UText, which will then
  329. * be reset to become the clone.
  330. * @param src The UText to be cloned.
  331. * @param deep TRUE to request a deep clone, FALSE for a shallow clone.
  332. * @param readOnly TRUE to request that the cloned UText have read-only access to the
  333. * underlying text.
  334. * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
  335. * will be returned if the text provider is unable to clone the
  336. * original text.
  337. * @return The newly created clone, or NULL if the clone operation failed.
  338. * @stable ICU 3.4
  339. */
  340. U_STABLE UText * U_EXPORT2
  341. utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
  342. /**
  343. * Compare two UText objects for equality.
  344. * UTexts are equal if they are iterating over the same text, and
  345. * have the same iteration position within the text.
  346. * If either or both of the parameters are NULL, the comparison is FALSE.
  347. *
  348. * @param a The first of the two UTexts to compare.
  349. * @param b The other UText to be compared.
  350. * @return TRUE if the two UTexts are equal; FALSE otherwise, including when either is NULL.
  351. * @stable ICU 3.6
  352. */
  353. U_STABLE UBool U_EXPORT2
  354. utext_equals(const UText *a, const UText *b);
  355. /*****************************************************************************
  356. *
  357. * Functions to work with the text represented by a UText wrapper
  358. *
  359. *****************************************************************************/
  360. /**
  361. * Get the length of the text. Depending on the characteristics
  362. * of the underlying text representation, this may be an expensive operation.
  363. * @see utext_isLengthExpensive()
  364. *
  365. *
  366. * @param ut the text to be accessed.
  367. * @return the length of the text, expressed in native units.
  368. *
  369. * @stable ICU 3.4
  370. */
  371. U_STABLE int64_t U_EXPORT2
  372. utext_nativeLength(UText *ut);
  373. /**
  374. * Return TRUE if calculating the length of the text could be expensive.
  375. * Finding the length of NUL-terminated strings is considered to be expensive.
  376. *
  377. * Note that the value returned by this function may change
  378. * as the result of other operations on a UText.
  379. * Once the length of a string has been discovered, it will no longer
  380. * be expensive to report it.
  381. *
  382. * @param ut the text to be accessed.
  383. * @return TRUE if determining the length of the text could be time consuming.
  384. * @stable ICU 3.4
  385. */
  386. U_STABLE UBool U_EXPORT2
  387. utext_isLengthExpensive(const UText *ut);
  388. /**
  389. * Returns the code point at the requested index,
  390. * or U_SENTINEL (-1) if it is out of bounds.
  391. *
  392. * If the specified index points to the interior of a multi-unit
  393. * character - one of the trail bytes of a UTF-8 sequence, for example -
  394. * the complete code point will be returned.
  395. *
  396. * The iteration position will be set to the start of the returned code point.
  397. *
  398. * This function is roughly equivalent to the sequence
  399. * utext_setNativeIndex(ut, index);
  400. * utext_current32(ut);
  401. * (There is a subtle difference if the index is out of bounds by being less than zero -
  402. * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current32()
  403. * will return the char at zero. utext_char32At(negative index), on the other hand, will
  404. * return the U_SENTINEL value of -1.)
  405. *
  406. * @param ut the text to be accessed
  407. * @param nativeIndex the native index of the character to be accessed. If the index points
  408. * to other than the first unit of a multi-unit character, it will be adjusted
  409. * to the start of the character.
  410. * @return the code point at the specified index, or U_SENTINEL (-1) if the index is out of bounds.
  411. * @stable ICU 3.4
  412. */
  413. U_STABLE UChar32 U_EXPORT2
  414. utext_char32At(UText *ut, int64_t nativeIndex);
  415. /**
  416. *
  417. * Get the code point at the current iteration position,
  418. * or U_SENTINEL (-1) if the iteration has reached the end of
  419. * the input text.
  420. *
  421. * @param ut the text to be accessed.
  422. * @return the Unicode code point at the current iteration position, or U_SENTINEL (-1) at the end of the text.
  423. * @stable ICU 3.4
  424. */
  425. U_STABLE UChar32 U_EXPORT2
  426. utext_current32(UText *ut);
  427. /**
  428. * Get the code point at the current iteration position of the UText, and
  429. * advance the position to the first index following the character.
  430. *
  431. * If the position is at the end of the text (the index following
  432. * the last character, which is also the length of the text),
  433. * return U_SENTINEL (-1) and do not advance the index.
  434. *
  435. * This is a post-increment operation.
  436. *
  437. * An inline macro version of this function, UTEXT_NEXT32(),
  438. * is available for performance critical use.
  439. *
  440. * @param ut the text to be accessed.
  441. * @return the Unicode code point at the iteration position, or U_SENTINEL (-1) at the end of the text.
  442. * @see UTEXT_NEXT32
  443. * @stable ICU 3.4
  444. */
  445. U_STABLE UChar32 U_EXPORT2
  446. utext_next32(UText *ut);
  447. /**
  448. * Move the iterator position to the character (code point) whose
  449. * index precedes the current position, and return that character.
  450. * This is a pre-decrement operation.
  451. *
  452. * If the initial position is at the start of the text (index of 0),
  453. * return U_SENTINEL (-1) and leave the position unchanged.
  454. *
  455. * An inline macro version of this function, UTEXT_PREVIOUS32(),
  456. * is available for performance critical use.
  457. *
  458. * @param ut the text to be accessed.
  459. * @return the previous UChar32 code point, or U_SENTINEL (-1)
  460. * if the iteration has reached the start of the text.
  461. * @see UTEXT_PREVIOUS32
  462. * @stable ICU 3.4
  463. */
  464. U_STABLE UChar32 U_EXPORT2
  465. utext_previous32(UText *ut);
  466. /**
  467. * Set the iteration index and return the code point at that index.
  468. * Leave the iteration index at the start of the following code point.
  469. *
  470. * This function is the most efficient and convenient way to
  471. * begin a forward iteration. The results are identical to those
  472. * from the sequence
  473. * \code
  474. * utext_setNativeIndex(ut, nativeIndex);
  475. * utext_next32(ut);
  476. * \endcode
  477. *
  478. * @param ut the text to be accessed.
  479. * @param nativeIndex Iteration index, in the native units of the text provider.
  480. * @return Code point which starts at or before index,
  481. * or U_SENTINEL (-1) if it is out of bounds.
  482. * @stable ICU 3.4
  483. */
  484. U_STABLE UChar32 U_EXPORT2
  485. utext_next32From(UText *ut, int64_t nativeIndex);
  486. /**
  487. * Set the iteration index, and return the code point preceding the
  488. * one specified by the initial index. Leave the iteration position
  489. * at the start of the returned code point.
  490. *
  491. * This function is the most efficient and convenient way to
  492. * begin a backward iteration.
  493. *
  494. * @param ut the text to be accessed.
  495. * @param nativeIndex Iteration index, in the native units of the text provider.
  496. * @return Code point preceding the one at the initial index,
  497. * or U_SENTINEL (-1) if it is out of bounds.
  498. *
  499. * @stable ICU 3.4
  500. */
  501. U_STABLE UChar32 U_EXPORT2
  502. utext_previous32From(UText *ut, int64_t nativeIndex);
  503. /**
  504. * Get the current iterator position, which can range from 0 to
  505. * the length of the text.
  506. * The position is a native index into the input text, in whatever format it
  507. * may have (possibly UTF-8, for example), and may not always be the same as
  508. * the corresponding UChar (UTF-16) index.
  509. * The returned position will always be aligned to a code point boundary.
  510. *
  511. * @param ut the text to be accessed.
  512. * @return the current index position, in the native units of the text provider.
  513. * @stable ICU 3.4
  514. */
  515. U_STABLE int64_t U_EXPORT2
  516. utext_getNativeIndex(const UText *ut);
  517. /**
  518. * Set the current iteration position to the nearest code point
  519. * boundary at or preceding the specified index.
  520. * The index is in the native units of the original input text.
  521. * If the index is out of range, it will be pinned to be within
  522. * the range of the input text.
  523. * <p>
  524. * It will usually be more efficient to begin an iteration
  525. * using the functions utext_next32From() or utext_previous32From()
  526. * rather than utext_setNativeIndex().
  527. * <p>
  528. * Moving the index position to an adjacent character is best done
  529. * with utext_next32(), utext_previous32() or utext_moveIndex32().
  530. * Attempting to do direct arithmetic on the index position is
  531. * complicated by the fact that the size (in native units) of a
  532. * character depends on the underlying representation of the character
  533. * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
  534. * easily knowable.
  535. *
  536. * @param ut the text to be accessed.
  537. * @param nativeIndex the native unit index of the new iteration position.
  538. * @stable ICU 3.4
  539. */
  540. U_STABLE void U_EXPORT2
  541. utext_setNativeIndex(UText *ut, int64_t nativeIndex);
  542. /**
  543. * Move the iterator position by delta code points. The number of code points
  544. * is a signed number; a negative delta will move the iterator backwards,
  545. * towards the start of the text.
  546. * <p>
  547. * The index is moved by <code>delta</code> code points
  548. * forward or backward, but no further backward than to 0 and
  549. * no further forward than to utext_nativeLength().
  550. * The resulting index value will be between 0 and length, inclusive.
  551. *
  552. * @param ut the text to be accessed.
  553. * @param delta the signed number of code points to move the iteration position.
  554. * @return TRUE if the position could be moved the requested number of positions while
  555. * staying within the range [0, text length].
  556. * @stable ICU 3.4
  557. */
  558. U_STABLE UBool U_EXPORT2
  559. utext_moveIndex32(UText *ut, int32_t delta);
  560. /**
  561. * Get the native index of the character preceding the current position.
  562. * If the iteration position is already at the start of the text, zero
  563. * is returned.
  564. * The value returned is the same as that obtained from the following sequence,
  565. * but without the side effect of changing the iteration position.
  566. *
  567. * \code
  568. * UText *ut = whatever;
  569. * ...
  570. * utext_previous32(ut);
  571. * utext_getNativeIndex(ut);
  572. * \endcode
  573. *
  574. * This function is most useful during forwards iteration, where it will get the
  575. * native index of the character most recently returned from utext_next32().
  576. *
  577. * @param ut the text to be accessed
  578. * @return the native index of the character preceding the current index position,
  579. * or zero if the current position is at the start of the text.
  580. * @stable ICU 3.6
  581. */
  582. U_STABLE int64_t U_EXPORT2
  583. utext_getPreviousNativeIndex(UText *ut);
  584. /**
  585. *
  586. * Extract text from a UText into a UChar buffer. The range of text to be extracted
  587. * is specified in the native indices of the UText provider. These may not necessarily
  588. * be UTF-16 indices.
  589. * <p>
  590. * The size (number of 16 bit UChars) of the data to be extracted is returned. The
  591. * full number of UChars is returned, even when the extracted text is truncated
  592. * because the specified buffer size is too small.
  593. * <p>
  594. * The extracted string will (if you are a user) / must (if you are a text provider)
  595. * be NUL-terminated if there is sufficient space in the destination buffer. This
  596. * terminating NUL is not included in the returned length.
  597. * <p>
  598. * The iteration index is left at the position following the last extracted character.
  599. *
  600. * @param ut the UText from which to extract data.
  601. * @param nativeStart the native index of the first character to extract.\
  602. * If the specified index is out of range,
  603. * it will be pinned to to be within 0 <= index <= textLength
  604. * @param nativeLimit the native string index of the position following the last
  605. * character to extract. If the specified index is out of range,
  606. * it will be pinned to to be within 0 <= index <= textLength.
  607. * nativeLimit must be >= nativeStart.
  608. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
  609. * @param destCapacity The size, in UChars, of the destination buffer. May be zero
  610. * for precomputing the required size.
  611. * @param status receives any error status.
  612. * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
  613. * buffer was too small. Returns number of UChars for preflighting.
  614. * @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
  615. *
  616. * @stable ICU 3.4
  617. */
  618. U_STABLE int32_t U_EXPORT2
  619. utext_extract(UText *ut,
  620. int64_t nativeStart, int64_t nativeLimit,
  621. UChar *dest, int32_t destCapacity,
  622. UErrorCode *status);
  623. /************************************************************************************
  624. *
  625. * #define inline versions of selected performance-critical text access functions
  626. * Caution: do not use auto increment++ or decrement-- expressions
  627. * as parameters to these macros.
  628. *
  629. * For most use, where there is no extreme performance constraint, the
  630. * normal, non-inline functions are a better choice. The resulting code
  631. * will be smaller, and, if the need ever arises, easier to debug.
  632. *
  633. * These are implemented as #defines rather than real functions
  634. * because there is no fully portable way to do inline functions in plain C.
  635. *
  636. ************************************************************************************/
  637. #ifndef U_HIDE_INTERNAL_API
  638. /**
  639. * inline version of utext_current32(), for performance-critical situations.
  640. *
  641. * Get the code point at the current iteration position of the UText.
  642. * Returns U_SENTINEL (-1) if the position is at the end of the
  643. * text.
  644. *
  645. * @internal ICU 4.4 technology preview
  646. */
  647. #define UTEXT_CURRENT32(ut) \
  648. ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
  649. ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
  650. #endif /* U_HIDE_INTERNAL_API */
  651. /**
  652. * inline version of utext_next32(), for performance-critical situations.
  653. *
  654. * Get the code point at the current iteration position of the UText, and
  655. * advance the position to the first index following the character.
  656. * This is a post-increment operation.
  657. * Returns U_SENTINEL (-1) if the position is at the end of the
  658. * text.
  659. *
  660. * @stable ICU 3.4
  661. */
  662. #define UTEXT_NEXT32(ut) \
  663. ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
  664. ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
  665. /**
  666. * inline version of utext_previous32(), for performance-critical situations.
  667. *
  668. * Move the iterator position to the character (code point) whose
  669. * index precedes the current position, and return that character.
  670. * This is a pre-decrement operation.
  671. * Returns U_SENTINEL (-1) if the position is at the start of the text.
  672. *
  673. * @stable ICU 3.4
  674. */
  675. #define UTEXT_PREVIOUS32(ut) \
  676. ((ut)->chunkOffset > 0 && \
  677. (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
  678. (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
  679. /**
  680. * inline version of utext_getNativeIndex(), for performance-critical situations.
  681. *
  682. * Get the current iterator position, which can range from 0 to
  683. * the length of the text.
  684. * The position is a native index into the input text, in whatever format it
  685. * may have (possibly UTF-8 for example), and may not always be the same as
  686. * the corresponding UChar (UTF-16) index.
  687. * The returned position will always be aligned to a code point boundary.
  688. *
  689. * @stable ICU 3.6
  690. */
  691. #define UTEXT_GETNATIVEINDEX(ut) \
  692. ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
  693. (ut)->chunkNativeStart+(ut)->chunkOffset : \
  694. (ut)->pFuncs->mapOffsetToNative(ut))
  695. /**
  696. * inline version of utext_setNativeIndex(), for performance-critical situations.
  697. *
  698. * Set the current iteration position to the nearest code point
  699. * boundary at or preceding the specified index.
  700. * The index is in the native units of the original input text.
  701. * If the index is out of range, it will be pinned to be within
  702. * the range of the input text.
  703. *
  704. * @stable ICU 3.8
  705. */
  706. #define UTEXT_SETNATIVEINDEX(ut, ix) \
  707. { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
  708. if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
  709. (ut)->chunkOffset=(int32_t)__offset; \
  710. } else { \
  711. utext_setNativeIndex((ut), (ix)); } }
  712. /************************************************************************************
  713. *
  714. * Functions related to writing or modifying the text.
  715. * These will work only with modifiable UTexts. Attempting to
  716. * modify a read-only UText will return an error status.
  717. *
  718. ************************************************************************************/
  719. /**
  720. * Return TRUE if the text can be written (modified) with utext_replace() or
  721. * utext_copy(). For the text to be writable, the text provider must
  722. * be of a type that supports writing and the UText must not be frozen.
  723. *
  724. * Attempting to modify text when utext_isWriteable() is FALSE will fail -
  725. * the text will not be modified, and an error will be returned from the function
  726. * that attempted the modification.
  727. *
  728. * @param ut the UText to be tested.
  729. * @return TRUE if the text is modifiable.
  730. *
  731. * @see utext_freeze()
  732. * @see utext_replace()
  733. * @see utext_copy()
  734. * @stable ICU 3.4
  735. *
  736. */
  737. U_STABLE UBool U_EXPORT2
  738. utext_isWritable(const UText *ut);
  739. /**
  740. * Test whether there is meta data associated with the text.
  741. * @see Replaceable::hasMetaData()
  742. *
  743. * @param ut The UText to be tested
  744. * @return TRUE if the underlying text includes meta data.
  745. * @stable ICU 3.4
  746. */
  747. U_STABLE UBool U_EXPORT2
  748. utext_hasMetaData(const UText *ut);
  749. /**
  750. * Replace a range of the original text with a replacement text.
  751. *
  752. * Leaves the current iteration position at the position following the
  753. * newly inserted replacement text.
  754. *
  755. * This function is only available on UText types that support writing,
  756. * that is, ones where utext_isWritable() returns TRUE.
  757. *
  758. * When using this function, there should be only a single UText opened onto the
  759. * underlying native text string. Behavior after a replace operation
  760. * on a UText is undefined for any other additional UTexts that refer to the
  761. * modified string.
  762. *
  763. * @param ut the UText representing the text to be operated on.
  764. * @param nativeStart the native index of the start of the region to be replaced
  765. * @param nativeLimit the native index of the character following the region to be replaced.
  766. * @param replacementText pointer to the replacement text
  767. * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
  768. * @param status receives any error status. Possible errors include
  769. * U_NO_WRITE_PERMISSION
  770. *
  771. * @return The signed number of (native) storage units by which
  772. * the length of the text expanded or contracted.
  773. *
  774. * @stable ICU 3.4
  775. */
  776. U_STABLE int32_t U_EXPORT2
  777. utext_replace(UText *ut,
  778. int64_t nativeStart, int64_t nativeLimit,
  779. const UChar *replacementText, int32_t replacementLength,
  780. UErrorCode *status);
  781. /**
  782. *
  783. * Copy or move a substring from one position to another within the text,
  784. * while retaining any metadata associated with the text.
  785. * This function is used to duplicate or reorder substrings.
  786. * The destination index must not overlap the source range.
  787. *
  788. * The text to be copied or moved is inserted at destIndex;
  789. * it does not replace or overwrite any existing text.
  790. *
  791. * The iteration position is left following the newly inserted text
  792. * at the destination position.
  793. *
  794. * This function is only available on UText types that support writing,
  795. * that is, ones where utext_isWritable() returns TRUE.
  796. *
  797. * When using this function, there should be only a single UText opened onto the
  798. * underlying native text string. Behavior after a copy operation
  799. * on a UText is undefined in any other additional UTexts that refer to the
  800. * modified string.
  801. *
  802. * @param ut The UText representing the text to be operated on.
  803. * @param nativeStart The native index of the start of the region to be copied or moved
  804. * @param nativeLimit The native index of the character position following the region
  805. * to be copied.
  806. * @param destIndex The native destination index to which the source substring is
  807. * copied or moved.
  808. * @param move If TRUE, then the substring is moved, not copied/duplicated.
  809. * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
  810. *
  811. * @stable ICU 3.4
  812. */
  813. U_STABLE void U_EXPORT2
  814. utext_copy(UText *ut,
  815. int64_t nativeStart, int64_t nativeLimit,
  816. int64_t destIndex,
  817. UBool move,
  818. UErrorCode *status);
  819. /**
  820. * <p>
  821. * Freeze a UText. This prevents any modification to the underlying text itself
  822. * by means of functions operating on this UText.
  823. * </p>
  824. * <p>
  825. * Once frozen, a UText can not be unfrozen. The intent is to ensure
  826. * that a the text underlying a frozen UText wrapper cannot be modified via that UText.
  827. * </p>
  828. * <p>
  829. * Caution: freezing a UText will disable changes made via the specific
  830. * frozen UText wrapper only; it will not have any effect on the ability to
  831. * directly modify the text by bypassing the UText. Any such backdoor modifications
  832. * are always an error while UText access is occuring because the underlying
  833. * text can get out of sync with UText's buffering.
  834. * </p>
  835. *
  836. * @param ut The UText to be frozen.
  837. * @see utext_isWritable()
  838. * @stable ICU 3.6
  839. */
  840. U_STABLE void U_EXPORT2
  841. utext_freeze(UText *ut);
  842. /**
  843. * UText provider properties (bit field indexes).
  844. *
  845. * @see UText
  846. * @stable ICU 3.4
  847. */
  848. enum {
  849. /**
  850. * It is potentially time consuming for the provider to determine the length of the text.
  851. * @stable ICU 3.4
  852. */
  853. UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
  854. /**
  855. * Text chunks remain valid and usable until the text object is modified or
  856. * deleted, not just until the next time the access() function is called
  857. * (which is the default).
  858. * @stable ICU 3.4
  859. */
  860. UTEXT_PROVIDER_STABLE_CHUNKS = 2,
  861. /**
  862. * The provider supports modifying the text via the replace() and copy()
  863. * functions.
  864. * @see Replaceable
  865. * @stable ICU 3.4
  866. */
  867. UTEXT_PROVIDER_WRITABLE = 3,
  868. /**
  869. * There is meta data associated with the text.
  870. * @see Replaceable::hasMetaData()
  871. * @stable ICU 3.4
  872. */
  873. UTEXT_PROVIDER_HAS_META_DATA = 4,
  874. /**
  875. * Text provider owns the text storage.
  876. * Generally occurs as the result of a deep clone of the UText.
  877. * When closing the UText, the associated text must
  878. * also be closed/deleted/freed/ whatever is appropriate.
  879. * @stable ICU 3.6
  880. */
  881. UTEXT_PROVIDER_OWNS_TEXT = 5
  882. };
  883. /**
  884. * Function type declaration for UText.clone().
  885. *
  886. * clone a UText. Much like opening a UText where the source text is itself
  887. * another UText.
  888. *
  889. * A deep clone will copy both the UText data structures and the underlying text.
  890. * The original and cloned UText will operate completely independently; modifications
  891. * made to the text in one will not effect the other. Text providers are not
  892. * required to support deep clones. The user of clone() must check the status return
  893. * and be prepared to handle failures.
  894. *
  895. * A shallow clone replicates only the UText data structures; it does not make
  896. * a copy of the underlying text. Shallow clones can be used as an efficient way to
  897. * have multiple iterators active in a single text string that is not being
  898. * modified.
  899. *
  900. * A shallow clone operation must not fail except for truly exceptional conditions such
  901. * as memory allocation failures.
  902. *
  903. * A UText and its clone may be safely concurrently accessed by separate threads.
  904. * This is true for both shallow and deep clones.
  905. * It is the responsibility of the Text Provider to ensure that this thread safety
  906. * constraint is met.
  907. *
  908. * @param dest A UText struct to be filled in with the result of the clone operation,
  909. * or NULL if the clone function should heap-allocate a new UText struct.
  910. * @param src The UText to be cloned.
  911. * @param deep TRUE to request a deep clone, FALSE for a shallow clone.
  912. * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
  913. * should be returned if the text provider is unable to clone the
  914. * original text.
  915. * @return The newly created clone, or NULL if the clone operation failed.
  916. *
  917. * @stable ICU 3.4
  918. */
  919. typedef UText * U_CALLCONV
  920. UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
  921. /**
  922. * Function type declaration for UText.nativeLength().
  923. *
  924. * @param ut the UText to get the length of.
  925. * @return the length, in the native units of the original text string.
  926. * @see UText
  927. * @stable ICU 3.4
  928. */
  929. typedef int64_t U_CALLCONV
  930. UTextNativeLength(UText *ut);
  931. /**
  932. * Function type declaration for UText.access(). Get the description of the text chunk
  933. * containing the text at a requested native index. The UText's iteration
  934. * position will be left at the requested index. If the index is out
  935. * of bounds, the iteration position will be left at the start or end
  936. * of the string, as appropriate.
  937. *
  938. * Chunks must begin and end on code point boundaries. A single code point
  939. * comprised of multiple storage units must never span a chunk boundary.
  940. *
  941. *
  942. * @param ut the UText being accessed.
  943. * @param nativeIndex Requested index of the text to be accessed.
  944. * @param forward If TRUE, then the returned chunk must contain text
  945. * starting from the index, so that start<=index<limit.
  946. * If FALSE, then the returned chunk must contain text
  947. * before the index, so that start<index<=limit.
  948. * @return True if the requested index could be accessed. The chunk
  949. * will contain the requested text.
  950. * False value if a chunk cannot be accessed
  951. * (the requested index is out of bounds).
  952. *
  953. * @see UText
  954. * @stable ICU 3.4
  955. */
  956. typedef UBool U_CALLCONV
  957. UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
  958. /**
  959. * Function type declaration for UText.extract().
  960. *
  961. * Extract text from a UText into a UChar buffer. The range of text to be extracted
  962. * is specified in the native indices of the UText provider. These may not necessarily
  963. * be UTF-16 indices.
  964. * <p>
  965. * The size (number of 16 bit UChars) in the data to be extracted is returned. The
  966. * full amount is returned, even when the specified buffer size is smaller.
  967. * <p>
  968. * The extracted string will (if you are a user) / must (if you are a text provider)
  969. * be NUL-terminated if there is sufficient space in the destination buffer.
  970. *
  971. * @param ut the UText from which to extract data.
  972. * @param nativeStart the native index of the first characer to extract.
  973. * @param nativeLimit the native string index of the position following the last
  974. * character to extract.
  975. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
  976. * @param destCapacity The size, in UChars, of the destination buffer. May be zero
  977. * for precomputing the required size.
  978. * @param status receives any error status.
  979. * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
  980. * preflighting.
  981. * @return Number of UChars in the data. Does not include a trailing NUL.
  982. *
  983. * @stable ICU 3.4
  984. */
  985. typedef int32_t U_CALLCONV
  986. UTextExtract(UText *ut,
  987. int64_t nativeStart, int64_t nativeLimit,
  988. UChar *dest, int32_t destCapacity,
  989. UErrorCode *status);
  990. /**
  991. * Function type declaration for UText.replace().
  992. *
  993. * Replace a range of the original text with a replacement text.
  994. *
  995. * Leaves the current iteration position at the position following the
  996. * newly inserted replacement text.
  997. *
  998. * This function need only be implemented on UText types that support writing.
  999. *
  1000. * When using this function, there should be only a single UText opened onto the
  1001. * underlying native text string. The function is responsible for updating the
  1002. * text chunk within the UText to reflect the updated iteration position,
  1003. * taking into account any changes to the underlying string's structure caused
  1004. * by the replace operation.
  1005. *
  1006. * @param ut the UText representing the text to be operated on.
  1007. * @param nativeStart the index of the start of the region to be replaced
  1008. * @param nativeLimit the index of the character following the region to be replaced.
  1009. * @param replacementText pointer to the replacement text
  1010. * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
  1011. * @param status receives any error status. Possible errors include
  1012. * U_NO_WRITE_PERMISSION
  1013. *
  1014. * @return The signed number of (native) storage units by which
  1015. * the length of the text expanded or contracted.
  1016. *
  1017. * @stable ICU 3.4
  1018. */
  1019. typedef int32_t U_CALLCONV
  1020. UTextReplace(UText *ut,
  1021. int64_t nativeStart, int64_t nativeLimit,
  1022. const UChar *replacementText, int32_t replacmentLength,
  1023. UErrorCode *status);
  1024. /**
  1025. * Function type declaration for UText.copy().
  1026. *
  1027. * Copy or move a substring from one position to another within the text,
  1028. * while retaining any metadata associated with the text.
  1029. * This function is used to duplicate or reorder substrings.
  1030. * The destination index must not overlap the source range.
  1031. *
  1032. * The text to be copied or moved is inserted at destIndex;
  1033. * it does not replace or overwrite any existing text.
  1034. *
  1035. * This function need only be implemented for UText types that support writing.
  1036. *
  1037. * When using this function, there should be only a single UText opened onto the
  1038. * underlying native text string. The function is responsible for updating the
  1039. * text chunk within the UText to reflect the updated iteration position,
  1040. * taking into account any changes to the underlying string's structure caused
  1041. * by the replace operation.
  1042. *
  1043. * @param ut The UText representing the text to be operated on.
  1044. * @param nativeStart The index of the start of the region to be copied or moved
  1045. * @param nativeLimit The index of the character following the region to be replaced.
  1046. * @param nativeDest The destination index to which the source substring is copied or moved.
  1047. * @param move If TRUE, then the substring is moved, not copied/duplicated.
  1048. * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
  1049. *
  1050. * @stable ICU 3.4
  1051. */
  1052. typedef void U_CALLCONV
  1053. UTextCopy(UText *ut,
  1054. int64_t nativeStart, int64_t nativeLimit,
  1055. int64_t nativeDest,
  1056. UBool move,
  1057. UErrorCode *status);
  1058. /**
  1059. * Function type declaration for UText.mapOffsetToNative().
  1060. * Map from the current UChar offset within the current text chunk to
  1061. * the corresponding native index in the original source text.
  1062. *
  1063. * This is required only for text providers that do not use native UTF-16 indexes.
  1064. *
  1065. * @param ut the UText.
  1066. * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
  1067. * The returned native index should always be to a code point boundary.
  1068. *
  1069. * @stable ICU 3.4
  1070. */
  1071. typedef int64_t U_CALLCONV
  1072. UTextMapOffsetToNative(const UText *ut);
  1073. /**
  1074. * Function type declaration for UText.mapIndexToUTF16().
  1075. * Map from a native index to a UChar offset within a text chunk.
  1076. * Behavior is undefined if the native index does not fall within the
  1077. * current chunk.
  1078. *
  1079. * This function is required only for text providers that do not use native UTF-16 indexes.
  1080. *
  1081. * @param ut The UText containing the text chunk.
  1082. * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
  1083. * @return Chunk-relative UTF-16 offset corresponding to the specified native
  1084. * index.
  1085. *
  1086. * @stable ICU 3.4
  1087. */
  1088. typedef int32_t U_CALLCONV
  1089. UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
  1090. /**
  1091. * Function type declaration for UText.utextClose().
  1092. *
  1093. * A Text Provider close function is only required for provider types that make
  1094. * allocations in their open function (or other functions) that must be
  1095. * cleaned when the UText is closed.
  1096. *
  1097. * The allocation of the UText struct itself and any "extra" storage
  1098. * associated with the UText is handled by the common UText implementation
  1099. * and does not require provider specific cleanup in a close function.
  1100. *
  1101. * Most UText provider implementations do not need to implement this function.
  1102. *
  1103. * @param ut A UText object to be closed.
  1104. *
  1105. * @stable ICU 3.4
  1106. */
  1107. typedef void U_CALLCONV
  1108. UTextClose(UText *ut);
  1109. /**
  1110. * (public) Function dispatch table for UText.
  1111. * Conceptually very much like a C++ Virtual Function Table.
  1112. * This struct defines the organization of the table.
  1113. * Each text provider implementation must provide an
  1114. * actual table that is initialized with the appropriate functions
  1115. * for the type of text being handled.
  1116. * @stable ICU 3.6
  1117. */
  1118. struct UTextFuncs {
  1119. /**
  1120. * (public) Function table size, sizeof(UTextFuncs)
  1121. * Intended for use should the table grow to accomodate added
  1122. * functions in the future, to allow tests for older format
  1123. * function tables that do not contain the extensions.
  1124. *
  1125. * Fields are placed for optimal alignment on
  1126. * 32/64/128-bit-pointer machines, by normally grouping together
  1127. * 4 32-bit fields,
  1128. * 4 pointers,
  1129. * 2 64-bit fields
  1130. * in sequence.
  1131. * @stable ICU 3.6
  1132. */
  1133. int32_t tableSize;
  1134. /**
  1135. * (private) Alignment padding.
  1136. * Do not use, reserved for use by the UText framework only.
  1137. * @internal
  1138. */
  1139. int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
  1140. /**
  1141. * (public) Function pointer for UTextClone
  1142. *
  1143. * @see UTextClone
  1144. * @stable ICU 3.6
  1145. */
  1146. UTextClone *clone;
  1147. /**
  1148. * (public) function pointer for UTextLength
  1149. * May be expensive to compute!
  1150. *
  1151. * @see UTextLength
  1152. * @stable ICU 3.6
  1153. */
  1154. UTextNativeLength *nativeLength;
  1155. /**
  1156. * (public) Function pointer for UTextAccess.
  1157. *
  1158. * @see UTextAccess
  1159. * @stable ICU 3.6
  1160. */
  1161. UTextAccess *access;
  1162. /**
  1163. * (public) Function pointer for UTextExtract.
  1164. *
  1165. * @see UTextExtract
  1166. * @stable ICU 3.6
  1167. */
  1168. UTextExtract *extract;
  1169. /**
  1170. * (public) Function pointer for UTextReplace.
  1171. *
  1172. * @see UTextReplace
  1173. * @stable ICU 3.6
  1174. */
  1175. UTextReplace *replace;
  1176. /**
  1177. * (public) Function pointer for UTextCopy.
  1178. *
  1179. * @see UTextCopy
  1180. * @stable ICU 3.6
  1181. */
  1182. UTextCopy *copy;
  1183. /**
  1184. * (public) Function pointer for UTextMapOffsetToNative.
  1185. *
  1186. * @see UTextMapOffsetToNative
  1187. * @stable ICU 3.6
  1188. */
  1189. UTextMapOffsetToNative *mapOffsetToNative;
  1190. /**
  1191. * (public) Function pointer for UTextMapNativeIndexToUTF16.
  1192. *
  1193. * @see UTextMapNativeIndexToUTF16
  1194. * @stable ICU 3.6
  1195. */
  1196. UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
  1197. /**
  1198. * (public) Function pointer for UTextClose.
  1199. *
  1200. * @see UTextClose
  1201. * @stable ICU 3.6
  1202. */
  1203. UTextClose *close;
  1204. /**
  1205. * (private) Spare function pointer
  1206. * @internal
  1207. */
  1208. UTextClose *spare1;
  1209. /**
  1210. * (private) Spare function pointer
  1211. * @internal
  1212. */
  1213. UTextClose *spare2;
  1214. /**
  1215. * (private) Spare function pointer
  1216. * @internal
  1217. */
  1218. UTextClose *spare3;
  1219. };
  1220. /**
  1221. * Function dispatch table for UText
  1222. * @see UTextFuncs
  1223. */
  1224. typedef struct UTextFuncs UTextFuncs;
  1225. /**
  1226. * UText struct. Provides the interface between the generic UText access code
  1227. * and the UText provider code that works on specific kinds of
  1228. * text (UTF-8, noncontiguous UTF-16, whatever.)
  1229. *
  1230. * Applications that are using predefined types of text providers
  1231. * to pass text data to ICU services will have no need to view the
  1232. * internals of the UText structs that they open.
  1233. *
  1234. * @stable ICU 3.6
  1235. */
  1236. struct UText {
  1237. /**
  1238. * (private) Magic. Used to help detect when UText functions are handed
  1239. * invalid or uninitialized UText structs.
  1240. * utext_openXYZ() functions take an initialized,
  1241. * but not necessarily open, UText struct as an
  1242. * optional fill-in parameter. This magic field
  1243. * is used to check for that initialization.
  1244. * Text provider close functions must NOT clear
  1245. * the magic field because that would prevent
  1246. * reuse of the UText struct.
  1247. * @internal
  1248. */
  1249. uint32_t magic;
  1250. /**
  1251. * (private) Flags for managing the allocation and freeing of
  1252. * memory associated with this UText.
  1253. * @internal
  1254. */
  1255. int32_t flags;
  1256. /**
  1257. * Text provider properties. This set of flags is maintained by the
  1258. * text provider implementation.
  1259. * @stable ICU 3.4
  1260. */
  1261. int32_t providerProperties;
  1262. /**
  1263. * (public) sizeOfStruct=sizeof(UText)
  1264. * Allows possible backward compatible extension.
  1265. *
  1266. * @stable ICU 3.4
  1267. */
  1268. int32_t sizeOfStruct;
  1269. /* ------ 16 byte alignment boundary ----------- */
  1270. /**
  1271. * (protected) Native index of the first character position following
  1272. * the current chunk.
  1273. * @stable ICU 3.6
  1274. */
  1275. int64_t chunkNativeLimit;
  1276. /**
  1277. * (protected) Size in bytes of the extra space (pExtra).
  1278. * @stable ICU 3.4
  1279. */
  1280. int32_t extraSize;
  1281. /**
  1282. * (protected) The highest chunk offset where native indexing and
  1283. * chunk (UTF-16) indexing correspond. For UTF-16 sources, value
  1284. * will be equal to chunkLength.
  1285. *
  1286. * @stable ICU 3.6
  1287. */
  1288. int32_t nativeIndexingLimit;
  1289. /* ---- 16 byte alignment boundary------ */
  1290. /**
  1291. * (protected) Native index of the first character in the text chunk.
  1292. * @stable ICU 3.6
  1293. */
  1294. int64_t chunkNativeStart;
  1295. /**
  1296. * (protected) Current iteration position within the text chunk (UTF-16 buffer).
  1297. * This is the index to the character that will be returned by utext_next32().
  1298. * @stable ICU 3.6
  1299. */
  1300. int32_t chunkOffset;
  1301. /**
  1302. * (protected) Length the text chunk (UTF-16 buffer), in UChars.
  1303. * @stable ICU 3.6
  1304. */
  1305. int32_t chunkLength;
  1306. /* ---- 16 byte alignment boundary-- */
  1307. /**
  1308. * (protected) pointer to a chunk of text in UTF-16 format.
  1309. * May refer either to original storage of the source of the text, or
  1310. * if conversion was required, to a buffer owned by the UText.
  1311. * @stable ICU 3.6
  1312. */
  1313. const UChar *chunkContents;
  1314. /**
  1315. * (public) Pointer to Dispatch table for accessing functions for this UText.
  1316. * @stable ICU 3.6
  1317. */
  1318. const UTextFuncs *pFuncs;
  1319. /**
  1320. * (protected) Pointer to additional space requested by the
  1321. * text provider during the utext_open operation.
  1322. * @stable ICU 3.4
  1323. */
  1324. void *pExtra;
  1325. /**
  1326. * (protected) Pointer to string or text-containing object or similar.
  1327. * This is the source of the text that this UText is wrapping, in a format
  1328. * that is known to the text provider functions.
  1329. * @stable ICU 3.4
  1330. */
  1331. const void *context;
  1332. /* --- 16 byte alignment boundary--- */
  1333. /**
  1334. * (protected) Pointer fields available for use by the text provider.
  1335. * Not used by UText common code.
  1336. * @stable ICU 3.6
  1337. */
  1338. const void *p;
  1339. /**
  1340. * (protected) Pointer fields available for use by the text provider.
  1341. * Not used by UText common code.
  1342. * @stable ICU 3.6
  1343. */
  1344. const void *q;
  1345. /**
  1346. * (protected) Pointer fields available for use by the text provider.
  1347. * Not used by UText common code.
  1348. * @stable ICU 3.6
  1349. */
  1350. const void *r;
  1351. /**
  1352. * Private field reserved for future use by the UText framework
  1353. * itself. This is not to be touched by the text providers.
  1354. * @internal ICU 3.4
  1355. */
  1356. void *privP;
  1357. /* --- 16 byte alignment boundary--- */
  1358. /**
  1359. * (protected) Integer field reserved for use by the text provider.
  1360. * Not used by the UText framework, or by the client (user) of the UText.
  1361. * @stable ICU 3.4
  1362. */
  1363. int64_t a;
  1364. /**
  1365. * (protected) Integer field reserved for use by the text provider.
  1366. * Not used by the UText framework, or by the client (user) of the UText.
  1367. * @stable ICU 3.4
  1368. */
  1369. int32_t b;
  1370. /**
  1371. * (protected) Integer field reserved for use by the text provider.
  1372. * Not used by the UText framework, or by the client (user) of the UText.
  1373. * @stable ICU 3.4
  1374. */
  1375. int32_t c;
  1376. /* ---- 16 byte alignment boundary---- */
  1377. /**
  1378. * Private field reserved for future use by the UText framework
  1379. * itself. This is not to be touched by the text providers.
  1380. * @internal ICU 3.4
  1381. */
  1382. int64_t privA;
  1383. /**
  1384. * Private field reserved for future use by the UText framework
  1385. * itself. This is not to be touched by the text providers.
  1386. * @internal ICU 3.4
  1387. */
  1388. int32_t privB;
  1389. /**
  1390. * Private field reserved for future use by the UText framework
  1391. * itself. This is not to be touched by the text providers.
  1392. * @internal ICU 3.4
  1393. */
  1394. int32_t privC;
  1395. };
  1396. /**
  1397. * Common function for use by Text Provider implementations to allocate and/or initialize
  1398. * a new UText struct. To be called in the implementation of utext_open() functions.
  1399. * If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
  1400. * If the supplied UText is already open, the provider's close function will be called so that
  1401. * the struct can be reused by the open in progress. A non-null ut must already be initialized.
  1402. *
  1403. * @param ut Pointer to a UText struct to be re-used, or null if a new UText
  1404. * should be allocated.
  1405. * @param extraSpace The amount of additional space to be allocated as part
  1406. * of this UText, for use by types of providers that require
  1407. * additional storage; see the pExtra field of UText.
  1408. * @param status Errors are returned here.
  1409. * @return Pointer to the UText, allocated if necessary, with extra space set up if requested.
  1410. * @stable ICU 3.4
  1411. */
  1412. U_STABLE UText * U_EXPORT2
  1413. utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
  1414. /* Do not wrap this enum in #ifndef U_HIDE_INTERNAL_API: the @stable UTEXT_INITIALIZER macro expands to UTEXT_MAGIC. */
  1415. /**
  1416. * @internal
  1417. * Value used to help identify correctly initialized UText structs.
  1418. * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
  1419. */
  1420. enum {
  1421. UTEXT_MAGIC = 0x345ad82c
  1422. };
  1424. /**
  1425. * Initializer to be used with local (stack) instances of a UText struct. UText structs
  1426. * must be initialized before passing them to one of the utext_open functions.
  1427. * Sets magic to UTEXT_MAGIC and sizeOfStruct to sizeof(UText); every other field is 0 or NULL.
  1428. * The values are positional, so they must stay in the same order as the fields of struct UText.
  1429. * @stable ICU 3.6
  1430. */
  1431. #define UTEXT_INITIALIZER { \
  1432. UTEXT_MAGIC, /* magic */ \
  1433. 0, /* flags */ \
  1434. 0, /* providerProps */ \
  1435. sizeof(UText), /* sizeOfStruct */ \
  1436. 0, /* chunkNativeLimit */ \
  1437. 0, /* extraSize */ \
  1438. 0, /* nativeIndexingLimit */ \
  1439. 0, /* chunkNativeStart */ \
  1440. 0, /* chunkOffset */ \
  1441. 0, /* chunkLength */ \
  1442. NULL, /* chunkContents */ \
  1443. NULL, /* pFuncs */ \
  1444. NULL, /* pExtra */ \
  1445. NULL, /* context */ \
  1446. NULL, NULL, NULL, /* p, q, r */ \
  1447. NULL, /* privP */ \
  1448. 0, 0, 0, /* a, b, c */ \
  1449. 0, 0, 0 /* privA, privB, privC */ \
  1450. }
  1451. U_CDECL_END
  1452. #endif