ubiditransform.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. /*
  2. ******************************************************************************
  3. *
  4. * Copyright (C) 2016 and later: Unicode, Inc. and others.
  5. * License & terms of use: http://www.unicode.org/copyright.html
  6. *
  7. ******************************************************************************
  8. * file name: ubiditransform.h
  9. * encoding: US-ASCII
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2016jul24
  14. * created by: Lina Kemmel
  15. *
  16. */
  17. #ifndef UBIDITRANSFORM_H
  18. #define UBIDITRANSFORM_H
  19. #include "unicode/utypes.h"
  20. #include "unicode/ubidi.h"
  21. #include "unicode/uchar.h"
  22. #include "unicode/localpointer.h"
  23. #ifndef U_HIDE_DRAFT_API
  24. /**
  25. * \file
  26. * \brief Bidi Transformations
  27. *
  28. * <code>UBiDiOrder</code> indicates the order of text.<p>
  29. * This bidi transformation engine supports all possible combinations (4 in
  30. * total) of input and output text order:
  31. * <ul>
  32. * <li>&lt;logical input, visual output&gt;: unless the output direction is RTL, this
  33. * corresponds to a normal operation of the Bidi algorithm as described in the
  34. * Unicode Technical Report and implemented by <code>UBiDi</code> when the
  35. * reordering mode is set to <code>UBIDI_REORDER_DEFAULT</code>. Visual RTL
  36. * mode is not supported by <code>UBiDi</code> and is accomplished through
  37. * reversing a visual LTR string,</li>
  38. * <li>&lt;visual input, logical output&gt;: unless the input direction is RTL, this
  39. * corresponds to an "inverse bidi algorithm" in <code>UBiDi</code> with the
  40. * reordering mode set to <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>.
  41. * Visual RTL mode is not supported by <code>UBiDi</code> and is
  42. * accomplished through reversing a visual LTR string,</li>
  43. * <li>&lt;logical input, logical output&gt;: if the input and output base directions
  44. * mismatch, this corresponds to the <code>UBiDi</code> implementation with the
  45. * reordering mode set to <code>UBIDI_REORDER_RUNS_ONLY</code>; and if the
  46. * input and output base directions are identical, the transformation engine
  47. * will only handle character mirroring and Arabic shaping operations without
  48. * reordering,</li>
  49. * <li>&lt;visual input, visual output&gt;: this reordering mode is not supported by
  50. * the <code>UBiDi</code> engine; it implies character mirroring, Arabic
  51. * shaping, and - if the input/output base directions mismatch - string
  52. * reverse operations.</li>
  53. * </ul>
  54. * @see ubidi_setInverse
  55. * @see ubidi_setReorderingMode
  56. * @see UBIDI_REORDER_DEFAULT
  57. * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
  58. * @see UBIDI_REORDER_RUNS_ONLY
  59. * @draft ICU 58
  60. */
  61. typedef enum {
  62. /** 0: Constant indicating that text is in logical order.
  63. * This is the default for input text.
  64. * @draft ICU 58
  65. */
  66. UBIDI_LOGICAL = 0,
  67. /** 1: Constant indicating that text is in visual order.
  68. * This is the default for output text.
  69. * @draft ICU 58
  70. */
  71. UBIDI_VISUAL /* no initializer: takes the value following UBIDI_LOGICAL, i.e. 1 */
  72. } UBiDiOrder;
  73. /**
  74. * <code>UBiDiMirroring</code> indicates whether characters with the
  75. * "mirrored" property in RTL runs should be replaced with their mirror-image
  76. * counterparts. It is the type of the <code>doMirroring</code> parameter of <code>ubiditransform_transform</code>.
  77. * @see UBIDI_DO_MIRRORING
  78. * @see ubidi_setReorderingOptions
  79. * @see ubidi_writeReordered
  80. * @see ubidi_writeReverse
  81. * @draft ICU 58
  82. */
  83. typedef enum {
  84. /** 0: Constant indicating that character mirroring should not be
  85. * performed.
  86. * This is the default.
  87. * @draft ICU 58
  88. */
  89. UBIDI_MIRRORING_OFF = 0,
  90. /** 1: Constant indicating that character mirroring should be performed.
  91. * This corresponds to calling <code>ubidi_writeReordered</code> or
  92. * <code>ubidi_writeReverse</code> with the
  93. * <code>UBIDI_DO_MIRRORING</code> option bit set.
  94. * @draft ICU 58
  95. */
  96. UBIDI_MIRRORING_ON /* no initializer: takes the value following UBIDI_MIRRORING_OFF, i.e. 1 */
  97. } UBiDiMirroring;
  98. /**
  99. * Forward declaration of the <code>UBiDiTransform</code> structure that stores
  100. * information used by the layout transformation engine. This header only forward-declares the structure; objects are obtained from <code>ubiditransform_open()</code> and released with <code>ubiditransform_close()</code>.
  101. * @draft ICU 58
  102. */
  103. typedef struct UBiDiTransform UBiDiTransform;
  104. /**
  105. * Performs transformation of text from the bidi layout defined by the input
  106. * ordering scheme to the bidi layout defined by the output ordering scheme,
  107. * and applies character mirroring and Arabic shaping operations.<p>
  108. * In terms of <code>UBiDi</code>, such a transformation implies:
  109. * <ul>
  110. * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
  111. * reordering mode is other than normal),</li>
  112. * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
  113. * transformed from a visual to a logical form),</li>
  114. * <li>resolving embedding levels of each character in the input text by
  115. * calling <code>ubidi_setPara</code>,</li>
  116. * <li>reordering the characters based on the computed embedding levels, also
  117. * performing character mirroring as needed, and streaming the result to the
  118. * output, by calling <code>ubidi_writeReordered</code>,</li>
  119. * <li>performing Arabic digit and letter shaping on the output text by calling
  120. * <code>u_shapeArabic</code>.</li>
  121. * </ul>
  122. * An "ordering scheme" encompasses the base direction and the order of text,
  123. * and these characteristics must be defined by the caller for both input and
  124. * output explicitly.<p>
  125. * There are 36 possible combinations of &lt;input, output&gt; ordering schemes,
  126. * which are partially supported by <code>UBiDi</code> already. Examples of the
  127. * currently supported combinations:
  128. * <ul>
  129. * <li>&lt;Logical LTR, Visual LTR&gt;: this is equivalent to calling
  130. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
  131. * <li>&lt;Logical RTL, Visual LTR&gt;: this is equivalent to calling
  132. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
  133. * <li>&lt;Logical Default ("Auto") LTR, Visual LTR&gt;: this is equivalent to
  134. * calling <code>ubidi_setPara</code> with
  135. * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
  136. * <li>&lt;Logical Default ("Auto") RTL, Visual LTR&gt;: this is equivalent to
  137. * calling <code>ubidi_setPara</code> with
  138. * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
  139. * <li>&lt;Visual LTR, Logical LTR&gt;: this is equivalent to
  140. * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
  141. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
  142. * <li>&lt;Visual LTR, Logical RTL&gt;: this is equivalent to
  143. * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then
  144. * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
  145. * </ul>
  146. * All combinations that involve the Visual RTL scheme are unsupported by
  147. * <code>UBiDi</code>, for instance:
  148. * <ul>
  149. * <li>&lt;Logical LTR, Visual RTL&gt;,</li>
  150. * <li>&lt;Visual RTL, Logical RTL&gt;.</li>
  151. * </ul>
  152. * <p>Example of usage of the transformation engine:<br>
  153. * <pre>
  154. * \code
  155. * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
  156. * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
  157. * UErrorCode errorCode = U_ZERO_ERROR;
  158. * // Run a transformation.
  159. * ubiditransform_transform(pBiDiTransform,
  160. * text1, -1, text2, -1,
  161. * UBIDI_LTR, UBIDI_VISUAL,
  162. * UBIDI_RTL, UBIDI_LOGICAL,
  163. * UBIDI_MIRRORING_OFF,
  164. * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
  165. * &errorCode);
  166. * // Do something with text2.
  167. * text2[4] = '2';
  168. * // Run a reverse transformation.
  169. * ubiditransform_transform(pBiDiTransform,
  170. * text2, -1, text1, -1,
  171. * UBIDI_RTL, UBIDI_LOGICAL,
  172. * UBIDI_LTR, UBIDI_VISUAL,
  173. * UBIDI_MIRRORING_OFF,
  174. * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
  175. * &errorCode);
  176. *\endcode
  177. * </pre>
  178. * </p>
  179. *
  180. * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
  181. * allocated with <code>ubiditransform_open()</code> or
  182. * <code>NULL</code>.<p>
  183. * This object serves for one-time setup to amortize initialization
  184. * overheads. Use of this object is not thread-safe. All other threads
  185. * should allocate a new <code>UBiDiTransform</code> object by calling
  186. * <code>ubiditransform_open()</code> before using it. Alternatively,
  187. * a caller can set this parameter to <code>NULL</code>, in which case
  188. * the object will be allocated by the engine on the fly.</p>
  189. * @param src A pointer to the text that the Bidi layout transformations will
  190. * be performed on.
  191. * <p><strong>Note:</strong> the text must be (at least)
  192. * <code>srcLength</code> long.</p>
  193. * @param srcLength The length of the text, in number of UChars. If
  194. * <code>srcLength == -1</code> then the text must be zero-terminated.
  195. * @param dest A pointer to where the processed text is to be copied.
  196. * @param destSize The size of the <code>dest</code> buffer, in number of
  197. * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
  198. * then the destination length could be as large as
  199. * <code>srcLength * 2</code>. Otherwise, the destination length will
  200. * not exceed <code>srcLength</code>. If the caller reserves the last
  201. * position for zero-termination, it should be excluded from
  202. * <code>destSize</code>.
  203. * <p><code>destSize == -1</code> is allowed and makes sense when
  204. * <code>dest</code> holds some meaningful value, e.g. that of
  205. * <code>src</code>. In this case <code>dest</code> must be
  206. * zero-terminated.</p>
  207. * @param inParaLevel A base embedding level of the input as defined in
  208. * <code>ubidi_setPara</code> documentation for the
  209. * <code>paraLevel</code> parameter.
  210. * @param inOrder An order of the input, which can be one of the
  211. * <code>UBiDiOrder</code> values.
  212. * @param outParaLevel A base embedding level of the output as defined in
  213. * <code>ubidi_setPara</code> documentation for the
  214. * <code>paraLevel</code> parameter.
  215. * @param outOrder An order of the output, which can be one of the
  216. * <code>UBiDiOrder</code> values.
  217. * @param doMirroring Indicates whether or not to perform character mirroring,
  218. * and can accept one of the <code>UBiDiMirroring</code> values.
  219. * @param shapingOptions Arabic digit and letter shaping options defined in the
  220. * ushape.h documentation.
  221. * <p><strong>Note:</strong> Direction indicator options are computed by
  222. * the transformation engine based on the effective ordering schemes, so
  223. * user-defined direction indicators will be ignored.</p>
  224. * @param pErrorCode A pointer to an error code value.
  225. *
  226. * @return The destination length, i.e. the number of UChars written to
  227. * <code>dest</code>. If the transformation fails, the return value
  228. * will be 0 (and the error code will be written to
  229. * <code>pErrorCode</code>).
  230. *
  231. * @see UBiDiLevel
  232. * @see UBiDiOrder
  233. * @see UBiDiMirroring
  234. * @see ubidi_setPara
  235. * @see u_shapeArabic
  236. * @draft ICU 58
  237. */
  238. U_DRAFT uint32_t U_EXPORT2
  239. ubiditransform_transform(UBiDiTransform *pBiDiTransform,
  240. const UChar *src, int32_t srcLength,
  241. UChar *dest, int32_t destSize,
  242. UBiDiLevel inParaLevel, UBiDiOrder inOrder,
  243. UBiDiLevel outParaLevel, UBiDiOrder outOrder,
  244. UBiDiMirroring doMirroring, uint32_t shapingOptions,
  245. UErrorCode *pErrorCode);
  246. /**
  247. * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
  248. * e.g. with different ordering schemes, mirroring or shaping options.<p>
  249. * <strong>Note:</strong> The object can only be reused in the same thread.
  250. * All other threads should allocate a new <code>UBiDiTransform</code> object
  251. * before using it.<p>
  252. * Example of usage:<p>
  253. * <pre>
  254. * \code
  255. * UErrorCode errorCode = U_ZERO_ERROR;
  256. * // Open a new UBiDiTransform.
  257. * UBiDiTransform* transform = ubiditransform_open(&errorCode);
  258. * // Run a transformation.
  259. * ubiditransform_transform(transform,
  260. * text1, -1, text2, -1,
  261. * UBIDI_RTL, UBIDI_LOGICAL,
  262. * UBIDI_LTR, UBIDI_VISUAL,
  263. * UBIDI_MIRRORING_ON,
  264. * U_SHAPE_DIGITS_EN2AN,
  265. * &errorCode);
  266. * // Do something with the output text and invoke another transformation using
  267. * // that text as input.
  268. * ubiditransform_transform(transform,
  269. * text2, -1, text3, -1,
  270. * UBIDI_LTR, UBIDI_VISUAL,
  271. * UBIDI_RTL, UBIDI_VISUAL,
  272. * UBIDI_MIRRORING_ON,
  273. * 0, &errorCode);
  274. *\endcode
  275. * </pre>
  276. * <p>
  277. * The <code>UBiDiTransform</code> object must be deallocated by calling
  278. * <code>ubiditransform_close()</code>.
  279. * @param pErrorCode A pointer to an error code value.
  280. * @return An empty <code>UBiDiTransform</code> object.
  281. * @draft ICU 58
  282. */
  283. U_DRAFT UBiDiTransform* U_EXPORT2
  284. ubiditransform_open(UErrorCode *pErrorCode);
  285. /**
  286. * Deallocates the <code>UBiDiTransform</code> object passed in <code>pBidiTransform</code>. Per the <code>ubiditransform_open()</code> documentation, every object it returns must be released by calling this function.
  287. * @draft ICU 58
  288. */
  289. U_DRAFT void U_EXPORT2
  290. ubiditransform_close(UBiDiTransform *pBidiTransform);
  291. #if U_SHOW_CPLUSPLUS_API
  292. U_NAMESPACE_BEGIN
  293. /**
  294. * \class LocalUBiDiTransformPointer
  295. * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
  296. * For most methods see the LocalPointerBase base class.
  297. * The class is generated by the U_DEFINE_LOCAL_OPEN_POINTER invocation below, which is given the class name, the pointee type and the close function.
  298. * @see LocalPointerBase
  299. * @see LocalPointer
  300. * @draft ICU 58
  301. */
  302. U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
  303. U_NAMESPACE_END
  304. #endif /* U_SHOW_CPLUSPLUS_API */
  305. #endif /* U_HIDE_DRAFT_API */
  306. #endif