uidna.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2003-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: uidna.h
  11. * encoding: US-ASCII
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2003feb1
  16. * created by: Ram Viswanadha
  17. */
  18. #ifndef __UIDNA_H__
  19. #define __UIDNA_H__
  20. #include "unicode/utypes.h"
  21. #if !UCONFIG_NO_IDNA
  22. #include "unicode/localpointer.h"
  23. #include "unicode/parseerr.h"
  24. /**
  25. * \file
  26. * \brief C API: Internationalizing Domain Names in Applications (IDNA)
  27. *
  28. * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
  29. *
  30. * The C API functions which do take a UIDNA * service object pointer
  31. * implement UTS #46 and IDNA2008.
  32. *
  33. * IDNA2003 is obsolete.
  34. * The C API functions which do not take a service object pointer
  35. * implement IDNA2003. They are all deprecated.
  36. */
  /*
   * IDNA option bit set values.
   * The constants are single bits (or 0) and are meant to be OR-ed together
   * into the "options" argument of the IDNA functions.
   */
  enum {
      /**
       * Default options value: None of the other options are set.
       * For use in static worker and factory methods.
       * @stable ICU 2.6
       */
      UIDNA_DEFAULT=0,
  #ifndef U_HIDE_DEPRECATED_API
      /**
       * Option to allow unassigned code points in domain names and labels.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the UTS46 implementation.
       * (UTS #46 disallows unassigned code points.)
       * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
       */
      UIDNA_ALLOW_UNASSIGNED=1,
  #endif  /* U_HIDE_DEPRECATED_API */
      /**
       * Option to check whether the input conforms to the STD3 ASCII rules,
       * for example the restriction of labels to LDH characters
       * (ASCII Letters, Digits and Hyphen-Minus).
       * For use in static worker and factory methods.
       * @stable ICU 2.6
       */
      UIDNA_USE_STD3_RULES=2,
      /**
       * IDNA option to check for whether the input conforms to the BiDi rules.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (IDNA2003 always performs a BiDi check.)
       * @stable ICU 4.6
       */
      UIDNA_CHECK_BIDI=4,
      /**
       * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (The CONTEXTJ check is new in IDNA2008.)
       * @stable ICU 4.6
       */
      UIDNA_CHECK_CONTEXTJ=8,
      /**
       * IDNA option for nontransitional processing in ToASCII().
       * For use in static worker and factory methods.
       * <p>By default, ToASCII() uses transitional processing.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
       * @stable ICU 4.6
       */
      UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
      /**
       * IDNA option for nontransitional processing in ToUnicode().
       * For use in static worker and factory methods.
       * <p>By default, ToUnicode() uses transitional processing.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
       * @stable ICU 4.6
       */
      UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
      /**
       * IDNA option to check for whether the input conforms to the CONTEXTO rules.
       * For use in static worker and factory methods.
       * <p>This option is ignored by the IDNA2003 implementation.
       * (The CONTEXTO check is new in IDNA2008.)
       * <p>This is for use by registries for IDNA2008 conformance.
       * UTS #46 does not require the CONTEXTO check.
       * @stable ICU 49
       */
      UIDNA_CHECK_CONTEXTO=0x40
  };
  /**
   * Opaque C service object type for the new IDNA API.
   * Only forward-declared here; callers handle it exclusively through pointers.
   * @stable ICU 4.6
   */
  struct UIDNA;
  typedef struct UIDNA UIDNA;  /**< C typedef for struct UIDNA. @stable ICU 4.6 */
  116. /**
  117. * Returns a UIDNA instance which implements UTS #46.
  118. * Returns an unmodifiable instance, owned by the caller.
  119. * Cache it for multiple operations, and uidna_close() it when done.
  120. * The instance is thread-safe, that is, it can be used concurrently.
  121. *
  122. * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
  123. *
  124. * @param options Bit set to modify the processing and error checking.
  125. * See option bit set values in uidna.h.
  126. * @param pErrorCode Standard ICU error code. Its input value must
  127. * pass the U_SUCCESS() test, or else the function returns
  128. * immediately. Check for U_FAILURE() on output or use with
  129. * function chaining. (See User Guide for details.)
  130. * @return the UTS #46 UIDNA instance, if successful
  131. * @stable ICU 4.6
  132. */
  133. U_STABLE UIDNA * U_EXPORT2
  134. uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
  135. /**
  136. * Closes a UIDNA instance.
  137. * @param idna UIDNA instance to be closed
  138. * @stable ICU 4.6
  139. */
  140. U_STABLE void U_EXPORT2
  141. uidna_close(UIDNA *idna);
  #if U_SHOW_CPLUSPLUS_API
  U_NAMESPACE_BEGIN
  /**
   * \class LocalUIDNAPointer
   * "Smart pointer" class, closes a UIDNA via uidna_close().
   * For most methods see the LocalPointerBase base class.
   *
   * @see LocalPointerBase
   * @see LocalPointer
   * @stable ICU 4.6
   */
  U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
  U_NAMESPACE_END
  #endif  /* U_SHOW_CPLUSPLUS_API */
  156. /**
  157. * Output container for IDNA processing errors.
  158. * Initialize with UIDNA_INFO_INITIALIZER:
  159. * \code
  160. * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
  161. * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
  162. * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
  163. * \endcode
  164. * @stable ICU 4.6
  165. */
  166. typedef struct UIDNAInfo {
  167. /** sizeof(UIDNAInfo) @stable ICU 4.6 */
  168. int16_t size;
  169. /**
  170. * Set to TRUE if transitional and nontransitional processing produce different results.
  171. * For details see C++ IDNAInfo::isTransitionalDifferent().
  172. * @stable ICU 4.6
  173. */
  174. UBool isTransitionalDifferent;
  175. UBool reservedB3; /**< Reserved field, do not use. @internal */
  176. /**
  177. * Bit set indicating IDNA processing errors. 0 if no errors.
  178. * See UIDNA_ERROR_... constants.
  179. * @stable ICU 4.6
  180. */
  181. uint32_t errors;
  182. int32_t reservedI2; /**< Reserved field, do not use. @internal */
  183. int32_t reservedI3; /**< Reserved field, do not use. @internal */
  184. } UIDNAInfo;
  /**
   * Static initializer for a UIDNAInfo struct.
   * Sets the size field to sizeof(UIDNAInfo) and every other field
   * (both UBool flags, the errors bit set and the reserved integers) to FALSE/0.
   * @stable ICU 4.6
   */
  #define UIDNA_INFO_INITIALIZER { \
      (int16_t)sizeof(UIDNAInfo), \
      FALSE, FALSE, \
      0, 0, 0 }
  193. /**
  194. * Converts a single domain name label into its ASCII form for DNS lookup.
  195. * If any processing step fails, then pInfo->errors will be non-zero and
  196. * the result might not be an ASCII string.
  197. * The label might be modified according to the types of errors.
  198. * Labels with severe errors will be left in (or turned into) their Unicode form.
  199. *
  200. * The UErrorCode indicates an error only in exceptional cases,
  201. * such as a U_MEMORY_ALLOCATION_ERROR.
  202. *
  203. * @param idna UIDNA instance
  204. * @param label Input domain name label
  205. * @param length Label length, or -1 if NUL-terminated
  206. * @param dest Destination string buffer
  207. * @param capacity Destination buffer capacity
  208. * @param pInfo Output container of IDNA processing details.
  209. * @param pErrorCode Standard ICU error code. Its input value must
  210. * pass the U_SUCCESS() test, or else the function returns
  211. * immediately. Check for U_FAILURE() on output or use with
  212. * function chaining. (See User Guide for details.)
  213. * @return destination string length
  214. * @stable ICU 4.6
  215. */
  216. U_STABLE int32_t U_EXPORT2
  217. uidna_labelToASCII(const UIDNA *idna,
  218. const UChar *label, int32_t length,
  219. UChar *dest, int32_t capacity,
  220. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  221. /**
  222. * Converts a single domain name label into its Unicode form for human-readable display.
  223. * If any processing step fails, then pInfo->errors will be non-zero.
  224. * The label might be modified according to the types of errors.
  225. *
  226. * The UErrorCode indicates an error only in exceptional cases,
  227. * such as a U_MEMORY_ALLOCATION_ERROR.
  228. *
  229. * @param idna UIDNA instance
  230. * @param label Input domain name label
  231. * @param length Label length, or -1 if NUL-terminated
  232. * @param dest Destination string buffer
  233. * @param capacity Destination buffer capacity
  234. * @param pInfo Output container of IDNA processing details.
  235. * @param pErrorCode Standard ICU error code. Its input value must
  236. * pass the U_SUCCESS() test, or else the function returns
  237. * immediately. Check for U_FAILURE() on output or use with
  238. * function chaining. (See User Guide for details.)
  239. * @return destination string length
  240. * @stable ICU 4.6
  241. */
  242. U_STABLE int32_t U_EXPORT2
  243. uidna_labelToUnicode(const UIDNA *idna,
  244. const UChar *label, int32_t length,
  245. UChar *dest, int32_t capacity,
  246. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  247. /**
  248. * Converts a whole domain name into its ASCII form for DNS lookup.
  249. * If any processing step fails, then pInfo->errors will be non-zero and
  250. * the result might not be an ASCII string.
  251. * The domain name might be modified according to the types of errors.
  252. * Labels with severe errors will be left in (or turned into) their Unicode form.
  253. *
  254. * The UErrorCode indicates an error only in exceptional cases,
  255. * such as a U_MEMORY_ALLOCATION_ERROR.
  256. *
  257. * @param idna UIDNA instance
  258. * @param name Input domain name
  259. * @param length Domain name length, or -1 if NUL-terminated
  260. * @param dest Destination string buffer
  261. * @param capacity Destination buffer capacity
  262. * @param pInfo Output container of IDNA processing details.
  263. * @param pErrorCode Standard ICU error code. Its input value must
  264. * pass the U_SUCCESS() test, or else the function returns
  265. * immediately. Check for U_FAILURE() on output or use with
  266. * function chaining. (See User Guide for details.)
  267. * @return destination string length
  268. * @stable ICU 4.6
  269. */
  270. U_STABLE int32_t U_EXPORT2
  271. uidna_nameToASCII(const UIDNA *idna,
  272. const UChar *name, int32_t length,
  273. UChar *dest, int32_t capacity,
  274. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  275. /**
  276. * Converts a whole domain name into its Unicode form for human-readable display.
  277. * If any processing step fails, then pInfo->errors will be non-zero.
  278. * The domain name might be modified according to the types of errors.
  279. *
  280. * The UErrorCode indicates an error only in exceptional cases,
  281. * such as a U_MEMORY_ALLOCATION_ERROR.
  282. *
  283. * @param idna UIDNA instance
  284. * @param name Input domain name
  285. * @param length Domain name length, or -1 if NUL-terminated
  286. * @param dest Destination string buffer
  287. * @param capacity Destination buffer capacity
  288. * @param pInfo Output container of IDNA processing details.
  289. * @param pErrorCode Standard ICU error code. Its input value must
  290. * pass the U_SUCCESS() test, or else the function returns
  291. * immediately. Check for U_FAILURE() on output or use with
  292. * function chaining. (See User Guide for details.)
  293. * @return destination string length
  294. * @stable ICU 4.6
  295. */
  296. U_STABLE int32_t U_EXPORT2
  297. uidna_nameToUnicode(const UIDNA *idna,
  298. const UChar *name, int32_t length,
  299. UChar *dest, int32_t capacity,
  300. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  301. /* UTF-8 versions of the processing methods --------------------------------- */
  302. /**
  303. * Converts a single domain name label into its ASCII form for DNS lookup.
  304. * UTF-8 version of uidna_labelToASCII(), same behavior.
  305. *
  306. * @param idna UIDNA instance
  307. * @param label Input domain name label
  308. * @param length Label length, or -1 if NUL-terminated
  309. * @param dest Destination string buffer
  310. * @param capacity Destination buffer capacity
  311. * @param pInfo Output container of IDNA processing details.
  312. * @param pErrorCode Standard ICU error code. Its input value must
  313. * pass the U_SUCCESS() test, or else the function returns
  314. * immediately. Check for U_FAILURE() on output or use with
  315. * function chaining. (See User Guide for details.)
  316. * @return destination string length
  317. * @stable ICU 4.6
  318. */
  319. U_STABLE int32_t U_EXPORT2
  320. uidna_labelToASCII_UTF8(const UIDNA *idna,
  321. const char *label, int32_t length,
  322. char *dest, int32_t capacity,
  323. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  324. /**
  325. * Converts a single domain name label into its Unicode form for human-readable display.
  326. * UTF-8 version of uidna_labelToUnicode(), same behavior.
  327. *
  328. * @param idna UIDNA instance
  329. * @param label Input domain name label
  330. * @param length Label length, or -1 if NUL-terminated
  331. * @param dest Destination string buffer
  332. * @param capacity Destination buffer capacity
  333. * @param pInfo Output container of IDNA processing details.
  334. * @param pErrorCode Standard ICU error code. Its input value must
  335. * pass the U_SUCCESS() test, or else the function returns
  336. * immediately. Check for U_FAILURE() on output or use with
  337. * function chaining. (See User Guide for details.)
  338. * @return destination string length
  339. * @stable ICU 4.6
  340. */
  341. U_STABLE int32_t U_EXPORT2
  342. uidna_labelToUnicodeUTF8(const UIDNA *idna,
  343. const char *label, int32_t length,
  344. char *dest, int32_t capacity,
  345. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  346. /**
  347. * Converts a whole domain name into its ASCII form for DNS lookup.
  348. * UTF-8 version of uidna_nameToASCII(), same behavior.
  349. *
  350. * @param idna UIDNA instance
  351. * @param name Input domain name
  352. * @param length Domain name length, or -1 if NUL-terminated
  353. * @param dest Destination string buffer
  354. * @param capacity Destination buffer capacity
  355. * @param pInfo Output container of IDNA processing details.
  356. * @param pErrorCode Standard ICU error code. Its input value must
  357. * pass the U_SUCCESS() test, or else the function returns
  358. * immediately. Check for U_FAILURE() on output or use with
  359. * function chaining. (See User Guide for details.)
  360. * @return destination string length
  361. * @stable ICU 4.6
  362. */
  363. U_STABLE int32_t U_EXPORT2
  364. uidna_nameToASCII_UTF8(const UIDNA *idna,
  365. const char *name, int32_t length,
  366. char *dest, int32_t capacity,
  367. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  368. /**
  369. * Converts a whole domain name into its Unicode form for human-readable display.
  370. * UTF-8 version of uidna_nameToUnicode(), same behavior.
  371. *
  372. * @param idna UIDNA instance
  373. * @param name Input domain name
  374. * @param length Domain name length, or -1 if NUL-terminated
  375. * @param dest Destination string buffer
  376. * @param capacity Destination buffer capacity
  377. * @param pInfo Output container of IDNA processing details.
  378. * @param pErrorCode Standard ICU error code. Its input value must
  379. * pass the U_SUCCESS() test, or else the function returns
  380. * immediately. Check for U_FAILURE() on output or use with
  381. * function chaining. (See User Guide for details.)
  382. * @return destination string length
  383. * @stable ICU 4.6
  384. */
  385. U_STABLE int32_t U_EXPORT2
  386. uidna_nameToUnicodeUTF8(const UIDNA *idna,
  387. const char *name, int32_t length,
  388. char *dest, int32_t capacity,
  389. UIDNAInfo *pInfo, UErrorCode *pErrorCode);
  /*
   * IDNA error bit set values.
   * When a domain name or label fails a processing step or does not meet the
   * validity criteria, then one or more of these error bits are set.
   */
  enum {
      /**
       * A non-final domain name label (or the whole domain name) is empty.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_EMPTY_LABEL=1,
      /**
       * A domain name label is longer than 63 bytes.
       * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
       * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LABEL_TOO_LONG=2,
      /**
       * A domain name is longer than 255 bytes in its storage form.
       * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
       * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
      /**
       * A label starts with a hyphen-minus ('-').
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LEADING_HYPHEN=8,
      /**
       * A label ends with a hyphen-minus ('-').
       * @stable ICU 4.6
       */
      UIDNA_ERROR_TRAILING_HYPHEN=0x10,
      /**
       * A label contains hyphen-minus ('-') in the third and fourth positions.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_HYPHEN_3_4=0x20,
      /**
       * A label starts with a combining mark.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
      /**
       * A label or domain name contains disallowed characters.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_DISALLOWED=0x80,
      /**
       * A label starts with "xn--" but does not contain valid Punycode.
       * That is, an xn-- label failed Punycode decoding.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_PUNYCODE=0x100,
      /**
       * A label contains a dot=full stop.
       * This can occur in an input string for a single-label function.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_LABEL_HAS_DOT=0x200,
      /**
       * An ACE label does not contain a valid label string.
       * The label was successfully ACE (Punycode) decoded but the resulting
       * string had severe validation errors. For example,
       * it might contain characters that are not allowed in ACE labels,
       * or it might not be normalized.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
      /**
       * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
       * @stable ICU 4.6
       */
      UIDNA_ERROR_BIDI=0x800,
      /**
       * A label does not meet the IDNA CONTEXTJ requirements.
       * @stable ICU 4.6
       */
      UIDNA_ERROR_CONTEXTJ=0x1000,
      /**
       * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
       * Some punctuation characters "Would otherwise have been DISALLOWED"
       * but are allowed in certain contexts. (RFC 5892)
       * @stable ICU 49
       */
      UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
      /**
       * A label does not meet the IDNA CONTEXTO requirements for digits.
       * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
       * @stable ICU 49
       */
      UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
  };
  485. #ifndef U_HIDE_DEPRECATED_API
  486. /* IDNA2003 API ------------------------------------------------------------- */
  487. /**
  488. * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
  489. * This operation is done on <b>single labels</b> before sending it to something that expects
  490. * ASCII names. A label is an individual part of a domain name. Labels are usually
  491. * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  492. *
  493. * IDNA2003 API Overview:
  494. *
  495. * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
  496. * (http://www.ietf.org/rfc/rfc3490.txt).
  497. * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
  498. * containing non-ASCII code points are processed by the
  499. * ToASCII operation before passing it to resolver libraries. Domain names
  500. * that are obtained from resolver libraries are processed by the
  501. * ToUnicode operation before displaying the domain name to the user.
  502. * IDNA requires that implementations process input strings with Nameprep
  503. * (http://www.ietf.org/rfc/rfc3491.txt),
  504. * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
  505. * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
  506. * Implementations of IDNA MUST fully implement Nameprep and Punycode;
  507. * neither Nameprep nor Punycode are optional.
  508. * The input and output of ToASCII and ToUnicode operations are Unicode
  509. * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
  510. * multiple times to an input string will yield the same result as applying the operation
  511. * once.
  512. * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
  513. * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
  514. *
  515. * @param src Input UChar array containing label in Unicode.
  516. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  517. * @param dest Output UChar array with ASCII (ACE encoded) label.
  518. * @param destCapacity Size of dest.
  519. * @param options A bit set of options:
  520. *
  521. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  522. * and do not use STD3 ASCII rules
  523. * If unassigned code points are found the operation fails with
  524. * U_UNASSIGNED_ERROR error code.
  525. *
  526. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  527. * If this option is set, the unassigned code points are in the input
  528. * are treated as normal Unicode code points.
  529. *
  530. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  531. * If this option is set and the input does not satisfy STD3 rules,
  532. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  533. *
  534. * @param parseError Pointer to UParseError struct to receive information on position
  535. * of error if an error is encountered. Can be NULL.
  536. * @param status ICU in/out error code parameter.
  537. * U_INVALID_CHAR_FOUND if src contains
  538. * unmatched single surrogates.
  539. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  540. * too many code points.
  541. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  542. * @return The length of the result string, if successful - or in case of a buffer overflow,
  543. * in which case it will be greater than destCapacity.
  544. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  545. */
  546. U_DEPRECATED int32_t U_EXPORT2
  547. uidna_toASCII(const UChar* src, int32_t srcLength,
  548. UChar* dest, int32_t destCapacity,
  549. int32_t options,
  550. UParseError* parseError,
  551. UErrorCode* status);
  552. /**
  553. * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
  554. * This operation is done on <b>single labels</b> before sending it to something that expects
  555. * Unicode names. A label is an individual part of a domain name. Labels are usually
  556. * separated by dots; for e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
  557. *
  558. * @param src Input UChar array containing ASCII (ACE encoded) label.
  559. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  560. * @param dest Output Converted UChar array containing Unicode equivalent of label.
  561. * @param destCapacity Size of dest.
  562. * @param options A bit set of options:
  563. *
  564. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  565. * and do not use STD3 ASCII rules
  566. * If unassigned code points are found the operation fails with
  567. * U_UNASSIGNED_ERROR error code.
  568. *
  569. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  570. * If this option is set, the unassigned code points are in the input
  571. * are treated as normal Unicode code points. <b> Note: </b> This option is
  572. * required on toUnicode operation because the RFC mandates
  573. * verification of decoded ACE input by applying toASCII and comparing
  574. * its output with source
  575. *
  576. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  577. * If this option is set and the input does not satisfy STD3 rules,
  578. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  579. *
  580. * @param parseError Pointer to UParseError struct to receive information on position
  581. * of error if an error is encountered. Can be NULL.
  582. * @param status ICU in/out error code parameter.
  583. * U_INVALID_CHAR_FOUND if src contains
  584. * unmatched single surrogates.
  585. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  586. * too many code points.
  587. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  588. * @return The length of the result string, if successful - or in case of a buffer overflow,
  589. * in which case it will be greater than destCapacity.
  590. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  591. */
  592. U_DEPRECATED int32_t U_EXPORT2
  593. uidna_toUnicode(const UChar* src, int32_t srcLength,
  594. UChar* dest, int32_t destCapacity,
  595. int32_t options,
  596. UParseError* parseError,
  597. UErrorCode* status);
  598. /**
  599. * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
  600. * This operation is done on complete domain names, e.g: "www.example.com".
  601. * It is important to note that this operation can fail. If it fails, then the input
  602. * domain name cannot be used as an Internationalized Domain Name and the application
  603. * should have methods defined to deal with the failure.
  604. *
  605. * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
  606. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  607. * and then convert. This function does not offer that level of granularity. The options once
  608. * set will apply to all labels in the domain name
  609. *
  610. * @param src Input UChar array containing IDN in Unicode.
  611. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  612. * @param dest Output UChar array with ASCII (ACE encoded) IDN.
  613. * @param destCapacity Size of dest.
  614. * @param options A bit set of options:
  615. *
  616. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  617. * and do not use STD3 ASCII rules
  618. * If unassigned code points are found the operation fails with
  619. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  620. *
  621. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
  622. * If this option is set, the unassigned code points are in the input
  623. * are treated as normal Unicode code points.
  624. *
  625. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
  626. * If this option is set and the input does not satisfy STD3 rules,
  627. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
  628. *
  629. * @param parseError Pointer to UParseError struct to receive information on position
  630. * of error if an error is encountered. Can be NULL.
  631. * @param status ICU in/out error code parameter.
  632. * U_INVALID_CHAR_FOUND if src contains
  633. * unmatched single surrogates.
  634. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  635. * too many code points.
  636. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
  637. * @return The length of the result string, if successful - or in case of a buffer overflow,
  638. * in which case it will be greater than destCapacity.
  639. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  640. */
  641. U_DEPRECATED int32_t U_EXPORT2
  642. uidna_IDNToASCII( const UChar* src, int32_t srcLength,
  643. UChar* dest, int32_t destCapacity,
  644. int32_t options,
  645. UParseError* parseError,
  646. UErrorCode* status);
  647. /**
  648. * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
  649. * This operation is done on complete domain names, e.g., "www.example.com".
  650. *
  651. * <b>Note:</b> The IDNA RFC specifies that a conformant application should divide a domain name
  652. * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
  653. * and then convert. This function does not offer that level of granularity. The options, once
  654. * set, will apply to all labels in the domain name.
  655. *
  656. * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
  657. * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
  658. * @param dest Output UChar array containing Unicode equivalent of source IDN.
  659. * @param destCapacity Size of dest.
  660. * @param options A bit set of options:
  661. *
  662. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  663. * and do not use STD3 ASCII rules.
  664. * If unassigned code points are found, the operation fails with
  665. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  666. *
  667. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  668. * If this option is set, the unassigned code points in the input
  669. * are treated as normal Unicode code points.
  670. *
  671. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  672. * If this option is set and the input does not satisfy STD3 rules,
  673. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  674. *
  675. * @param parseError Pointer to UParseError struct to receive information on position
  676. * of error if an error is encountered. Can be NULL.
  677. * @param status ICU in/out error code parameter.
  678. * U_INVALID_CHAR_FOUND if src contains
  679. * unmatched single surrogates.
  680. * U_INDEX_OUTOFBOUNDS_ERROR if src contains
  681. * too many code points.
  682. * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough.
  683. * @return The length of the result string, if successful or in case of a buffer overflow;
  684. * in the latter case it will be greater than destCapacity.
  685. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  686. */
  687. U_DEPRECATED int32_t U_EXPORT2
  688. uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
  689. UChar* dest, int32_t destCapacity,
  690. int32_t options,
  691. UParseError* parseError,
  692. UErrorCode* status);
  693. /**
  694. * IDNA2003: Compare two IDN strings for equivalence.
  695. * This function splits the domain names into labels and compares them.
  696. * According to the IDN RFC, whenever two labels are compared, they are
  697. * considered equal if and only if their ASCII forms (obtained by
  698. * applying toASCII) match using a case-insensitive ASCII comparison.
  699. * Two domain names are considered a match if and only if all labels
  700. * match, regardless of whether the label separators match.
  701. *
  702. * @param s1 First source string.
  703. * @param length1 Length of first source string, or -1 if NUL-terminated.
  704. *
  705. * @param s2 Second source string.
  706. * @param length2 Length of second source string, or -1 if NUL-terminated.
  707. * @param options A bit set of options:
  708. *
  709. * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
  710. * and do not use STD3 ASCII rules.
  711. * If unassigned code points are found, the operation fails with
  712. * U_UNASSIGNED_CODE_POINT_FOUND error code.
  713. *
  714. * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations.
  715. * If this option is set, the unassigned code points in the input
  716. * are treated as normal Unicode code points.
  717. *
  718. * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions.
  719. * If this option is set and the input does not satisfy STD3 rules,
  720. * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR.
  721. *
  722. * @param status ICU error code in/out parameter.
  723. * Must fulfill U_SUCCESS before the function call.
  724. * @return <0 or 0 or >0 as usual for string comparisons.
  725. * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
  726. */
  727. U_DEPRECATED int32_t U_EXPORT2
  728. uidna_compare( const UChar *s1, int32_t length1,
  729. const UChar *s2, int32_t length2,
  730. int32_t options,
  731. UErrorCode* status);
  732. #endif /* U_HIDE_DEPRECATED_API */
  733. #endif /* #if !UCONFIG_NO_IDNA */
  734. #endif