idna.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2010-2012, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: idna.h
  9. * encoding: US-ASCII
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2010mar05
  14. * created by: Markus W. Scherer
  15. */
  16. #ifndef __IDNA_H__
  17. #define __IDNA_H__
  18. /**
  19. * \file
  20. * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
  21. */
  22. #include "unicode/utypes.h"
  23. #if !UCONFIG_NO_IDNA
  24. #include "unicode/bytestream.h"
  25. #include "unicode/stringpiece.h"
  26. #include "unicode/uidna.h"
  27. #include "unicode/unistr.h"
  28. U_NAMESPACE_BEGIN
  29. class IDNAInfo;
  30. /**
  31. * Abstract base class for IDNA processing.
  32. * See http://www.unicode.org/reports/tr46/
  33. * and http://www.ietf.org/rfc/rfc3490.txt
  34. *
  35. * The IDNA class is not intended for public subclassing.
  36. *
  37. * This C++ API currently only implements UTS #46.
  38. * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
  39. * and IDNA2003 (functions that do not use a service object).
  40. * @stable ICU 4.6
  41. */
  42. class U_COMMON_API IDNA : public UObject {
  43. public:
  44. /**
  45. * Destructor.
  46. * @stable ICU 4.6
  47. */
  48. ~IDNA();
  49. /**
  50. * Returns an IDNA instance which implements UTS #46.
  51. * Returns an unmodifiable instance, owned by the caller.
  52. * Cache it for multiple operations, and delete it when done.
  53. * The instance is thread-safe, that is, it can be used concurrently.
  54. *
  55. * UTS #46 defines Unicode IDNA Compatibility Processing,
  56. * updated to the latest version of Unicode and compatible with both
  57. * IDNA2003 and IDNA2008.
  58. *
  59. * The worker functions use transitional processing, including deviation mappings,
  60. * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
  61. * is used in which case the deviation characters are passed through without change.
  62. *
  63. * Disallowed characters are mapped to U+FFFD.
  64. *
  65. * For available options see the uidna.h header.
  66. * Operations with the UTS #46 instance do not support the
  67. * UIDNA_ALLOW_UNASSIGNED option.
  68. *
  69. * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
  70. * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
  71. * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
  72. *
  73. * @param options Bit set to modify the processing and error checking.
  74. * See option bit set values in uidna.h.
  75. * @param errorCode Standard ICU error code. Its input value must
  76. * pass the U_SUCCESS() test, or else the function returns
  77. * immediately. Check for U_FAILURE() on output or use with
  78. * function chaining. (See User Guide for details.)
  79. * @return the UTS #46 IDNA instance, if successful
  80. * @stable ICU 4.6
  81. */
  82. static IDNA *
  83. createUTS46Instance(uint32_t options, UErrorCode &errorCode);
  84. /**
  85. * Converts a single domain name label into its ASCII form for DNS lookup.
  86. * If any processing step fails, then info.hasErrors() will be TRUE and
  87. * the result might not be an ASCII string.
  88. * The label might be modified according to the types of errors.
  89. * Labels with severe errors will be left in (or turned into) their Unicode form.
  90. *
  91. * The UErrorCode indicates an error only in exceptional cases,
  92. * such as a U_MEMORY_ALLOCATION_ERROR.
  93. *
  94. * @param label Input domain name label
  95. * @param dest Destination string object
  96. * @param info Output container of IDNA processing details.
  97. * @param errorCode Standard ICU error code. Its input value must
  98. * pass the U_SUCCESS() test, or else the function returns
  99. * immediately. Check for U_FAILURE() on output or use with
  100. * function chaining. (See User Guide for details.)
  101. * @return dest
  102. * @stable ICU 4.6
  103. */
  104. virtual UnicodeString &
  105. labelToASCII(const UnicodeString &label, UnicodeString &dest,
  106. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  107. /**
  108. * Converts a single domain name label into its Unicode form for human-readable display.
  109. * If any processing step fails, then info.hasErrors() will be TRUE.
  110. * The label might be modified according to the types of errors.
  111. *
  112. * The UErrorCode indicates an error only in exceptional cases,
  113. * such as a U_MEMORY_ALLOCATION_ERROR.
  114. *
  115. * @param label Input domain name label
  116. * @param dest Destination string object
  117. * @param info Output container of IDNA processing details.
  118. * @param errorCode Standard ICU error code. Its input value must
  119. * pass the U_SUCCESS() test, or else the function returns
  120. * immediately. Check for U_FAILURE() on output or use with
  121. * function chaining. (See User Guide for details.)
  122. * @return dest
  123. * @stable ICU 4.6
  124. */
  125. virtual UnicodeString &
  126. labelToUnicode(const UnicodeString &label, UnicodeString &dest,
  127. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  128. /**
  129. * Converts a whole domain name into its ASCII form for DNS lookup.
  130. * If any processing step fails, then info.hasErrors() will be TRUE and
  131. * the result might not be an ASCII string.
  132. * The domain name might be modified according to the types of errors.
  133. * Labels with severe errors will be left in (or turned into) their Unicode form.
  134. *
  135. * The UErrorCode indicates an error only in exceptional cases,
  136. * such as a U_MEMORY_ALLOCATION_ERROR.
  137. *
  138. * @param name Input domain name
  139. * @param dest Destination string object
  140. * @param info Output container of IDNA processing details.
  141. * @param errorCode Standard ICU error code. Its input value must
  142. * pass the U_SUCCESS() test, or else the function returns
  143. * immediately. Check for U_FAILURE() on output or use with
  144. * function chaining. (See User Guide for details.)
  145. * @return dest
  146. * @stable ICU 4.6
  147. */
  148. virtual UnicodeString &
  149. nameToASCII(const UnicodeString &name, UnicodeString &dest,
  150. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  151. /**
  152. * Converts a whole domain name into its Unicode form for human-readable display.
  153. * If any processing step fails, then info.hasErrors() will be TRUE.
  154. * The domain name might be modified according to the types of errors.
  155. *
  156. * The UErrorCode indicates an error only in exceptional cases,
  157. * such as a U_MEMORY_ALLOCATION_ERROR.
  158. *
  159. * @param name Input domain name
  160. * @param dest Destination string object
  161. * @param info Output container of IDNA processing details.
  162. * @param errorCode Standard ICU error code. Its input value must
  163. * pass the U_SUCCESS() test, or else the function returns
  164. * immediately. Check for U_FAILURE() on output or use with
  165. * function chaining. (See User Guide for details.)
  166. * @return dest
  167. * @stable ICU 4.6
  168. */
  169. virtual UnicodeString &
  170. nameToUnicode(const UnicodeString &name, UnicodeString &dest,
  171. IDNAInfo &info, UErrorCode &errorCode) const = 0;
  172. // UTF-8 versions of the processing methods ---------------------------- ***
  173. /**
  174. * Converts a single domain name label into its ASCII form for DNS lookup.
  175. * UTF-8 version of labelToASCII(), same behavior.
  176. *
  177. * @param label Input domain name label
  178. * @param dest Destination byte sink; Flush()ed if successful
  179. * @param info Output container of IDNA processing details.
  180. * @param errorCode Standard ICU error code. Its input value must
  181. * pass the U_SUCCESS() test, or else the function returns
  182. * immediately. Check for U_FAILURE() on output or use with
  183. * function chaining. (See User Guide for details.)
  184. * @return dest
  185. * @stable ICU 4.6
  186. */
  187. virtual void
  188. labelToASCII_UTF8(StringPiece label, ByteSink &dest,
  189. IDNAInfo &info, UErrorCode &errorCode) const;
  190. /**
  191. * Converts a single domain name label into its Unicode form for human-readable display.
  192. * UTF-8 version of labelToUnicode(), same behavior.
  193. *
  194. * @param label Input domain name label
  195. * @param dest Destination byte sink; Flush()ed if successful
  196. * @param info Output container of IDNA processing details.
  197. * @param errorCode Standard ICU error code. Its input value must
  198. * pass the U_SUCCESS() test, or else the function returns
  199. * immediately. Check for U_FAILURE() on output or use with
  200. * function chaining. (See User Guide for details.)
  201. * @return dest
  202. * @stable ICU 4.6
  203. */
  204. virtual void
  205. labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
  206. IDNAInfo &info, UErrorCode &errorCode) const;
  207. /**
  208. * Converts a whole domain name into its ASCII form for DNS lookup.
  209. * UTF-8 version of nameToASCII(), same behavior.
  210. *
  211. * @param name Input domain name
  212. * @param dest Destination byte sink; Flush()ed if successful
  213. * @param info Output container of IDNA processing details.
  214. * @param errorCode Standard ICU error code. Its input value must
  215. * pass the U_SUCCESS() test, or else the function returns
  216. * immediately. Check for U_FAILURE() on output or use with
  217. * function chaining. (See User Guide for details.)
  218. * @return dest
  219. * @stable ICU 4.6
  220. */
  221. virtual void
  222. nameToASCII_UTF8(StringPiece name, ByteSink &dest,
  223. IDNAInfo &info, UErrorCode &errorCode) const;
  224. /**
  225. * Converts a whole domain name into its Unicode form for human-readable display.
  226. * UTF-8 version of nameToUnicode(), same behavior.
  227. *
  228. * @param name Input domain name
  229. * @param dest Destination byte sink; Flush()ed if successful
  230. * @param info Output container of IDNA processing details.
  231. * @param errorCode Standard ICU error code. Its input value must
  232. * pass the U_SUCCESS() test, or else the function returns
  233. * immediately. Check for U_FAILURE() on output or use with
  234. * function chaining. (See User Guide for details.)
  235. * @return dest
  236. * @stable ICU 4.6
  237. */
  238. virtual void
  239. nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
  240. IDNAInfo &info, UErrorCode &errorCode) const;
  241. };
  242. class UTS46;
  243. /**
  244. * Output container for IDNA processing errors.
  245. * The IDNAInfo class is not suitable for subclassing.
  246. * @stable ICU 4.6
  247. */
  248. class U_COMMON_API IDNAInfo : public UMemory {
  249. public:
  250. /**
  251. * Constructor for stack allocation.
  252. * @stable ICU 4.6
  253. */
  254. IDNAInfo() : errors(0), labelErrors(0), isTransDiff(FALSE), isBiDi(FALSE), isOkBiDi(TRUE) {}
  255. /**
  256. * Were there IDNA processing errors?
  257. * @return TRUE if there were processing errors
  258. * @stable ICU 4.6
  259. */
  260. UBool hasErrors() const { return errors!=0; }
  261. /**
  262. * Returns a bit set indicating IDNA processing errors.
  263. * See UIDNA_ERROR_... constants in uidna.h.
  264. * @return bit set of processing errors
  265. * @stable ICU 4.6
  266. */
  267. uint32_t getErrors() const { return errors; }
  268. /**
  269. * Returns TRUE if transitional and nontransitional processing produce different results.
  270. * This is the case when the input label or domain name contains
  271. * one or more deviation characters outside a Punycode label (see UTS #46).
  272. * <ul>
  273. * <li>With nontransitional processing, such characters are
  274. * copied to the destination string.
  275. * <li>With transitional processing, such characters are
  276. * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
  277. * </ul>
  278. * @return TRUE if transitional and nontransitional processing produce different results
  279. * @stable ICU 4.6
  280. */
  281. UBool isTransitionalDifferent() const { return isTransDiff; }
  282. private:
  283. friend class UTS46;
  284. IDNAInfo(const IDNAInfo &other); // no copying
  285. IDNAInfo &operator=(const IDNAInfo &other); // no copying
  286. void reset() {
  287. errors=labelErrors=0;
  288. isTransDiff=FALSE;
  289. isBiDi=FALSE;
  290. isOkBiDi=TRUE;
  291. }
  292. uint32_t errors, labelErrors;
  293. UBool isTransDiff;
  294. UBool isBiDi;
  295. UBool isOkBiDi;
  296. };
  297. U_NAMESPACE_END
  298. #endif // UCONFIG_NO_IDNA
  299. #endif // __IDNA_H__