messagepattern.h 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2011-2013, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: messagepattern.h
  9. * encoding: US-ASCII
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2011mar14
  14. * created by: Markus W. Scherer
  15. */
  16. #ifndef __MESSAGEPATTERN_H__
  17. #define __MESSAGEPATTERN_H__
  18. /**
  19. * \file
  20. * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
  21. */
  22. #include "unicode/utypes.h"
  23. #if !UCONFIG_NO_FORMATTING
  24. #include "unicode/parseerr.h"
  25. #include "unicode/unistr.h"
  /**
   * Selects when an apostrophe begins quoted literal text for MessageFormat output.
   * DOUBLE_OPTIONAL is the default unless uconfig.h overrides it
   * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
   * <p>
   * Two adjacent apostrophes always yield one apostrophe in the output,
   * even when the pair sits between two single, text-quoting apostrophes.
   * <p>
   * The table below pairs desired MessageFormat.format() output
   * with the pattern strings that produce it.
   * <p>
   * <table>
   *   <tr>
   *     <th>Desired output</th>
   *     <th>DOUBLE_OPTIONAL</th>
   *     <th>DOUBLE_REQUIRED</th>
   *   </tr>
   *   <tr>
   *     <td>I see {many}</td>
   *     <td>I see '{many}'</td>
   *     <td>(same)</td>
   *   </tr>
   *   <tr>
   *     <td>I said {'Wow!'}</td>
   *     <td>I said '{''Wow!''}'</td>
   *     <td>(same)</td>
   *   </tr>
   *   <tr>
   *     <td>I don't know</td>
   *     <td>I don't know OR<br> I don''t know</td>
   *     <td>I don''t know</td>
   *   </tr>
   * </table>
   * @stable ICU 4.8
   * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
   */
  enum UMessagePatternApostropheMode {
      /**
       * A literal apostrophe may be written as either one or two
       * apostrophe pattern characters.
       * Inside a MessageFormat pattern, a single apostrophe starts quoted literal text
       * only when it immediately precedes a curly brace {},
       * or a pipe symbol | inside a choice format,
       * or a pound symbol # inside a plural format.
       * <p>
       * This is the default behavior starting with ICU 4.8.
       * @stable ICU 4.8
       */
      UMSGPAT_APOS_DOUBLE_OPTIONAL = 0,
      /**
       * A literal apostrophe must be written as two apostrophe pattern characters.
       * A single apostrophe always starts quoted literal text.
       * <p>
       * This is the behavior of ICU 4.6 and earlier, and of the JDK.
       * @stable ICU 4.8
       */
      UMSGPAT_APOS_DOUBLE_REQUIRED = 1
  };

  /**
   * Plain type name for enum UMessagePatternApostropheMode.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
  /**
   * Type constants for MessagePattern::Part.
   * @stable ICU 4.8
   */
  enum UMessagePatternPartType {
      /**
       * Marks the start of a message pattern (main or nested).
       * The length is 0 for the top-level message
       * and for a choice argument sub-message; otherwise it is 1 for the '{'.
       * The value is the nesting level, with 0 for the main message.
       * <p>
       * A MSG_LIMIT part always follows later.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_MSG_START = 0,
      /**
       * Marks the end of a message pattern (main or nested).
       * The length is 0 for the top-level message and for
       * the last sub-message of a choice argument;
       * otherwise it is 1 for the '}' or (in a choice argument style) the '|'.
       * The value is the nesting level, with 0 for the main message.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_MSG_LIMIT = 1,
      /**
       * Identifies a substring of the pattern string that is skipped when formatting.
       * An apostrophe that begins or ends quoted text, for example,
       * is reported with such a part.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_SKIP_SYNTAX = 2,
      /**
       * Signals that a syntax character has to be inserted for auto-quoting.
       * The length is 0.
       * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_INSERT_CHAR = 3,
      /**
       * Identifies a syntactic (non-escaped) # symbol in a plural variant.
       * When formatting, this part's substring is replaced with the
       * (value-offset) for the plural argument value.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_REPLACE_NUMBER = 4,
      /**
       * Marks the start of an argument.
       * The length is 1 for the '{'.
       * The value is the ordinal value of the ArgType. Use getArgType().
       * <p>
       * An ARG_NUMBER or ARG_NAME part comes next,
       * then optional argument sub-parts (see UMessagePatternArgType constants),
       * and finally an ARG_LIMIT part.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_START = 5,
      /**
       * Marks the end of an argument.
       * The length is 1 for the '}'.
       * The value is the ordinal value of the ArgType. Use getArgType().
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_LIMIT = 6,
      /**
       * The argument number; the part value provides it.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_NUMBER = 7,
      /**
       * The argument name.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_NAME = 8,
      /**
       * The argument type.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_TYPE = 9,
      /**
       * The argument style text.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_STYLE = 10,
      /**
       * A selector substring in a "complex" argument style.
       * The value is undefined and currently always 0.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_SELECTOR = 11,
      /**
       * An integer value, such as the offset or an explicit selector value
       * in a PluralFormat style.
       * The part value is the integer value.
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_INT = 12,
      /**
       * A numeric value, such as the offset or an explicit selector value
       * in a PluralFormat style.
       * The part value is an index into an internal array of numeric values;
       * use getNumericValue().
       * @stable ICU 4.8
       */
      UMSGPAT_PART_TYPE_ARG_DOUBLE = 13
  };

  /**
   * Plain type name for enum UMessagePatternPartType.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternPartType UMessagePatternPartType;
  /**
   * Argument type constants.
   * Part.getArgType() returns them for ARG_START and ARG_LIMIT parts.
   *
   * Each message nested inside an argument is delimited by MSG_START and MSG_LIMIT,
   * with a nesting level one greater than that of the surrounding message.
   * @stable ICU 4.8
   */
  enum UMessagePatternArgType {
      /**
       * The argument has no specified type.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_NONE = 0,
      /**
       * The argument has a "simple" type, which the ARG_TYPE part provides.
       * An ARG_STYLE part might follow that.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_SIMPLE = 1,
      /**
       * The argument is a ChoiceFormat with one or more
       * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_CHOICE = 2,
      /**
       * The argument is a cardinal-number PluralFormat with an optional
       * ARG_INT or ARG_DOUBLE offset (e.g., offset:1)
       * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
       * If the selector has an explicit value (e.g., =2), then the ARG_INT or
       * ARG_DOUBLE part preceding the message provides that value.
       * Otherwise the message immediately follows the ARG_SELECTOR.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_PLURAL = 3,
      /**
       * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_TYPE_SELECT = 4,
      /**
       * The argument is an ordinal-number PluralFormat
       * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
       * @stable ICU 50
       */
      UMSGPAT_ARG_TYPE_SELECTORDINAL = 5
  };

  /**
   * Plain type name for enum UMessagePatternArgType.
   * @stable ICU 4.8
   */
  typedef enum UMessagePatternArgType UMessagePatternArgType;
  /**
   * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
   * Evaluates to TRUE if the argument type has a plural style part sequence and semantics,
   * that is, for UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
   * The argument expression appears twice in the expansion,
   * so it should be free of side effects.
   * @stable ICU 50
   */
  #define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
      ( (argType)==UMSGPAT_ARG_TYPE_PLURAL || \
        (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL )
  enum {
      /**
       * Result of MessagePattern.validateArgumentName() when
       * the string is a valid "pattern identifier" but not a number.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_NAME_NOT_NUMBER = -1,

      /**
       * Result of MessagePattern.validateArgumentName() when
       * the string is invalid.
       * It might not be a valid "pattern identifier",
       * or it might have only ASCII digits but with a leading zero
       * or with a number that is too large.
       * @stable ICU 4.8
       */
      UMSGPAT_ARG_NAME_NOT_VALID = -2
  };
  /**
   * Special value which getNumericValue(Part) returns when
   * a part has no numeric value defined.
   * @see MessagePattern.getNumericValue()
   * @stable ICU 4.8
   */
  #define UMSGPAT_NO_NUMERIC_VALUE ((double)-123456789)
  286. U_NAMESPACE_BEGIN
  287. class MessagePatternDoubleList;
  288. class MessagePatternPartsList;
  289. /**
  290. * Parses and represents ICU MessageFormat patterns.
  291. * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
  292. * Used in the implementations of those classes as well as in tools
  293. * for message validation, translation and format conversion.
  294. * <p>
  295. * The parser handles all syntax relevant for identifying message arguments.
  296. * This includes "complex" arguments whose style strings contain
  297. * nested MessageFormat pattern substrings.
  298. * For "simple" arguments (with no nested MessageFormat pattern substrings),
  299. * the argument style is not parsed any further.
  300. * <p>
  301. * The parser handles named and numbered message arguments and allows both in one message.
  302. * <p>
  303. * Once a pattern has been parsed successfully, iterate through the parsed data
  304. * with countParts(), getPart() and related methods.
  305. * <p>
  306. * The data logically represents a parse tree, but is stored and accessed
  307. * as a list of "parts" for fast and simple parsing and to minimize object allocations.
  308. * Arguments and nested messages are best handled via recursion.
  309. * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
  310. * the index of the corresponding _LIMIT "part".
  311. * <p>
  312. * List of "parts":
  313. * <pre>
  314. * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
  315. * argument = noneArg | simpleArg | complexArg
  316. * complexArg = choiceArg | pluralArg | selectArg
  317. *
  318. * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
  319. * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
  320. * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
  321. * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
  322. * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
  323. *
  324. * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
  325. * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
  326. * selectStyle = (ARG_SELECTOR message)+
  327. * </pre>
  328. * <ul>
  329. * <li>Literal output text is not represented directly by "parts" but accessed
  330. * between parts of a message, from one part's getLimit() to the next part's getIndex().
  331. * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
  332. * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
  333. * the less-than-or-equal-to sign (U+2264).
  334. * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
  335. * The optional numeric Part between each (ARG_SELECTOR, message) pair
  336. * is the value of an explicit-number selector like "=2",
  337. * otherwise the selector is a non-numeric identifier.
  338. * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
  339. * </ul>
  340. * <p>
  341. * This class is not intended for public subclassing.
  342. *
  343. * @stable ICU 4.8
  344. */
  345. class U_COMMON_API MessagePattern : public UObject {
  346. public:
  347. /**
  348. * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
  349. * @param errorCode Standard ICU error code. Its input value must
  350. * pass the U_SUCCESS() test, or else the function returns
  351. * immediately. Check for U_FAILURE() on output or use with
  352. * function chaining. (See User Guide for details.)
  353. * @stable ICU 4.8
  354. */
  355. MessagePattern(UErrorCode &errorCode);
  356. /**
  357. * Constructs an empty MessagePattern.
  358. * @param mode Explicit UMessagePatternApostropheMode.
  359. * @param errorCode Standard ICU error code. Its input value must
  360. * pass the U_SUCCESS() test, or else the function returns
  361. * immediately. Check for U_FAILURE() on output or use with
  362. * function chaining. (See User Guide for details.)
  363. * @stable ICU 4.8
  364. */
  365. MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
  366. /**
  367. * Constructs a MessagePattern with default UMessagePatternApostropheMode and
  368. * parses the MessageFormat pattern string.
  369. * @param pattern a MessageFormat pattern string
  370. * @param parseError Struct to receive information on the position
  371. * of an error within the pattern.
  372. * Can be NULL.
  373. * @param errorCode Standard ICU error code. Its input value must
  374. * pass the U_SUCCESS() test, or else the function returns
  375. * immediately. Check for U_FAILURE() on output or use with
  376. * function chaining. (See User Guide for details.)
  377. * TODO: turn @throws into UErrorCode specifics?
  378. * @throws IllegalArgumentException for syntax errors in the pattern string
  379. * @throws IndexOutOfBoundsException if certain limits are exceeded
  380. * (e.g., argument number too high, argument name too long, etc.)
  381. * @throws NumberFormatException if a number could not be parsed
  382. * @stable ICU 4.8
  383. */
  384. MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
  385. /**
  386. * Copy constructor.
  387. * @param other Object to copy.
  388. * @stable ICU 4.8
  389. */
  390. MessagePattern(const MessagePattern &other);
  391. /**
  392. * Assignment operator.
  393. * @param other Object to copy.
  394. * @return *this=other
  395. * @stable ICU 4.8
  396. */
  397. MessagePattern &operator=(const MessagePattern &other);
  398. /**
  399. * Destructor.
  400. * @stable ICU 4.8
  401. */
  402. virtual ~MessagePattern();
  403. /**
  404. * Parses a MessageFormat pattern string.
  405. * @param pattern a MessageFormat pattern string
  406. * @param parseError Struct to receive information on the position
  407. * of an error within the pattern.
  408. * Can be NULL.
  409. * @param errorCode Standard ICU error code. Its input value must
  410. * pass the U_SUCCESS() test, or else the function returns
  411. * immediately. Check for U_FAILURE() on output or use with
  412. * function chaining. (See User Guide for details.)
  413. * @return *this
  414. * @throws IllegalArgumentException for syntax errors in the pattern string
  415. * @throws IndexOutOfBoundsException if certain limits are exceeded
  416. * (e.g., argument number too high, argument name too long, etc.)
  417. * @throws NumberFormatException if a number could not be parsed
  418. * @stable ICU 4.8
  419. */
  420. MessagePattern &parse(const UnicodeString &pattern,
  421. UParseError *parseError, UErrorCode &errorCode);
  422. /**
  423. * Parses a ChoiceFormat pattern string.
  424. * @param pattern a ChoiceFormat pattern string
  425. * @param parseError Struct to receive information on the position
  426. * of an error within the pattern.
  427. * Can be NULL.
  428. * @param errorCode Standard ICU error code. Its input value must
  429. * pass the U_SUCCESS() test, or else the function returns
  430. * immediately. Check for U_FAILURE() on output or use with
  431. * function chaining. (See User Guide for details.)
  432. * @return *this
  433. * @throws IllegalArgumentException for syntax errors in the pattern string
  434. * @throws IndexOutOfBoundsException if certain limits are exceeded
  435. * (e.g., argument number too high, argument name too long, etc.)
  436. * @throws NumberFormatException if a number could not be parsed
  437. * @stable ICU 4.8
  438. */
  439. MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
  440. UParseError *parseError, UErrorCode &errorCode);
  441. /**
  442. * Parses a PluralFormat pattern string.
  443. * @param pattern a PluralFormat pattern string
  444. * @param parseError Struct to receive information on the position
  445. * of an error within the pattern.
  446. * Can be NULL.
  447. * @param errorCode Standard ICU error code. Its input value must
  448. * pass the U_SUCCESS() test, or else the function returns
  449. * immediately. Check for U_FAILURE() on output or use with
  450. * function chaining. (See User Guide for details.)
  451. * @return *this
  452. * @throws IllegalArgumentException for syntax errors in the pattern string
  453. * @throws IndexOutOfBoundsException if certain limits are exceeded
  454. * (e.g., argument number too high, argument name too long, etc.)
  455. * @throws NumberFormatException if a number could not be parsed
  456. * @stable ICU 4.8
  457. */
  458. MessagePattern &parsePluralStyle(const UnicodeString &pattern,
  459. UParseError *parseError, UErrorCode &errorCode);
  460. /**
  461. * Parses a SelectFormat pattern string.
  462. * @param pattern a SelectFormat pattern string
  463. * @param parseError Struct to receive information on the position
  464. * of an error within the pattern.
  465. * Can be NULL.
  466. * @param errorCode Standard ICU error code. Its input value must
  467. * pass the U_SUCCESS() test, or else the function returns
  468. * immediately. Check for U_FAILURE() on output or use with
  469. * function chaining. (See User Guide for details.)
  470. * @return *this
  471. * @throws IllegalArgumentException for syntax errors in the pattern string
  472. * @throws IndexOutOfBoundsException if certain limits are exceeded
  473. * (e.g., argument number too high, argument name too long, etc.)
  474. * @throws NumberFormatException if a number could not be parsed
  475. * @stable ICU 4.8
  476. */
  477. MessagePattern &parseSelectStyle(const UnicodeString &pattern,
  478. UParseError *parseError, UErrorCode &errorCode);
  479. /**
  480. * Clears this MessagePattern.
  481. * countParts() will return 0.
  482. * @stable ICU 4.8
  483. */
  484. void clear();
  485. /**
  486. * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
  487. * countParts() will return 0.
  488. * @param mode The new UMessagePatternApostropheMode.
  489. * @stable ICU 4.8
  490. */
  491. void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
  492. clear();
  493. aposMode=mode;
  494. }
  495. /**
  496. * @param other another object to compare with.
  497. * @return TRUE if this object is equivalent to the other one.
  498. * @stable ICU 4.8
  499. */
  500. UBool operator==(const MessagePattern &other) const;
  501. /**
  502. * @param other another object to compare with.
  503. * @return FALSE if this object is equivalent to the other one.
  504. * @stable ICU 4.8
  505. */
  506. inline UBool operator!=(const MessagePattern &other) const {
  507. return !operator==(other);
  508. }
  509. /**
  510. * @return A hash code for this object.
  511. * @stable ICU 4.8
  512. */
  513. int32_t hashCode() const;
  514. /**
  515. * @return this instance's UMessagePatternApostropheMode.
  516. * @stable ICU 4.8
  517. */
  518. UMessagePatternApostropheMode getApostropheMode() const {
  519. return aposMode;
  520. }
  521. // Java has package-private jdkAposMode() here.
  522. // In C++, this is declared in the MessageImpl class.
  523. /**
  524. * @return the parsed pattern string (null if none was parsed).
  525. * @stable ICU 4.8
  526. */
  527. const UnicodeString &getPatternString() const {
  528. return msg;
  529. }
  530. /**
  531. * Does the parsed pattern have named arguments like {first_name}?
  532. * @return TRUE if the parsed pattern has at least one named argument.
  533. * @stable ICU 4.8
  534. */
  535. UBool hasNamedArguments() const {
  536. return hasArgNames;
  537. }
  538. /**
  539. * Does the parsed pattern have numbered arguments like {2}?
  540. * @return TRUE if the parsed pattern has at least one numbered argument.
  541. * @stable ICU 4.8
  542. */
  543. UBool hasNumberedArguments() const {
  544. return hasArgNumbers;
  545. }
  546. /**
  547. * Validates and parses an argument name or argument number string.
  548. * An argument name must be a "pattern identifier", that is, it must contain
  549. * no Unicode Pattern_Syntax or Pattern_White_Space characters.
  550. * If it only contains ASCII digits, then it must be a small integer with no leading zero.
  551. * @param name Input string.
  552. * @return &gt;=0 if the name is a valid number,
  553. * UMSGPAT_ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
  554. * UMSGPAT_ARG_NAME_NOT_VALID (-2) if it is neither.
  555. * @stable ICU 4.8
  556. */
  557. static int32_t validateArgumentName(const UnicodeString &name);
  558. /**
  559. * Returns a version of the parsed pattern string where each ASCII apostrophe
  560. * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
  561. * <p>
  562. * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
  563. * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
  564. * @return the deep-auto-quoted version of the parsed pattern string.
  565. * @see MessageFormat.autoQuoteApostrophe()
  566. * @stable ICU 4.8
  567. */
  568. UnicodeString autoQuoteApostropheDeep() const;
  569. class Part;
  570. /**
  571. * Returns the number of "parts" created by parsing the pattern string.
  572. * Returns 0 if no pattern has been parsed or clear() was called.
  573. * @return the number of pattern parts.
  574. * @stable ICU 4.8
  575. */
  576. int32_t countParts() const {
  577. return partsLength;
  578. }
  579. /**
  580. * Gets the i-th pattern "part".
  581. * @param i The index of the Part data. (0..countParts()-1)
  582. * @return the i-th pattern "part".
  583. * @stable ICU 4.8
  584. */
  585. const Part &getPart(int32_t i) const {
  586. return parts[i];
  587. }
  588. /**
  589. * Returns the UMessagePatternPartType of the i-th pattern "part".
  590. * Convenience method for getPart(i).getType().
  591. * @param i The index of the Part data. (0..countParts()-1)
  592. * @return The UMessagePatternPartType of the i-th Part.
  593. * @stable ICU 4.8
  594. */
  595. UMessagePatternPartType getPartType(int32_t i) const {
  596. return getPart(i).type;
  597. }
  598. /**
  599. * Returns the pattern index of the specified pattern "part".
  600. * Convenience method for getPart(partIndex).getIndex().
  601. * @param partIndex The index of the Part data. (0..countParts()-1)
  602. * @return The pattern index of this Part.
  603. * @stable ICU 4.8
  604. */
  605. int32_t getPatternIndex(int32_t partIndex) const {
  606. return getPart(partIndex).index;
  607. }
  608. /**
  609. * Returns the substring of the pattern string indicated by the Part.
  610. * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
  611. * @param part a part of this MessagePattern.
  612. * @return the substring associated with part.
  613. * @stable ICU 4.8
  614. */
  615. UnicodeString getSubstring(const Part &part) const {
  616. return msg.tempSubString(part.index, part.length);
  617. }
  618. /**
  619. * Compares the part's substring with the input string s.
  620. * @param part a part of this MessagePattern.
  621. * @param s a string.
  622. * @return TRUE if getSubstring(part).equals(s).
  623. * @stable ICU 4.8
  624. */
  625. UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
  626. return 0==msg.compare(part.index, part.length, s);
  627. }
  628. /**
  629. * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
  630. * @param part a part of this MessagePattern.
  631. * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
  632. * @stable ICU 4.8
  633. */
  634. double getNumericValue(const Part &part) const;
  635. /**
  636. * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
  637. * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
  638. * @return the "offset:" value.
  639. * @stable ICU 4.8
  640. */
  641. double getPluralOffset(int32_t pluralStart) const;
  642. /**
  643. * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
  644. * @param start The index of some Part data (0..countParts()-1);
  645. * this Part should be of Type ARG_START or MSG_START.
  646. * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
  647. * or start itself if getPartType(msgStart)!=ARG|MSG_START.
  648. * @stable ICU 4.8
  649. */
  650. int32_t getLimitPartIndex(int32_t start) const {
  651. int32_t limit=getPart(start).limitPartIndex;
  652. if(limit<start) {
  653. return start;
  654. }
  655. return limit;
  656. }
  657. /**
  658. * A message pattern "part", representing a pattern parsing event.
  659. * There is a part for the start and end of a message or argument,
  660. * for quoting and escaping of and with ASCII apostrophes,
  661. * and for syntax elements of "complex" arguments.
  662. * @stable ICU 4.8
  663. */
  664. class Part : public UMemory {
  665. public:
  666. /**
  667. * Default constructor, do not use.
  668. * @internal
  669. */
  670. Part() {}
  671. /**
  672. * Returns the type of this part.
  673. * @return the part type.
  674. * @stable ICU 4.8
  675. */
  676. UMessagePatternPartType getType() const {
  677. return type;
  678. }
  679. /**
  680. * Returns the pattern string index associated with this Part.
  681. * @return this part's pattern string index.
  682. * @stable ICU 4.8
  683. */
  684. int32_t getIndex() const {
  685. return index;
  686. }
  687. /**
  688. * Returns the length of the pattern substring associated with this Part.
  689. * This is 0 for some parts.
  690. * @return this part's pattern substring length.
  691. * @stable ICU 4.8
  692. */
  693. int32_t getLength() const {
  694. return length;
  695. }
  696. /**
  697. * Returns the pattern string limit (exclusive-end) index associated with this Part.
  698. * Convenience method for getIndex()+getLength().
  699. * @return this part's pattern string limit index, same as getIndex()+getLength().
  700. * @stable ICU 4.8
  701. */
  702. int32_t getLimit() const {
  703. return index+length;
  704. }
  705. /**
  706. * Returns a value associated with this part.
  707. * See the documentation of each part type for details.
  708. * @return the part value.
  709. * @stable ICU 4.8
  710. */
  711. int32_t getValue() const {
  712. return value;
  713. }
  714. /**
  715. * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
  716. * otherwise UMSGPAT_ARG_TYPE_NONE.
  717. * @return the argument type for this part.
  718. * @stable ICU 4.8
  719. */
  720. UMessagePatternArgType getArgType() const {
  721. UMessagePatternPartType type=getType();
  722. if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
  723. return (UMessagePatternArgType)value;
  724. } else {
  725. return UMSGPAT_ARG_TYPE_NONE;
  726. }
  727. }
  728. /**
  729. * Indicates whether the Part type has a numeric value.
  730. * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
  731. * @param type The Part type to be tested.
  732. * @return TRUE if the Part type has a numeric value.
  733. * @stable ICU 4.8
  734. */
  735. static UBool hasNumericValue(UMessagePatternPartType type) {
  736. return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
  737. }
  738. /**
  739. * @param other another object to compare with.
  740. * @return TRUE if this object is equivalent to the other one.
  741. * @stable ICU 4.8
  742. */
  743. UBool operator==(const Part &other) const;
  744. /**
  745. * @param other another object to compare with.
  746. * @return FALSE if this object is equivalent to the other one.
  747. * @stable ICU 4.8
  748. */
  749. inline UBool operator!=(const Part &other) const {
  750. return !operator==(other);
  751. }
  752. /**
  753. * @return A hash code for this object.
  754. * @stable ICU 4.8
  755. */
  756. int32_t hashCode() const {
  757. return ((type*37+index)*37+length)*37+value;
  758. }
  759. private:
  760. friend class MessagePattern;
  761. static const int32_t MAX_LENGTH=0xffff;
  762. static const int32_t MAX_VALUE=0x7fff;
  763. // Some fields are not final because they are modified during pattern parsing.
  764. // After pattern parsing, the parts are effectively immutable.
  765. UMessagePatternPartType type;
  766. int32_t index;
  767. uint16_t length;
  768. int16_t value;
  769. int32_t limitPartIndex;
  770. };
  771. private:
  // Private pattern-parsing functions. Only the declarations are visible in
  // this header, so the notes here are limited to what the signatures show.
  // Every function that can fail takes an optional-looking UParseError pointer
  // and a UErrorCode reference.
  // NOTE(review): the int32_t results are presumably the pattern index at
  // which the parsed construct ends -- confirm against the implementation.
  // NOTE(review): nestingLevel presumably counts how deeply the current
  // (sub-)message is nested inside arguments -- confirm.
  772. void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
  773. void postParse();
  774. int32_t parseMessage(int32_t index, int32_t msgStartLength,
  775. int32_t nestingLevel, UMessagePatternArgType parentType,
  776. UParseError *parseError, UErrorCode &errorCode);
  777. int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
  778. UParseError *parseError, UErrorCode &errorCode);
  779. int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
  780. int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
  781. UParseError *parseError, UErrorCode &errorCode);
  782. int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
  783. UParseError *parseError, UErrorCode &errorCode);
  784. /**
  785. * Validates and parses an argument name or argument number string.
  786. * This internal method assumes that the input substring is a "pattern identifier".
  * @param s the string that contains the substring to examine
  * @param start start index of the substring within s
  * @param limit limit index of the substring within s
  *        (NOTE(review): presumably exclusive -- confirm)
  787. * @return &gt;=0 if the name is a valid number,
  788. * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
  789. * ARG_NAME_NOT_VALID (-2) if it is neither.
  790. * @see #validateArgumentName(String)
  791. */
  792. static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
  793. int32_t parseArgNumber(int32_t start, int32_t limit) {
  794. return parseArgNumber(msg, start, limit);
  795. }
  796. /**
  797. * Parses a number from the specified message substring.
  * The function returns void; where the parsed value ends up is not visible
  * in this header (NOTE(review): presumably recorded as a part via
  * addPart()/addArgDoublePart() -- confirm).
  798. * @param start start index into the message string
  799. * @param limit limit index into the message string, must be start<limit
  800. * @param allowInfinity TRUE if U+221E is allowed (for ChoiceFormat)
  801. * @param parseError NOTE(review): presumably receives error position details when parsing fails -- confirm
  802. * @param errorCode NOTE(review): presumably the usual in/out error code, set on failure -- confirm
  803. */
  804. void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
  805. UParseError *parseError, UErrorCode &errorCode);
  806. // Java has package-private appendReducedApostrophes() here.
  807. // In C++, this is declared in the MessageImpl class.
  // Scanning helpers that take an index and return an index.
  // NOTE(review): presumably each returns the index just past the skipped
  // run of characters in the message string -- confirm in the implementation.
  808. int32_t skipWhiteSpace(int32_t index);
  809. int32_t skipIdentifier(int32_t index);
  810. /**
  811. * Skips a sequence of characters that could occur in a double value.
  812. * Does not fully parse or validate the value.
  813. */
  814. int32_t skipDouble(int32_t index);
  // Character and keyword predicates.
  // NOTE(review): isChoice/isPlural/isSelect/isOrdinal presumably test whether
  // the text at index spells the corresponding argument-type keyword -- confirm.
  815. static UBool isArgTypeChar(UChar32 c);
  816. UBool isChoice(int32_t index);
  817. UBool isPlural(int32_t index);
  818. UBool isSelect(int32_t index);
  819. UBool isOrdinal(int32_t index);
  820. /**
  * @param nestingLevel NOTE(review): presumably the nesting depth of the
  *        (sub-)pattern currently being parsed -- confirm
  821. * @return TRUE if we are inside a MessageFormat (sub-)pattern,
  822. * as opposed to inside a top-level choice/plural/select pattern.
  823. */
  824. UBool inMessageFormatPattern(int32_t nestingLevel);
  825. /**
  * @param nestingLevel NOTE(review): presumably the nesting depth of the
  *        (sub-)pattern currently being parsed -- confirm
  * @param parentType NOTE(review): presumably the argument type of the
  *        enclosing argument -- confirm
  826. * @return TRUE if we are in a MessageFormat sub-pattern
  827. * of a top-level ChoiceFormat pattern.
  828. */
  829. UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
  // Helpers that record parse results. type/index/length/value correspond to
  // the fields of the same names in Part.
  // NOTE(review): addLimitPart()'s start is presumably the index of the
  // matching start part in the parts array -- confirm.
  830. void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
  831. int32_t value, UErrorCode &errorCode);
  832. void addLimitPart(int32_t start,
  833. UMessagePatternPartType type, int32_t index, int32_t length,
  834. int32_t value, UErrorCode &errorCode);
  835. void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
  // NOTE(review): presumably fills *parseError with context around index -- confirm.
  836. void setParseError(UParseError *parseError, int32_t index);
  // Storage setup and copying.
  // NOTE(review): the UBool results presumably report success (TRUE) or
  // failure, with errorCode set on failure -- confirm.
  837. UBool init(UErrorCode &errorCode);
  838. UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
  // Apostrophe mode setting for this pattern.
  839. UMessagePatternApostropheMode aposMode;
  // The message string; the two-argument parseArgNumber() passes it on as the
  // string to examine.
  840. UnicodeString msg;
  841. // ArrayList<Part> parts=new ArrayList<Part>();
  // C++ counterpart of the Java list in the comment above.
  // NOTE(review): partsList presumably owns the storage, parts points at its
  // elements and partsLength counts the parts in use -- confirm.
  842. MessagePatternPartsList *partsList;
  843. Part *parts;
  844. int32_t partsLength;
  845. // ArrayList<Double> numericValues;
  // NOTE(review): presumably the same list/pointer/count arrangement as for
  // the parts, holding the double values of numeric parts -- confirm.
  846. MessagePatternDoubleList *numericValuesList;
  847. double *numericValues;
  848. int32_t numericValuesLength;
  // NOTE(review): presumably set while parsing to record whether the pattern
  // uses named arguments, numbered arguments, and needs auto-quoting -- confirm.
  849. UBool hasArgNames;
  850. UBool hasArgNumbers;
  851. UBool needsAutoQuoting;
  852. };
  853. U_NAMESPACE_END
  854. #endif // !UCONFIG_NO_FORMATTING
  855. #endif // __MESSAGEPATTERN_H__