simpleformatter.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. * Copyright (C) 2014-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. ******************************************************************************
  8. * simpleformatter.h
  9. */
  10. #ifndef __SIMPLEFORMATTER_H__
  11. #define __SIMPLEFORMATTER_H__
  12. /**
  13. * \file
  14. * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
  15. */
  16. #include "unicode/utypes.h"
  17. #include "unicode/unistr.h"
  18. #ifndef U_HIDE_DRAFT_API
  19. U_NAMESPACE_BEGIN
  20. /**
  21. * Formats simple patterns like "{1} was born in {0}".
  22. * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
  23. * Supports only numbered arguments with no type nor style parameters,
  24. * and formats only string values.
  25. * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
  26. *
  27. * Factory methods set error codes for syntax errors
  28. * and for too few or too many arguments/placeholders.
  29. *
  30. * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
  31. *
  32. * Example:
  33. * <pre>
  34. * UErrorCode errorCode = U_ZERO_ERROR;
  35. * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
  36. * UnicodeString result;
  37. *
  38. * // Output: "paul {born} in england"
  39. * fmt.format("england", "paul", result, errorCode);
  40. * </pre>
  41. *
  42. * This class is not intended for public subclassing.
  43. *
  44. * @see MessageFormat
  45. * @see UMessagePatternApostropheMode
  46. * @draft ICU 57
  47. */
  48. class U_COMMON_API SimpleFormatter U_FINAL : public UMemory {
  49. public:
  50. /**
  51. * Default constructor.
  52. * @draft ICU 57
  53. */
  54. SimpleFormatter() : compiledPattern((UChar)0) {}
  55. /**
  56. * Constructs a formatter from the pattern string.
  57. *
  58. * @param pattern The pattern string.
  59. * @param errorCode ICU error code in/out parameter.
  60. * Must fulfill U_SUCCESS before the function call.
  61. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
  62. * @draft ICU 57
  63. */
  64. SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
  65. applyPattern(pattern, errorCode);
  66. }
  67. /**
  68. * Constructs a formatter from the pattern string.
  69. * The number of arguments checked against the given limits is the
  70. * highest argument number plus one, not the number of occurrences of arguments.
  71. *
  72. * @param pattern The pattern string.
  73. * @param min The pattern must have at least this many arguments.
  74. * @param max The pattern must have at most this many arguments.
  75. * @param errorCode ICU error code in/out parameter.
  76. * Must fulfill U_SUCCESS before the function call.
  77. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
  78. * too few or too many arguments.
  79. * @draft ICU 57
  80. */
  81. SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
  82. UErrorCode &errorCode) {
  83. applyPatternMinMaxArguments(pattern, min, max, errorCode);
  84. }
  85. /**
  86. * Copy constructor.
  87. * @draft ICU 57
  88. */
  89. SimpleFormatter(const SimpleFormatter& other)
  90. : compiledPattern(other.compiledPattern) {}
  91. /**
  92. * Assignment operator.
  93. * @draft ICU 57
  94. */
  95. SimpleFormatter &operator=(const SimpleFormatter& other);
  96. /**
  97. * Destructor.
  98. * @draft ICU 57
  99. */
  100. ~SimpleFormatter();
  101. /**
  102. * Changes this object according to the new pattern.
  103. *
  104. * @param pattern The pattern string.
  105. * @param errorCode ICU error code in/out parameter.
  106. * Must fulfill U_SUCCESS before the function call.
  107. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
  108. * @return TRUE if U_SUCCESS(errorCode).
  109. * @draft ICU 57
  110. */
  111. UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
  112. return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
  113. }
  114. /**
  115. * Changes this object according to the new pattern.
  116. * The number of arguments checked against the given limits is the
  117. * highest argument number plus one, not the number of occurrences of arguments.
  118. *
  119. * @param pattern The pattern string.
  120. * @param min The pattern must have at least this many arguments.
  121. * @param max The pattern must have at most this many arguments.
  122. * @param errorCode ICU error code in/out parameter.
  123. * Must fulfill U_SUCCESS before the function call.
  124. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
  125. * too few or too many arguments.
  126. * @return TRUE if U_SUCCESS(errorCode).
  127. * @draft ICU 57
  128. */
  129. UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
  130. int32_t min, int32_t max, UErrorCode &errorCode);
  131. /**
  132. * @return The max argument number + 1.
  133. * @draft ICU 57
  134. */
  135. int32_t getArgumentLimit() const {
  136. return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
  137. }
  138. /**
  139. * Formats the given value, appending to the appendTo builder.
  140. * The argument value must not be the same object as appendTo.
  141. * getArgumentLimit() must be at most 1.
  142. *
  143. * @param value0 Value for argument {0}.
  144. * @param appendTo Gets the formatted pattern and value appended.
  145. * @param errorCode ICU error code in/out parameter.
  146. * Must fulfill U_SUCCESS before the function call.
  147. * @return appendTo
  148. * @draft ICU 57
  149. */
  150. UnicodeString &format(
  151. const UnicodeString &value0,
  152. UnicodeString &appendTo, UErrorCode &errorCode) const;
  153. /**
  154. * Formats the given values, appending to the appendTo builder.
  155. * An argument value must not be the same object as appendTo.
  156. * getArgumentLimit() must be at most 2.
  157. *
  158. * @param value0 Value for argument {0}.
  159. * @param value1 Value for argument {1}.
  160. * @param appendTo Gets the formatted pattern and values appended.
  161. * @param errorCode ICU error code in/out parameter.
  162. * Must fulfill U_SUCCESS before the function call.
  163. * @return appendTo
  164. * @draft ICU 57
  165. */
  166. UnicodeString &format(
  167. const UnicodeString &value0,
  168. const UnicodeString &value1,
  169. UnicodeString &appendTo, UErrorCode &errorCode) const;
  170. /**
  171. * Formats the given values, appending to the appendTo builder.
  172. * An argument value must not be the same object as appendTo.
  173. * getArgumentLimit() must be at most 3.
  174. *
  175. * @param value0 Value for argument {0}.
  176. * @param value1 Value for argument {1}.
  177. * @param value2 Value for argument {2}.
  178. * @param appendTo Gets the formatted pattern and values appended.
  179. * @param errorCode ICU error code in/out parameter.
  180. * Must fulfill U_SUCCESS before the function call.
  181. * @return appendTo
  182. * @draft ICU 57
  183. */
  184. UnicodeString &format(
  185. const UnicodeString &value0,
  186. const UnicodeString &value1,
  187. const UnicodeString &value2,
  188. UnicodeString &appendTo, UErrorCode &errorCode) const;
  189. /**
  190. * Formats the given values, appending to the appendTo string.
  191. *
  192. * @param values The argument values.
  193. * An argument value must not be the same object as appendTo.
  194. * Can be NULL if valuesLength==getArgumentLimit()==0.
  195. * @param valuesLength The length of the values array.
  196. * Must be at least getArgumentLimit().
  197. * @param appendTo Gets the formatted pattern and values appended.
  198. * @param offsets offsets[i] receives the offset of where
  199. * values[i] replaced pattern argument {i}.
  200. * Can be shorter or longer than values. Can be NULL if offsetsLength==0.
  201. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  202. * @param offsetsLength The length of the offsets array.
  203. * @param errorCode ICU error code in/out parameter.
  204. * Must fulfill U_SUCCESS before the function call.
  205. * @return appendTo
  206. * @draft ICU 57
  207. */
  208. UnicodeString &formatAndAppend(
  209. const UnicodeString *const *values, int32_t valuesLength,
  210. UnicodeString &appendTo,
  211. int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
  212. /**
  213. * Formats the given values, replacing the contents of the result string.
  214. * May optimize by actually appending to the result if it is the same object
  215. * as the value corresponding to the initial argument in the pattern.
  216. *
  217. * @param values The argument values.
  218. * An argument value may be the same object as result.
  219. * Can be NULL if valuesLength==getArgumentLimit()==0.
  220. * @param valuesLength The length of the values array.
  221. * Must be at least getArgumentLimit().
  222. * @param result Gets its contents replaced by the formatted pattern and values.
  223. * @param offsets offsets[i] receives the offset of where
  224. * values[i] replaced pattern argument {i}.
  225. * Can be shorter or longer than values. Can be NULL if offsetsLength==0.
  226. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  227. * @param offsetsLength The length of the offsets array.
  228. * @param errorCode ICU error code in/out parameter.
  229. * Must fulfill U_SUCCESS before the function call.
  230. * @return result
  231. * @draft ICU 57
  232. */
  233. UnicodeString &formatAndReplace(
  234. const UnicodeString *const *values, int32_t valuesLength,
  235. UnicodeString &result,
  236. int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
  237. /**
  238. * Returns the pattern text with none of the arguments.
  239. * Like formatting with all-empty string values.
  240. * @draft ICU 57
  241. */
  242. UnicodeString getTextWithNoArguments() const {
  243. return getTextWithNoArguments(compiledPattern.getBuffer(), compiledPattern.length());
  244. }
  245. private:
  246. /**
  247. * Binary representation of the compiled pattern.
  248. * Index 0: One more than the highest argument number.
  249. * Followed by zero or more arguments or literal-text segments.
  250. *
  251. * An argument is stored as its number, less than ARG_NUM_LIMIT.
  252. * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
  253. * followed by that many chars.
  254. */
  255. UnicodeString compiledPattern;
  256. static inline int32_t getArgumentLimit(const UChar *compiledPattern,
  257. int32_t compiledPatternLength) {
  258. return compiledPatternLength == 0 ? 0 : compiledPattern[0];
  259. }
  260. static UnicodeString getTextWithNoArguments(const UChar *compiledPattern, int32_t compiledPatternLength);
  261. static UnicodeString &format(
  262. const UChar *compiledPattern, int32_t compiledPatternLength,
  263. const UnicodeString *const *values,
  264. UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
  265. int32_t *offsets, int32_t offsetsLength,
  266. UErrorCode &errorCode);
  267. };
  268. U_NAMESPACE_END
  269. #endif /* U_HIDE_DRAFT_API */
  270. #endif // __SIMPLEFORMATTER_H__