unistr.h 170 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657
  1. // Copyright (C) 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1998-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. * File unistr.h
  10. *
  11. * Modification History:
  12. *
  13. * Date Name Description
  14. * 09/25/98 stephen Creation.
  15. * 11/11/98 stephen Changed per 11/9 code review.
  16. * 04/20/99 stephen Overhauled per 4/16 code review.
  17. * 11/18/99 aliu Made to inherit from Replaceable. Added method
  18. * handleReplaceBetween(); other methods unchanged.
  19. * 06/25/01 grhoten Remove dependency on iostream.
  20. ******************************************************************************
  21. */
  22. #ifndef UNISTR_H
  23. #define UNISTR_H
  24. /**
  25. * \file
  26. * \brief C++ API: Unicode String
  27. */
  28. #include "unicode/utypes.h"
  29. #include "unicode/rep.h"
  30. #include "unicode/std_string.h"
  31. #include "unicode/stringpiece.h"
  32. #include "unicode/bytestream.h"
  33. #include "unicode/ucasemap.h"
  34. struct UConverter; // unicode/ucnv.h
  35. #ifndef U_COMPARE_CODE_POINT_ORDER
  36. /* Defined here only when not already defined; see also ustring.h and unorm.h */
  37. /**
  38. * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
  39. * Compare strings in code point order instead of code unit order.
  40. * @stable ICU 2.2
  41. */
  42. #define U_COMPARE_CODE_POINT_ORDER 0x8000
  43. #endif // U_COMPARE_CODE_POINT_ORDER
  44. #ifndef USTRING_H // declare u_strlen here only while USTRING_H is undefined (presumably the include guard of unicode/ustring.h - TODO confirm)
  45. /**
  46. * \ingroup ustring_ustrlen
  47. */
  48. U_STABLE int32_t U_EXPORT2
  49. u_strlen(const UChar *s);
  50. #endif // USTRING_H
  51. /**
  52. * \def U_STRING_CASE_MAPPER_DEFINED
  53. * @internal
  54. */
  55. #ifndef U_STRING_CASE_MAPPER_DEFINED // the typedef below is skipped when this guard macro is already defined
  56. #define U_STRING_CASE_MAPPER_DEFINED
  57. /**
  58. * Internal string case mapping function type: takes a UCaseMap, a destination UChar buffer with its capacity, a source UChar string with its length, and a UErrorCode pointer, and returns an int32_t (presumably the result length - TODO confirm).
  59. * @internal
  60. */
  61. typedef int32_t U_CALLCONV
  62. UStringCaseMapper(const UCaseMap *csm,
  63. UChar *dest, int32_t destCapacity,
  64. const UChar *src, int32_t srcLength,
  65. UErrorCode *pErrorCode);
  66. #endif // U_STRING_CASE_MAPPER_DEFINED
  67. U_NAMESPACE_BEGIN
  68. #if !UCONFIG_NO_BREAK_ITERATION
  69. class BreakIterator; // unicode/brkiter.h
  70. #endif
  71. class Locale; // unicode/locid.h
  72. class StringCharacterIterator;
  73. class UnicodeStringAppendable; // unicode/appendable.h
  74. /* The <iostream> include has been moved to unicode/ustream.h */
  75. /**
  76. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  77. * which constructs a Unicode string from an invariant-character char * string.
  78. * About invariant characters see utypes.h.
  79. * This constructor has no runtime dependency on conversion code and is
  80. * therefore recommended over ones taking a charset name string
  81. * (where the empty string "" indicates invariant-character conversion).
  82. * The macro is shorthand for the fully qualified enumerator icu::UnicodeString::kInvariant.
  83. * @stable ICU 3.2
  84. */
  85. #define US_INV icu::UnicodeString::kInvariant
  86. /**
  87. * Unicode String literals in C++.
  88. * Dependent on the platform properties, different UnicodeString
  89. * constructors should be used to create a UnicodeString object from
  90. * a string literal.
  91. * The macros are defined for maximum performance: three of the four variants below cast the literal to const UChar *, and only the fallback variant goes through the invariant-character (US_INV) constructor.
  92. * They work only for strings that contain "invariant characters", i.e.,
  93. * only Latin letters, digits, and some punctuation.
  94. * See utypes.h for details.
  95. *
  96. * The string parameter must be a C string literal.
  97. * The length of the string, not including the terminating
  98. * <code>NUL</code>, must be specified as a constant.
  99. * The U_STRING_DECL macro should be invoked exactly once for one
  100. * such string variable before it is used.
  101. * @stable ICU 2.0
  102. */
  103. #if defined(U_DECLARE_UTF16) // a UTF-16 literal macro is available: use its result as const UChar *
  104. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
  105. #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) // wchar_t is as wide as UChar: paste an L prefix onto the literal
  106. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
  107. #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY // one-byte UChar on an ASCII platform: use the narrow literal directly
  108. # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
  109. #else // fallback: pass the char * literal to the invariant-character constructor
  110. # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
  111. #endif
  112. /**
  113. * Unicode String literals in C++.
  114. * Dependent on the platform properties, different UnicodeString
  115. * constructors should be used to create a UnicodeString object from
  116. * a string literal.
  117. * The macros are defined for improved performance.
  118. * They work only for strings that contain "invariant characters", i.e.,
  119. * only Latin letters, digits, and some punctuation.
  120. * See utypes.h for details.
  121. *
  122. * The string parameter must be a C string literal. No length argument is needed: this macro forwards to UNICODE_STRING(cs, -1) (a length of -1 presumably means "NUL-terminated" - TODO confirm against the constructor documentation).
  123. * @stable ICU 2.0
  124. */
  125. #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
  126. /**
  127. * \def UNISTR_FROM_CHAR_EXPLICIT
  128. * This can be defined to be empty or "explicit"; a definition made before this header is included overrides the defaults chosen below.
  129. * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
  130. * constructors are marked as explicit, preventing their inadvertent use.
  131. * @stable ICU 49
  132. */
  133. #ifndef UNISTR_FROM_CHAR_EXPLICIT
  134. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  135. // Auto-"explicit" in ICU library code (any of the U_*_IMPLEMENTATION macros tested above is defined).
  136. # define UNISTR_FROM_CHAR_EXPLICIT explicit
  137. # else
  138. // Empty by default for source code compatibility (none of those macros is defined).
  139. # define UNISTR_FROM_CHAR_EXPLICIT
  140. # endif
  141. #endif // UNISTR_FROM_CHAR_EXPLICIT
  142. /**
  143. * \def UNISTR_FROM_STRING_EXPLICIT
  144. * This can be defined to be empty or "explicit"; a definition made before this header is included overrides the defaults chosen below.
  145. * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
  146. * constructors are marked as explicit, preventing their inadvertent use.
  147. *
  148. * In particular, this helps prevent accidentally depending on ICU conversion code
  149. * by passing a string literal into an API with a const UnicodeString & parameter.
  150. * @stable ICU 49
  151. */
  152. #ifndef UNISTR_FROM_STRING_EXPLICIT
  153. # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
  154. // Auto-"explicit" in ICU library code (any of the U_*_IMPLEMENTATION macros tested above is defined).
  155. # define UNISTR_FROM_STRING_EXPLICIT explicit
  156. # else
  157. // Empty by default for source code compatibility (none of those macros is defined).
  158. # define UNISTR_FROM_STRING_EXPLICIT
  159. # endif
  160. #endif // UNISTR_FROM_STRING_EXPLICIT
  161. /**
  162. * \def UNISTR_OBJECT_SIZE
  163. * Desired sizeof(UnicodeString) in bytes. Defaults to 64 unless it is defined before this header is included.
  164. * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
  165. * The object size may want to be a multiple of 16 bytes,
  166. * which is a common granularity for heap allocation.
  167. *
  168. * Any space inside the object beyond sizeof(vtable pointer) + 2
  169. * is available for storing short strings inside the object.
  170. * The bigger the object, the longer a string that can be stored inside the object,
  171. * without additional heap allocation.
  172. *
  173. * Depending on a platform's pointer size, pointer alignment requirements,
  174. * and struct padding, the compiler will usually round up sizeof(UnicodeString)
  175. * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
  176. * to hold the fields for heap-allocated strings.
  177. * Such a minimum size also ensures that the object is easily large enough
  178. * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH).
  179. *
  180. * sizeof(UnicodeString) >= 48 should work for all known platforms.
  181. *
  182. * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
  183. * sizeof(UnicodeString) = 64 would leave space for
  184. * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
  185. * UChars stored inside the object.
  186. *
  187. * The minimum object size on a 64-bit machine would be
  188. * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
  189. * and the internal buffer would hold up to (32 - 8 - 2) / 2 = 11 UChars in that case.
  190. *
  191. * @see U16_MAX_LENGTH
  192. * @stable ICU 56
  193. */
  194. #ifndef UNISTR_OBJECT_SIZE
  195. # define UNISTR_OBJECT_SIZE 64
  196. #endif // UNISTR_OBJECT_SIZE
  197. /**
  198. * UnicodeString is a string class that stores Unicode characters directly and provides
  199. * similar functionality as the Java String and StringBuffer/StringBuilder classes.
  200. * It is a concrete implementation of the abstract class Replaceable (for transliteration).
  201. *
  202. * A UnicodeString may also "alias" an external array of characters
  203. * (that is, point to it, rather than own the array)
  204. * whose lifetime must then at least match the lifetime of the aliasing object.
  205. * This aliasing may be preserved when returning a UnicodeString by value,
  206. * depending on the compiler and the function implementation,
  207. * via Return Value Optimization (RVO) or the move assignment operator.
  208. * (However, the copy assignment operator does not preserve aliasing.)
  209. * For details see the description of storage models at the end of the class API docs
  210. * and in the User Guide chapter linked from there.
  211. *
  212. * The UnicodeString class is not suitable for subclassing.
  213. *
  214. * <p>For an overview of Unicode strings in C and C++ see the
  215. * <a href="http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-">User Guide Strings chapter</a>.</p>
  216. *
  217. * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
  218. * A Unicode character may be stored with either one code unit
  219. * (the most common case) or with a matched pair of special code units
  220. * ("surrogates"). The data type for code units is UChar.
  221. * For single-character handling, a Unicode character code <em>point</em> is a value
  222. * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
  223. *
  224. * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
  225. * This is the same as with multi-byte char* strings in traditional string handling.
  226. * Operations on partial strings typically do not test for code point boundaries.
  227. * If necessary, the user needs to take care of such boundaries by testing for the code unit
  228. * values or by using functions like
  229. * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
  230. * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
  231. *
  232. * UnicodeString methods are more lenient with regard to input parameter values
  233. * than other ICU APIs. In particular:
  234. * - If indexes are out of bounds for a UnicodeString object
  235. * (<0 or >length()) then they are "pinned" to the nearest boundary.
  236. * - If primitive string pointer values (e.g., const UChar * or char *)
  237. * for input strings are NULL, then those input string parameters are treated
  238. * as if they pointed to an empty string.
  239. * However, this is <em>not</em> the case for char * parameters for charset names
  240. * or other IDs.
  241. * - Most UnicodeString methods do not take a UErrorCode parameter because
  242. * there are usually very few opportunities for failure other than a shortage
  243. * of memory, error codes in low-level C++ string methods would be inconvenient,
  244. * and the error code as the last parameter (ICU convention) would prevent
  245. * the use of default parameter values.
  246. * Instead, such methods set the UnicodeString into a "bogus" state
  247. * (see isBogus()) if an error occurs.
  248. *
  249. * In string comparisons, two UnicodeString objects that are both "bogus"
  250. * compare equal (to be transitive and prevent endless loops in sorting),
  251. * and a "bogus" string compares less than any non-"bogus" one.
  252. *
  253. * Const UnicodeString methods are thread-safe. Multiple threads can use
  254. * const methods on the same UnicodeString object simultaneously,
  255. * but non-const methods must not be called concurrently (in multiple threads)
  256. * with any other (const or non-const) methods.
  257. *
  258. * Similarly, const UnicodeString & parameters are thread-safe.
  259. * One object may be passed in as such a parameter concurrently in multiple threads.
  260. * This includes the const UnicodeString & parameters for
  261. * copy construction, assignment, and cloning.
  262. *
  263. * <p>UnicodeString uses several storage methods.
  264. * String contents can be stored inside the UnicodeString object itself,
  265. * in an allocated and shared buffer, or in an outside buffer that is "aliased".
  266. * Most of this is done transparently, but careful aliasing in particular provides
  267. * significant performance improvements.
  268. * Also, the internal buffer is accessible via special functions.
  269. * For details see the
  270. * <a href="http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model">User Guide Strings chapter</a>.</p>
  271. *
  272. * @see utf.h
  273. * @see CharacterIterator
  274. * @stable ICU 2.0
  275. */
  276. class U_COMMON_API UnicodeString : public Replaceable
  277. {
  278. public:
  279. /**
  280. * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
  281. * which constructs a Unicode string from an invariant-character char * string.
  282. * Use the macro US_INV instead of the full qualification for this value.
  283. * The enum declares a single enumerator, kInvariant.
  284. * @see US_INV
  285. * @stable ICU 3.2
  286. */
  287. enum EInvariant {
  288. /** The only enumerator of EInvariant; the US_INV macro expands to it.
  289. * @see EInvariant
  290. * @stable ICU 3.2
  291. */
  292. kInvariant
  293. };
  294. //========================================
  295. // Read-only operations
  296. //========================================
  297. /* Comparison - bitwise only - for international comparison use collation */
  298. /**
  299. * Equality operator. Performs only bitwise comparison; for international comparison use collation.
  300. * @param text The UnicodeString to compare to this one.
  301. * @return TRUE if <code>text</code> contains the same characters as this one,
  302. * FALSE otherwise.
  303. * @stable ICU 2.0
  304. */
  305. inline UBool operator== (const UnicodeString& text) const;
  306. /**
  307. * Inequality operator. Performs only bitwise comparison.
  308. * @param text The UnicodeString to compare to this one.
  309. * @return FALSE if <TT>text</TT> contains the same characters as this one,
  310. * TRUE otherwise.
  311. * @stable ICU 2.0
  312. */
  313. inline UBool operator!= (const UnicodeString& text) const;
  314. /**
  315. * Greater than operator. Performs only bitwise comparison.
  316. * @param text The UnicodeString to compare to this one.
  317. * @return TRUE if the characters in this are bitwise
  318. * greater than the characters in <code>text</code>, FALSE otherwise
  319. * @stable ICU 2.0
  320. */
  321. inline UBool operator> (const UnicodeString& text) const;
  322. /**
  323. * Less than operator. Performs only bitwise comparison.
  324. * @param text The UnicodeString to compare to this one.
  325. * @return TRUE if the characters in this are bitwise
  326. * less than the characters in <code>text</code>, FALSE otherwise
  327. * @stable ICU 2.0
  328. */
  329. inline UBool operator< (const UnicodeString& text) const;
  330. /**
  331. * Greater than or equal operator. Performs only bitwise comparison.
  332. * @param text The UnicodeString to compare to this one.
  333. * @return TRUE if the characters in this are bitwise
  334. * greater than or equal to the characters in <code>text</code>, FALSE otherwise
  335. * @stable ICU 2.0
  336. */
  337. inline UBool operator>= (const UnicodeString& text) const;
  338. /**
  339. * Less than or equal operator. Performs only bitwise comparison.
  340. * @param text The UnicodeString to compare to this one.
  341. * @return TRUE if the characters in this are bitwise
  342. * less than or equal to the characters in <code>text</code>, FALSE otherwise
  343. * @stable ICU 2.0
  344. */
  345. inline UBool operator<= (const UnicodeString& text) const;
  346. /**
  347. * Compare the characters bitwise in this UnicodeString to
  348. * the characters in <code>text</code>.
  349. * @param text The UnicodeString to compare to this one.
  350. * @return The result of bitwise character comparison: 0 if this
  351. * contains the same characters as <code>text</code>, -1 if the characters in
  352. * this are bitwise less than the characters in <code>text</code>, +1 if the
  353. * characters in this are bitwise greater than the characters
  354. * in <code>text</code>.
  355. * @stable ICU 2.0
  356. */
  357. inline int8_t compare(const UnicodeString& text) const;
  358. /**
  359. * Compare the characters bitwise in the range
  360. * [<TT>start</TT>, <TT>start + length</TT>) with the characters
  361. * in the <b>entire string</b> <TT>text</TT>.
  362. * (The parameters "start" and "length" are not applied to the other text "text".)
  363. * @param start the offset at which the compare operation begins
  364. * @param length the number of characters of text to compare.
  365. * @param text the other text to be compared against this string.
  366. * @return The result of bitwise character comparison: 0 if this
  367. * contains the same characters as <code>text</code>, -1 if the characters in
  368. * this are bitwise less than the characters in <code>text</code>, +1 if the
  369. * characters in this are bitwise greater than the characters
  370. * in <code>text</code>.
  371. * @stable ICU 2.0
  372. */
  373. inline int8_t compare(int32_t start,
  374. int32_t length,
  375. const UnicodeString& text) const;
  376. /**
  377. * Compare the characters bitwise in the range
  378. * [<TT>start</TT>, <TT>start + length</TT>) with the characters
  379. * in <TT>srcText</TT> in the range
  380. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  381. * @param start the offset at which the compare operation begins
  382. * @param length the number of characters in this to compare.
  383. * @param srcText the text to be compared
  384. * @param srcStart the offset into <TT>srcText</TT> to start comparison
  385. * @param srcLength the number of characters in <TT>srcText</TT> to compare
  386. * @return The result of bitwise character comparison: 0 if this
  387. * contains the same characters as <code>srcText</code>, -1 if the characters in
  388. * this are bitwise less than the characters in <code>srcText</code>, +1 if the
  389. * characters in this are bitwise greater than the characters
  390. * in <code>srcText</code>.
  391. * @stable ICU 2.0
  392. */
  393. inline int8_t compare(int32_t start,
  394. int32_t length,
  395. const UnicodeString& srcText,
  396. int32_t srcStart,
  397. int32_t srcLength) const;
  398. /**
  399. * Compare the characters bitwise in this UnicodeString with the first
  400. * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
  401. * @param srcChars The characters to compare to this UnicodeString.
  402. * @param srcLength the number of characters in <TT>srcChars</TT> to compare
  403. * @return The result of bitwise character comparison: 0 if this
  404. * contains the same characters as <code>srcChars</code>, -1 if the characters in
  405. * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
  406. * characters in this are bitwise greater than the characters
  407. * in <code>srcChars</code>.
  408. * @stable ICU 2.0
  409. */
  410. inline int8_t compare(const UChar *srcChars,
  411. int32_t srcLength) const;
  412. /**
  413. * Compare the characters bitwise in the range
  414. * [<TT>start</TT>, <TT>start + length</TT>) with the first
  415. * <TT>length</TT> characters in <TT>srcChars</TT>
  416. * @param start the offset at which the compare operation begins
  417. * @param length the number of characters to compare.
  418. * @param srcChars the characters to be compared
  419. * @return The result of bitwise character comparison: 0 if this
  420. * contains the same characters as <code>srcChars</code>, -1 if the characters in
  421. * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
  422. * characters in this are bitwise greater than the characters
  423. * in <code>srcChars</code>.
  424. * @stable ICU 2.0
  425. */
  426. inline int8_t compare(int32_t start,
  427. int32_t length,
  428. const UChar *srcChars) const;
  429. /**
  430. * Compare the characters bitwise in the range
  431. * [<TT>start</TT>, <TT>start + length</TT>) with the characters
  432. * in <TT>srcChars</TT> in the range
  433. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  434. * @param start the offset at which the compare operation begins
  435. * @param length the number of characters in this to compare
  436. * @param srcChars the characters to be compared
  437. * @param srcStart the offset into <TT>srcChars</TT> to start comparison
  438. * @param srcLength the number of characters in <TT>srcChars</TT> to compare
  439. * @return The result of bitwise character comparison: 0 if this
  440. * contains the same characters as <code>srcChars</code>, -1 if the characters in
  441. * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
  442. * characters in this are bitwise greater than the characters
  443. * in <code>srcChars</code>.
  444. * @stable ICU 2.0
  445. */
  446. inline int8_t compare(int32_t start,
  447. int32_t length,
  448. const UChar *srcChars,
  449. int32_t srcStart,
  450. int32_t srcLength) const;
  451. /**
  452. * Compare the characters bitwise in the range
  453. * [<TT>start</TT>, <TT>limit</TT>) with the characters
  454. * in <TT>srcText</TT> in the range
  455. * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
  456. * @param start the offset at which the compare operation begins
  457. * @param limit the offset immediately following the compare operation
  458. * @param srcText the text to be compared
  459. * @param srcStart the offset into <TT>srcText</TT> to start comparison
  460. * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
  461. * @return The result of bitwise character comparison: 0 if this
  462. * contains the same characters as <code>srcText</code>, -1 if the characters in
  463. * this are bitwise less than the characters in <code>srcText</code>, +1 if the
  464. * characters in this are bitwise greater than the characters
  465. * in <code>srcText</code>.
  466. * @stable ICU 2.0
  467. */
  468. inline int8_t compareBetween(int32_t start,
  469. int32_t limit,
  470. const UnicodeString& srcText,
  471. int32_t srcStart,
  472. int32_t srcLimit) const;
  473. /**
  474. * Compare two Unicode strings in code point order.
  475. * The result may be different from the results of compare(), operator<, etc.
  476. * if supplementary characters are present:
  477. *
  478. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  479. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  480. * which means that they compare as less than some other BMP characters like U+feff.
  481. * This function compares Unicode strings in code point order.
  482. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  483. *
  484. * @param text Another string to compare this one to.
  485. * @return a negative/zero/positive integer corresponding to whether
  486. * this string is less than/equal to/greater than the second one
  487. * in code point order
  488. * @stable ICU 2.0
  489. */
  490. inline int8_t compareCodePointOrder(const UnicodeString& text) const;
  491. /**
  492. * Compare two Unicode strings in code point order.
  493. * The result may be different from the results of compare(), operator<, etc.
  494. * if supplementary characters are present:
  495. *
  496. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  497. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  498. * which means that they compare as less than some other BMP characters like U+feff.
  499. * This function compares Unicode strings in code point order.
  500. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  501. *
  502. * @param start The start offset in this string at which the compare operation begins.
  503. * @param length The number of code units from this string to compare.
  504. * @param srcText Another string to compare this one to.
  505. * @return a negative/zero/positive integer corresponding to whether
  506. * this string is less than/equal to/greater than the second one
  507. * in code point order
  508. * @stable ICU 2.0
  509. */
  510. inline int8_t compareCodePointOrder(int32_t start,
  511. int32_t length,
  512. const UnicodeString& srcText) const;
  513. /**
  514. * Compare two Unicode strings in code point order.
  515. * The result may be different from the results of compare(), operator<, etc.
  516. * if supplementary characters are present:
  517. *
  518. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  519. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  520. * which means that they compare as less than some other BMP characters like U+feff.
  521. * This function compares Unicode strings in code point order.
  522. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  523. *
  524. * @param start The start offset in this string at which the compare operation begins.
  525. * @param length The number of code units from this string to compare.
  526. * @param srcText Another string to compare this one to.
  527. * @param srcStart The start offset in that string at which the compare operation begins.
  528. * @param srcLength The number of code units from that string to compare.
  529. * @return a negative/zero/positive integer corresponding to whether
  530. * this string is less than/equal to/greater than the second one
  531. * in code point order
  532. * @stable ICU 2.0
  533. */
  534. inline int8_t compareCodePointOrder(int32_t start,
  535. int32_t length,
  536. const UnicodeString& srcText,
  537. int32_t srcStart,
  538. int32_t srcLength) const;
  539. /**
  540. * Compare two Unicode strings in code point order.
  541. * The result may be different from the results of compare(), operator<, etc.
  542. * if supplementary characters are present:
  543. *
  544. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  545. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  546. * which means that they compare as less than some other BMP characters like U+feff.
  547. * This function compares Unicode strings in code point order.
  548. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  549. *
  550. * @param srcChars A pointer to another string to compare this one to.
  551. * @param srcLength The number of code units from that string to compare.
  552. * @return a negative/zero/positive integer corresponding to whether
  553. * this string is less than/equal to/greater than the second one
  554. * in code point order
  555. * @stable ICU 2.0
  556. */
  557. inline int8_t compareCodePointOrder(const UChar *srcChars,
  558. int32_t srcLength) const;
  559. /**
  560. * Compare two Unicode strings in code point order.
  561. * The result may be different from the results of compare(), operator<, etc.
  562. * if supplementary characters are present:
  563. *
  564. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  565. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  566. * which means that they compare as less than some other BMP characters like U+feff.
  567. * This function compares Unicode strings in code point order.
  568. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  569. *
  570. * @param start The start offset in this string at which the compare operation begins.
  571. * @param length The number of code units from this string to compare.
  572. * @param srcChars A pointer to another string to compare this one to.
  573. * @return a negative/zero/positive integer corresponding to whether
  574. * this string is less than/equal to/greater than the second one
  575. * in code point order
  576. * @stable ICU 2.0
  577. */
  578. inline int8_t compareCodePointOrder(int32_t start,
  579. int32_t length,
  580. const UChar *srcChars) const;
  581. /**
  582. * Compare two Unicode strings in code point order.
  583. * The result may be different from the results of compare(), operator<, etc.
  584. * if supplementary characters are present:
  585. *
  586. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  587. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  588. * which means that they compare as less than some other BMP characters like U+feff.
  589. * This function compares Unicode strings in code point order.
  590. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  591. *
  592. * @param start The start offset in this string at which the compare operation begins.
  593. * @param length The number of code units from this string to compare.
  594. * @param srcChars A pointer to another string to compare this one to.
  595. * @param srcStart The start offset in that string at which the compare operation begins.
  596. * @param srcLength The number of code units from that string to compare.
  597. * @return a negative/zero/positive integer corresponding to whether
  598. * this string is less than/equal to/greater than the second one
  599. * in code point order
  600. * @stable ICU 2.0
  601. */
  602. inline int8_t compareCodePointOrder(int32_t start,
  603. int32_t length,
  604. const UChar *srcChars,
  605. int32_t srcStart,
  606. int32_t srcLength) const;
  607. /**
  608. * Compare two Unicode strings in code point order.
  609. * The result may be different from the results of compare(), operator<, etc.
  610. * if supplementary characters are present:
  611. *
  612. * In UTF-16, supplementary characters (with code points U+10000 and above) are
  613. * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
  614. * which means that they compare as less than some other BMP characters like U+feff.
  615. * This function compares Unicode strings in code point order.
  616. * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
  617. *
  618. * @param start The start offset in this string at which the compare operation begins.
  619. * @param limit The offset after the last code unit from this string to compare.
  620. * @param srcText Another string to compare this one to.
  621. * @param srcStart The start offset in that string at which the compare operation begins.
  622. * @param srcLimit The offset after the last code unit from that string to compare.
  623. * @return a negative/zero/positive integer corresponding to whether
  624. * this string is less than/equal to/greater than the second one
  625. * in code point order
  626. * @stable ICU 2.0
  627. */
  628. inline int8_t compareCodePointOrderBetween(int32_t start,
  629. int32_t limit,
  630. const UnicodeString& srcText,
  631. int32_t srcStart,
  632. int32_t srcLimit) const;
  633. /**
  634. * Compare two strings case-insensitively using full case folding.
  635. * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
  636. *
  637. * @param text Another string to compare this one to.
  638. * @param options A bit set of options:
  639. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  640. * Comparison in code unit order with default case folding.
  641. *
  642. * - U_COMPARE_CODE_POINT_ORDER
  643. * Set to choose code point order instead of code unit order
  644. * (see u_strCompare for details).
  645. *
  646. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  647. *
  648. * @return A negative, zero, or positive integer indicating the comparison result.
  649. * @stable ICU 2.0
  650. */
  651. inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
  652. /**
  653. * Compare two strings case-insensitively using full case folding.
  654. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  655. *
  656. * @param start The start offset in this string at which the compare operation begins.
  657. * @param length The number of code units from this string to compare.
  658. * @param srcText Another string to compare this one to.
  659. * @param options A bit set of options:
  660. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  661. * Comparison in code unit order with default case folding.
  662. *
  663. * - U_COMPARE_CODE_POINT_ORDER
  664. * Set to choose code point order instead of code unit order
  665. * (see u_strCompare for details).
  666. *
  667. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  668. *
  669. * @return A negative, zero, or positive integer indicating the comparison result.
  670. * @stable ICU 2.0
  671. */
  672. inline int8_t caseCompare(int32_t start,
  673. int32_t length,
  674. const UnicodeString& srcText,
  675. uint32_t options) const;
  676. /**
  677. * Compare two strings case-insensitively using full case folding.
  678. * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
  679. *
  680. * @param start The start offset in this string at which the compare operation begins.
  681. * @param length The number of code units from this string to compare.
  682. * @param srcText Another string to compare this one to.
  683. * @param srcStart The start offset in that string at which the compare operation begins.
  684. * @param srcLength The number of code units from that string to compare.
  685. * @param options A bit set of options:
  686. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  687. * Comparison in code unit order with default case folding.
  688. *
  689. * - U_COMPARE_CODE_POINT_ORDER
  690. * Set to choose code point order instead of code unit order
  691. * (see u_strCompare for details).
  692. *
  693. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  694. *
  695. * @return A negative, zero, or positive integer indicating the comparison result.
  696. * @stable ICU 2.0
  697. */
  698. inline int8_t caseCompare(int32_t start,
  699. int32_t length,
  700. const UnicodeString& srcText,
  701. int32_t srcStart,
  702. int32_t srcLength,
  703. uint32_t options) const;
  704. /**
  705. * Compare two strings case-insensitively using full case folding.
  706. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  707. *
  708. * @param srcChars A pointer to another string to compare this one to.
  709. * @param srcLength The number of code units from that string to compare.
  710. * @param options A bit set of options:
  711. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  712. * Comparison in code unit order with default case folding.
  713. *
  714. * - U_COMPARE_CODE_POINT_ORDER
  715. * Set to choose code point order instead of code unit order
  716. * (see u_strCompare for details).
  717. *
  718. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  719. *
  720. * @return A negative, zero, or positive integer indicating the comparison result.
  721. * @stable ICU 2.0
  722. */
  723. inline int8_t caseCompare(const UChar *srcChars,
  724. int32_t srcLength,
  725. uint32_t options) const;
  726. /**
  727. * Compare two strings case-insensitively using full case folding.
  728. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  729. *
  730. * @param start The start offset in this string at which the compare operation begins.
  731. * @param length The number of code units from this string to compare.
  732. * @param srcChars A pointer to another string to compare this one to.
  733. * @param options A bit set of options:
  734. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  735. * Comparison in code unit order with default case folding.
  736. *
  737. * - U_COMPARE_CODE_POINT_ORDER
  738. * Set to choose code point order instead of code unit order
  739. * (see u_strCompare for details).
  740. *
  741. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  742. *
  743. * @return A negative, zero, or positive integer indicating the comparison result.
  744. * @stable ICU 2.0
  745. */
  746. inline int8_t caseCompare(int32_t start,
  747. int32_t length,
  748. const UChar *srcChars,
  749. uint32_t options) const;
  750. /**
  751. * Compare two strings case-insensitively using full case folding.
  752. * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
  753. *
  754. * @param start The start offset in this string at which the compare operation begins.
  755. * @param length The number of code units from this string to compare.
  756. * @param srcChars A pointer to another string to compare this one to.
  757. * @param srcStart The start offset in that string at which the compare operation begins.
  758. * @param srcLength The number of code units from that string to compare.
  759. * @param options A bit set of options:
  760. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  761. * Comparison in code unit order with default case folding.
  762. *
  763. * - U_COMPARE_CODE_POINT_ORDER
  764. * Set to choose code point order instead of code unit order
  765. * (see u_strCompare for details).
  766. *
  767. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  768. *
  769. * @return A negative, zero, or positive integer indicating the comparison result.
  770. * @stable ICU 2.0
  771. */
  772. inline int8_t caseCompare(int32_t start,
  773. int32_t length,
  774. const UChar *srcChars,
  775. int32_t srcStart,
  776. int32_t srcLength,
  777. uint32_t options) const;
  778. /**
  779. * Compare two strings case-insensitively using full case folding.
  780. * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
  781. *
  782. * @param start The start offset in this string at which the compare operation begins.
  783. * @param limit The offset after the last code unit from this string to compare.
  784. * @param srcText Another string to compare this one to.
  785. * @param srcStart The start offset in that string at which the compare operation begins.
  786. * @param srcLimit The offset after the last code unit from that string to compare.
  787. * @param options A bit set of options:
  788. * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
  789. * Comparison in code unit order with default case folding.
  790. *
  791. * - U_COMPARE_CODE_POINT_ORDER
  792. * Set to choose code point order instead of code unit order
  793. * (see u_strCompare for details).
  794. *
  795. * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
  796. *
  797. * @return A negative, zero, or positive integer indicating the comparison result.
  798. * @stable ICU 2.0
  799. */
  800. inline int8_t caseCompareBetween(int32_t start,
  801. int32_t limit,
  802. const UnicodeString& srcText,
  803. int32_t srcStart,
  804. int32_t srcLimit,
  805. uint32_t options) const;
  806. /**
  807. * Determine if this starts with the characters in <TT>text</TT>
  808. * @param text The text to match.
  809. * @return TRUE if this starts with the characters in <TT>text</TT>,
  810. * FALSE otherwise
  811. * @stable ICU 2.0
  812. */
  813. inline UBool startsWith(const UnicodeString& text) const;
  814. /**
  815. * Determine if this starts with the characters in <TT>srcText</TT>
  816. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  817. * @param srcText The text to match.
  818. * @param srcStart the offset into <TT>srcText</TT> to start matching
  819. * @param srcLength the number of characters in <TT>srcText</TT> to match
  820. * @return TRUE if this starts with the specified range of <TT>srcText</TT>,
  821. * FALSE otherwise
  822. * @stable ICU 2.0
  823. */
  824. inline UBool startsWith(const UnicodeString& srcText,
  825. int32_t srcStart,
  826. int32_t srcLength) const;
  827. /**
  828. * Determine if this starts with the characters in <TT>srcChars</TT>
  829. * @param srcChars The characters to match.
  830. * @param srcLength the number of characters in <TT>srcChars</TT>
  831. * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
  832. * FALSE otherwise
  833. * @stable ICU 2.0
  834. */
  835. inline UBool startsWith(const UChar *srcChars,
  836. int32_t srcLength) const;
  837. /**
  838. * Determine if this starts with the characters in <TT>srcChars</TT>
  839. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  840. * @param srcChars The characters to match.
  841. * @param srcStart the offset into <TT>srcChars</TT> to start matching
  842. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  843. * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
  844. * @stable ICU 2.0
  845. */
  846. inline UBool startsWith(const UChar *srcChars,
  847. int32_t srcStart,
  848. int32_t srcLength) const;
  849. /**
  850. * Determine if this ends with the characters in <TT>text</TT>
  851. * @param text The text to match.
  852. * @return TRUE if this ends with the characters in <TT>text</TT>,
  853. * FALSE otherwise
  854. * @stable ICU 2.0
  855. */
  856. inline UBool endsWith(const UnicodeString& text) const;
  857. /**
  858. * Determine if this ends with the characters in <TT>srcText</TT>
  859. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  860. * @param srcText The text to match.
  861. * @param srcStart the offset into <TT>srcText</TT> to start matching
  862. * @param srcLength the number of characters in <TT>srcText</TT> to match
  863. * @return TRUE if this ends with the specified range of <TT>srcText</TT>,
  864. * FALSE otherwise
  865. * @stable ICU 2.0
  866. */
  867. inline UBool endsWith(const UnicodeString& srcText,
  868. int32_t srcStart,
  869. int32_t srcLength) const;
  870. /**
  871. * Determine if this ends with the characters in <TT>srcChars</TT>
  872. * @param srcChars The characters to match.
  873. * @param srcLength the number of characters in <TT>srcChars</TT>
  874. * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
  875. * FALSE otherwise
  876. * @stable ICU 2.0
  877. */
  878. inline UBool endsWith(const UChar *srcChars,
  879. int32_t srcLength) const;
  880. /**
  881. * Determine if this ends with the characters in <TT>srcChars</TT>
  882. * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  883. * @param srcChars The characters to match.
  884. * @param srcStart the offset into <TT>srcChars</TT> to start matching
  885. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  886. * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
  887. * FALSE otherwise
  888. * @stable ICU 2.0
  889. */
  890. inline UBool endsWith(const UChar *srcChars,
  891. int32_t srcStart,
  892. int32_t srcLength) const;
  893. /* Searching - bitwise only */
  894. /**
  895. * Locate in this the first occurrence of the characters in <TT>text</TT>,
  896. * using bitwise comparison.
  897. * @param text The text to search for.
  898. * @return The offset into this of the start of <TT>text</TT>,
  899. * or -1 if not found.
  900. * @stable ICU 2.0
  901. */
  902. inline int32_t indexOf(const UnicodeString& text) const;
  903. /**
  904. * Locate in this the first occurrence of the characters in <TT>text</TT>
  905. * starting at offset <TT>start</TT>, using bitwise comparison.
  906. * @param text The text to search for.
  907. * @param start The offset at which searching will start.
  908. * @return The offset into this of the start of <TT>text</TT>,
  909. * or -1 if not found.
  910. * @stable ICU 2.0
  911. */
  912. inline int32_t indexOf(const UnicodeString& text,
  913. int32_t start) const;
  914. /**
  915. * Locate in this the first occurrence in the range
  916. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  917. * in <TT>text</TT>, using bitwise comparison.
  918. * @param text The text to search for.
  919. * @param start The offset at which searching will start.
  920. * @param length The number of characters to search
  921. * @return The offset into this of the start of <TT>text</TT>,
  922. * or -1 if not found.
  923. * @stable ICU 2.0
  924. */
  925. inline int32_t indexOf(const UnicodeString& text,
  926. int32_t start,
  927. int32_t length) const;
  928. /**
  929. * Locate in this the first occurrence in the range
  930. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  931. * in <TT>srcText</TT> in the range
  932. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  933. * using bitwise comparison.
  934. * @param srcText The text to search for.
  935. * @param srcStart the offset into <TT>srcText</TT> at which
  936. * to start matching
  937. * @param srcLength the number of characters in <TT>srcText</TT> to match
  938. * @param start the offset into this at which to start matching
  939. * @param length the number of characters in this to search
  940. * @return The offset into this of the start of the matched characters,
  941. * or -1 if not found.
  942. * @stable ICU 2.0
  943. */
  944. inline int32_t indexOf(const UnicodeString& srcText,
  945. int32_t srcStart,
  946. int32_t srcLength,
  947. int32_t start,
  948. int32_t length) const;
  949. /**
  950. * Locate in this the first occurrence of the characters in
  951. * <TT>srcChars</TT>
  952. * starting at offset <TT>start</TT>, using bitwise comparison.
  953. * @param srcChars The text to search for.
  954. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  955. * @param start the offset into this at which to start matching
  956. * @return The offset into this of the start of <TT>srcChars</TT>,
  957. * or -1 if not found.
  958. * @stable ICU 2.0
  959. */
  960. inline int32_t indexOf(const UChar *srcChars,
  961. int32_t srcLength,
  962. int32_t start) const;
  963. /**
  964. * Locate in this the first occurrence in the range
  965. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  966. * in <TT>srcChars</TT>, using bitwise comparison.
  967. * @param srcChars The text to search for.
  968. * @param srcLength the number of characters in <TT>srcChars</TT>
  969. * @param start The offset at which searching will start.
  970. * @param length The number of characters to search
  971. * @return The offset into this of the start of <TT>srcChars</TT>,
  972. * or -1 if not found.
  973. * @stable ICU 2.0
  974. */
  975. inline int32_t indexOf(const UChar *srcChars,
  976. int32_t srcLength,
  977. int32_t start,
  978. int32_t length) const;
  979. /**
  980. * Locate in this the first occurrence in the range
  981. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  982. * in <TT>srcChars</TT> in the range
  983. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  984. * using bitwise comparison.
  985. * @param srcChars The text to search for.
  986. * @param srcStart the offset into <TT>srcChars</TT> at which
  987. * to start matching
  988. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  989. * @param start the offset into this at which to start matching
  990. * @param length the number of characters in this to search
  991. * @return The offset into this of the start of <TT>srcChars</TT>,
  992. * or -1 if not found.
  993. * @stable ICU 2.0
  994. */
  995. int32_t indexOf(const UChar *srcChars,
  996. int32_t srcStart,
  997. int32_t srcLength,
  998. int32_t start,
  999. int32_t length) const;
  1000. /**
  1001. * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1002. * using bitwise comparison.
  1003. * @param c The code unit to search for.
  1004. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1005. * @stable ICU 2.0
  1006. */
  1007. inline int32_t indexOf(UChar c) const;
  1008. /**
  1009. * Locate in this the first occurrence of the code point <TT>c</TT>,
  1010. * using bitwise comparison.
  1011. *
  1012. * @param c The code point to search for.
  1013. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1014. * @stable ICU 2.0
  1015. */
  1016. inline int32_t indexOf(UChar32 c) const;
  1017. /**
  1018. * Locate in this the first occurrence of the BMP code point <code>c</code>,
  1019. * starting at offset <TT>start</TT>, using bitwise comparison.
  1020. * @param c The code unit to search for.
  1021. * @param start The offset at which searching will start.
  1022. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1023. * @stable ICU 2.0
  1024. */
  1025. inline int32_t indexOf(UChar c,
  1026. int32_t start) const;
  1027. /**
  1028. * Locate in this the first occurrence of the code point <TT>c</TT>
  1029. * starting at offset <TT>start</TT>, using bitwise comparison.
  1030. *
  1031. * @param c The code point to search for.
  1032. * @param start The offset at which searching will start.
  1033. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1034. * @stable ICU 2.0
  1035. */
  1036. inline int32_t indexOf(UChar32 c,
  1037. int32_t start) const;
  1038. /**
  1039. * Locate in this the first occurrence of the BMP code point <code>c</code>
  1040. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1041. * using bitwise comparison.
  1042. * @param c The code unit to search for.
  1043. * @param start the offset into this at which to start matching
  1044. * @param length the number of characters in this to search
  1045. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1046. * @stable ICU 2.0
  1047. */
  1048. inline int32_t indexOf(UChar c,
  1049. int32_t start,
  1050. int32_t length) const;
  1051. /**
  1052. * Locate in this the first occurrence of the code point <TT>c</TT>
  1053. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1054. * using bitwise comparison.
  1055. *
  1056. * @param c The code point to search for.
  1057. * @param start the offset into this at which to start matching
  1058. * @param length the number of characters in this to search
  1059. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1060. * @stable ICU 2.0
  1061. */
  1062. inline int32_t indexOf(UChar32 c,
  1063. int32_t start,
  1064. int32_t length) const;
  1065. /**
  1066. * Locate in this the last occurrence of the characters in <TT>text</TT>,
  1067. * using bitwise comparison.
  1068. * @param text The text to search for.
  1069. * @return The offset into this of the start of <TT>text</TT>,
  1070. * or -1 if not found.
  1071. * @stable ICU 2.0
  1072. */
  1073. inline int32_t lastIndexOf(const UnicodeString& text) const;
  1074. /**
  1075. * Locate in this the last occurrence of the characters in <TT>text</TT>
  1076. * starting at offset <TT>start</TT>, using bitwise comparison.
  1077. * @param text The text to search for.
  1078. * @param start The offset at which searching will start.
  1079. * @return The offset into this of the start of <TT>text</TT>,
  1080. * or -1 if not found.
  1081. * @stable ICU 2.0
  1082. */
  1083. inline int32_t lastIndexOf(const UnicodeString& text,
  1084. int32_t start) const;
  1085. /**
  1086. * Locate in this the last occurrence in the range
  1087. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1088. * in <TT>text</TT>, using bitwise comparison.
  1089. * @param text The text to search for.
  1090. * @param start The offset at which searching will start.
  1091. * @param length The number of characters to search
  1092. * @return The offset into this of the start of <TT>text</TT>,
  1093. * or -1 if not found.
  1094. * @stable ICU 2.0
  1095. */
  1096. inline int32_t lastIndexOf(const UnicodeString& text,
  1097. int32_t start,
  1098. int32_t length) const;
  1099. /**
  1100. * Locate in this the last occurrence in the range
  1101. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1102. * in <TT>srcText</TT> in the range
  1103. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1104. * using bitwise comparison.
  1105. * @param srcText The text to search for.
  1106. * @param srcStart the offset into <TT>srcText</TT> at which
  1107. * to start matching
  1108. * @param srcLength the number of characters in <TT>srcText</TT> to match
  1109. * @param start the offset into this at which to start matching
  1110. * @param length the number of characters in this to search
  1111. * @return The offset into this of the start of <TT>srcText</TT>,
  1112. * or -1 if not found.
  1113. * @stable ICU 2.0
  1114. */
  1115. inline int32_t lastIndexOf(const UnicodeString& srcText,
  1116. int32_t srcStart,
  1117. int32_t srcLength,
  1118. int32_t start,
  1119. int32_t length) const;
  1120. /**
  1121. * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
  1122. * starting at offset <TT>start</TT>, using bitwise comparison.
  1123. * @param srcChars The text to search for.
  1124. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1125. * @param start the offset into this at which to start matching
  1126. * @return The offset into this of the start of <TT>srcChars</TT>,
  1127. * or -1 if not found.
  1128. * @stable ICU 2.0
  1129. */
  1130. inline int32_t lastIndexOf(const UChar *srcChars,
  1131. int32_t srcLength,
  1132. int32_t start) const;
  1133. /**
  1134. * Locate in this the last occurrence in the range
  1135. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1136. * in <TT>srcChars</TT>, using bitwise comparison.
  1137. * @param srcChars The text to search for.
  1138. * @param srcLength the number of characters in <TT>srcChars</TT>
  1139. * @param start The offset at which searching will start.
  1140. * @param length The number of characters to search
  1141. * @return The offset into this of the start of <TT>srcChars</TT>,
  1142. * or -1 if not found.
  1143. * @stable ICU 2.0
  1144. */
  1145. inline int32_t lastIndexOf(const UChar *srcChars,
  1146. int32_t srcLength,
  1147. int32_t start,
  1148. int32_t length) const;
  1149. /**
  1150. * Locate in this the last occurrence in the range
  1151. * [<TT>start</TT>, <TT>start + length</TT>) of the characters
  1152. * in <TT>srcChars</TT> in the range
  1153. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
  1154. * using bitwise comparison.
  1155. * @param srcChars The text to search for.
  1156. * @param srcStart the offset into <TT>srcChars</TT> at which
  1157. * to start matching
  1158. * @param srcLength the number of characters in <TT>srcChars</TT> to match
  1159. * @param start the offset into this at which to start matching
  1160. * @param length the number of characters in this to search
  1161. * @return The offset into this of the start of <TT>srcChars</TT>,
  1162. * or -1 if not found.
  1163. * @stable ICU 2.0
  1164. */
  1165. int32_t lastIndexOf(const UChar *srcChars,
  1166. int32_t srcStart,
  1167. int32_t srcLength,
  1168. int32_t start,
  1169. int32_t length) const;
  1170. /**
  1171. * Locate in this the last occurrence of the BMP code point <code>c</code>,
  1172. * using bitwise comparison.
  1173. * @param c The code unit to search for.
  1174. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1175. * @stable ICU 2.0
  1176. */
  1177. inline int32_t lastIndexOf(UChar c) const;
  1178. /**
  1179. * Locate in this the last occurrence of the code point <TT>c</TT>,
  1180. * using bitwise comparison.
  1181. *
  1182. * @param c The code point to search for.
  1183. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1184. * @stable ICU 2.0
  1185. */
  1186. inline int32_t lastIndexOf(UChar32 c) const;
  1187. /**
  1188. * Locate in this the last occurrence of the BMP code point <code>c</code>
  1189. * starting at offset <TT>start</TT>, using bitwise comparison.
  1190. * @param c The code unit to search for.
  1191. * @param start The offset at which searching will start.
  1192. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1193. * @stable ICU 2.0
  1194. */
  1195. inline int32_t lastIndexOf(UChar c,
  1196. int32_t start) const;
  1197. /**
  1198. * Locate in this the last occurrence of the code point <TT>c</TT>
  1199. * starting at offset <TT>start</TT>, using bitwise comparison.
  1200. *
  1201. * @param c The code point to search for.
  1202. * @param start The offset at which searching will start.
  1203. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1204. * @stable ICU 2.0
  1205. */
  1206. inline int32_t lastIndexOf(UChar32 c,
  1207. int32_t start) const;
  1208. /**
  1209. * Locate in this the last occurrence of the BMP code point <code>c</code>
  1210. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1211. * using bitwise comparison.
  1212. * @param c The code unit to search for.
  1213. * @param start the offset into this at which to start matching
  1214. * @param length the number of characters in this to search
  1215. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1216. * @stable ICU 2.0
  1217. */
  1218. inline int32_t lastIndexOf(UChar c,
  1219. int32_t start,
  1220. int32_t length) const;
  1221. /**
  1222. * Locate in this the last occurrence of the code point <TT>c</TT>
  1223. * in the range [<TT>start</TT>, <TT>start + length</TT>),
  1224. * using bitwise comparison.
  1225. *
  1226. * @param c The code point to search for.
  1227. * @param start the offset into this at which to start matching
  1228. * @param length the number of characters in this to search
  1229. * @return The offset into this of <TT>c</TT>, or -1 if not found.
  1230. * @stable ICU 2.0
  1231. */
  1232. inline int32_t lastIndexOf(UChar32 c,
  1233. int32_t start,
  1234. int32_t length) const;
  1235. /* Character access */
  1236. /**
  1237. * Return the code unit at offset <tt>offset</tt>.
  1238. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1239. * @param offset a valid offset into the text
  1240. * @return the code unit at offset <tt>offset</tt>
  1241. * or 0xffff if the offset is not valid for this string
  1242. * @stable ICU 2.0
  1243. */
  1244. inline UChar charAt(int32_t offset) const;
  1245. /**
  1246. * Return the code unit at offset <tt>offset</tt>.
  1247. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1248. * @param offset a valid offset into the text
  1249. * @return the code unit at offset <tt>offset</tt>
  1250. * @stable ICU 2.0
  1251. */
  1252. inline UChar operator[] (int32_t offset) const;
  1253. /**
  1254. * Return the code point that contains the code unit
  1255. * at offset <tt>offset</tt>.
  1256. * If the offset is not valid (0..length()-1) then U+ffff is returned.
  1257. * @param offset a valid offset into the text
  1258. * that indicates the text offset of any of the code units
  1259. * that will be assembled into a code point (21-bit value) and returned
  1260. * @return the code point of text at <tt>offset</tt>
  1261. * or 0xffff if the offset is not valid for this string
  1262. * @stable ICU 2.0
  1263. */
  1264. UChar32 char32At(int32_t offset) const;
  1265. /**
  1266. * Adjust a random-access offset so that
  1267. * it points to the beginning of a Unicode character.
  1268. * The offset that is passed in points to
  1269. * any code unit of a code point,
  1270. * while the returned offset will point to the first code unit
  1271. * of the same code point.
  1272. * In UTF-16, if the input offset points to a second surrogate
  1273. * of a surrogate pair, then the returned offset will point
  1274. * to the first surrogate.
  1275. * @param offset a valid offset into one code point of the text
  1276. * @return offset of the first code unit of the same code point
  1277. * @see U16_SET_CP_START
  1278. * @stable ICU 2.0
  1279. */
  1280. int32_t getChar32Start(int32_t offset) const;
  1281. /**
  1282. * Adjust a random-access offset so that
  1283. * it points behind a Unicode character.
  1284. * The offset that is passed in points behind
  1285. * any code unit of a code point,
  1286. * while the returned offset will point behind the last code unit
  1287. * of the same code point.
  1288. * In UTF-16, if the input offset points behind the first surrogate
  1289. * (i.e., to the second surrogate)
  1290. * of a surrogate pair, then the returned offset will point
  1291. * behind the second surrogate (i.e., to the first code unit after the pair).
  1292. * @param offset a valid offset after any code unit of a code point of the text
  1293. * @return offset of the first code unit after the same code point
  1294. * @see U16_SET_CP_LIMIT
  1295. * @stable ICU 2.0
  1296. */
  1297. int32_t getChar32Limit(int32_t offset) const;
  1298. /**
  1299. * Move the code unit index along the string by delta code points.
  1300. * Interpret the input index as a code unit-based offset into the string,
  1301. * move the index forward or backward by delta code points, and
  1302. * return the resulting index.
  1303. * The input index should point to the first code unit of a code point,
  1304. * if there is more than one.
  1305. *
  1306. * Both input and output indexes are code unit-based as for all
  1307. * string indexes/offsets in ICU (and other libraries, like MBCS char*).
  1308. * If delta<0 then the index is moved backward (toward the start of the string).
  1309. * If delta>0 then the index is moved forward (toward the end of the string).
  1310. *
  1311. * This behaves like CharacterIterator::move32(delta, kCurrent).
  1312. *
  1313. * Behavior for out-of-bounds indexes:
  1314. * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
  1315. * if the input index<0 then it is pinned to 0;
  1316. * if it is index>length() then it is pinned to length().
  1317. * Afterwards, the index is moved by <code>delta</code> code points
  1318. * forward or backward,
  1319. * but no further backward than to 0 and no further forward than to length().
  1320. * The resulting index return value will be in between 0 and length(), inclusively.
  1321. *
  1322. * Examples:
  1323. * <pre>
  1324. * // s has code points 'a' U+10000 'b' U+10ffff U+2029
  1325. * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
  1326. *
  1327. * // initial index: position of U+10000
  1328. * int32_t index=1;
  1329. *
  1330. * // the following examples will all result in index==4, position of U+10ffff
  1331. *
  1332. * // skip 2 code points from some position in the string
  1333. * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
  1334. *
  1335. * // go to the 3rd code point from the start of s (0-based)
  1336. * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
  1337. *
  1338. * // go to the next-to-last code point of s
  1339. * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
  1340. * </pre>
  1341. *
  1342. * @param index input code unit index
  1343. * @param delta (signed) code point count to move the index forward or backward
  1344. * in the string
  1345. * @return the resulting code unit index
  1346. * @stable ICU 2.0
  1347. */
  1348. int32_t moveIndex32(int32_t index, int32_t delta) const;
  1349. /* Substring extraction */
  1350. /**
  1351. * Copy the characters in the range
  1352. * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
  1353. * beginning at <tt>dstStart</tt>.
  1354. * If the string aliases to <code>dst</code> itself as an external buffer,
  1355. * then extract() will not copy the contents.
  1356. *
  1357. * @param start offset of first character which will be copied into the array
  1358. * @param length the number of characters to extract
  1359. * @param dst array in which to copy characters. The length of <tt>dst</tt>
  1360. * must be at least (<tt>dstStart + length</tt>).
  1361. * @param dstStart the offset in <TT>dst</TT> where the first character
  1362. * will be extracted
  1363. * @stable ICU 2.0
  1364. */
  1365. inline void extract(int32_t start,
  1366. int32_t length,
  1367. UChar *dst,
  1368. int32_t dstStart = 0) const;
  1369. /**
  1370. * Copy the contents of the string into dest.
  1371. * This is a convenience function that
  1372. * checks if there is enough space in dest,
  1373. * extracts the entire string if possible,
  1374. * and NUL-terminates dest if possible.
  1375. *
  1376. * If the string fits into dest but cannot be NUL-terminated
  1377. * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
  1378. * If the string itself does not fit into dest
  1379. * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
  1380. *
  1381. * If the string aliases to <code>dest</code> itself as an external buffer,
  1382. * then extract() will not copy the contents.
  1383. *
  1384. * @param dest Destination string buffer.
  1385. * @param destCapacity Number of UChars available at dest.
  1386. * @param errorCode ICU error code.
  1387. * @return length()
  1388. * @stable ICU 2.0
  1389. */
  1390. int32_t
  1391. extract(UChar *dest, int32_t destCapacity,
  1392. UErrorCode &errorCode) const;
  1393. /**
  1394. * Copy the characters in the range
  1395. * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
  1396. * <tt>target</tt>.
  1397. * @param start offset of first character which will be copied
  1398. * @param length the number of characters to extract
  1399. * @param target UnicodeString into which to copy characters.
  1400. * (Returns nothing; the extracted characters are delivered in <TT>target</TT>.)
  1401. * @stable ICU 2.0
  1402. */
  1403. inline void extract(int32_t start,
  1404. int32_t length,
  1405. UnicodeString& target) const;
  1406. /**
  1407. * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1408. * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
  1409. * @param start offset of first character which will be copied into the array
  1410. * @param limit offset immediately following the last character to be copied
  1411. * @param dst array in which to copy characters. The length of <tt>dst</tt>
  1412. * must be at least (<tt>dstStart + (limit - start)</tt>).
  1413. * @param dstStart the offset in <TT>dst</TT> where the first character
  1414. * will be extracted
  1415. * @stable ICU 2.0
  1416. */
  1417. inline void extractBetween(int32_t start,
  1418. int32_t limit,
  1419. UChar *dst,
  1420. int32_t dstStart = 0) const;
  1421. /**
  1422. * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
  1423. * into the UnicodeString <tt>target</tt>. Replaceable API.
  1424. * @param start offset of first character which will be copied
  1425. * @param limit offset immediately following the last character to be copied
  1426. * @param target UnicodeString into which to copy characters.
  1427. * (Returns nothing; the extracted characters are delivered in <TT>target</TT>.)
  1428. * @stable ICU 2.0
  1429. */
  1430. virtual void extractBetween(int32_t start,
  1431. int32_t limit,
  1432. UnicodeString& target) const;
  1433. /**
  1434. * Copy the characters in the range
  1435. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters.
  1436. * All characters must be invariant (see utypes.h).
  1437. * Use US_INV as the last, signature-distinguishing parameter.
  1438. *
  1439. * This function does not write any more than <code>targetCapacity</code>
  1440. * characters but returns the length of the entire output string
  1441. * so that one can allocate a larger buffer and call the function again
  1442. * if necessary.
  1443. * The output string is NUL-terminated if possible.
  1444. *
  1445. * @param start offset of first character which will be copied
  1446. * @param startLength the number of characters to extract
  1447. * @param target the target buffer for extraction, can be NULL
  1448. * if targetCapacity is 0
  1449. * @param targetCapacity the length of the target buffer
  1450. * @param inv Signature-distinguishing parameter, use US_INV.
  1451. * @return the output string length, not including the terminating NUL
  1452. * @stable ICU 3.2
  1453. */
  1454. int32_t extract(int32_t start,
  1455. int32_t startLength,
  1456. char *target,
  1457. int32_t targetCapacity,
  1458. enum EInvariant inv) const;
  1459. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  1460. /**
  1461. * Copy the characters in the range
  1462. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1463. * in the platform's default codepage.
  1464. * This function does not write any more than <code>targetLength</code>
  1465. * characters but returns the length of the entire output string
  1466. * so that one can allocate a larger buffer and call the function again
  1467. * if necessary.
  1468. * The output string is NUL-terminated if possible.
  1469. *
  1470. * @param start offset of first character which will be copied
  1471. * @param startLength the number of characters to extract
  1472. * @param target the target buffer for extraction
  1473. * @param targetLength the length of the target buffer
  1474. * If <TT>target</TT> is NULL, then the number of bytes required for
  1475. * <TT>target</TT> is returned.
  1476. * @return the output string length, not including the terminating NUL
  1477. * @stable ICU 2.0
  1478. */
  1479. int32_t extract(int32_t start,
  1480. int32_t startLength,
  1481. char *target,
  1482. uint32_t targetLength) const;
  1483. #endif
  1484. #if !UCONFIG_NO_CONVERSION
  1485. /**
  1486. * Copy the characters in the range
  1487. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1488. * in a specified codepage.
  1489. * The output string is NUL-terminated.
  1490. *
  1491. * Recommendation: For invariant-character strings use
  1492. * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1493. * because it avoids object code dependencies of UnicodeString on
  1494. * the conversion code.
  1495. *
  1496. * @param start offset of first character which will be copied
  1497. * @param startLength the number of characters to extract
  1498. * @param target the target buffer for extraction
  1499. * @param codepage the desired codepage for the characters. 0 has
  1500. * the special meaning of the default codepage
  1501. * If <code>codepage</code> is an empty string (<code>""</code>),
  1502. * then a simple conversion is performed on the codepage-invariant
  1503. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1504. * If <TT>target</TT> is NULL, then the number of bytes required for
  1505. * <TT>target</TT> is returned. It is assumed that the target is big enough
  1506. * to fit all of the characters.
  1507. * @return the output string length, not including the terminating NUL
  1508. * @stable ICU 2.0
  1509. */
  1510. inline int32_t extract(int32_t start,
  1511. int32_t startLength,
  1512. char *target,
  1513. const char *codepage = 0) const;
  1514. /**
  1515. * Copy the characters in the range
  1516. * [<tt>start</TT>, <tt>start + startLength</TT>) into an array of characters
  1517. * in a specified codepage.
  1518. * This function does not write any more than <code>targetLength</code>
  1519. * characters but returns the length of the entire output string
  1520. * so that one can allocate a larger buffer and call the function again
  1521. * if necessary.
  1522. * The output string is NUL-terminated if possible.
  1523. *
  1524. * Recommendation: For invariant-character strings use
  1525. * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
  1526. * because it avoids object code dependencies of UnicodeString on
  1527. * the conversion code.
  1528. *
  1529. * @param start offset of first character which will be copied
  1530. * @param startLength the number of characters to extract
  1531. * @param target the target buffer for extraction
  1532. * @param targetLength the length of the target buffer
  1533. * @param codepage the desired codepage for the characters. 0 has
  1534. * the special meaning of the default codepage
  1535. * If <code>codepage</code> is an empty string (<code>""</code>),
  1536. * then a simple conversion is performed on the codepage-invariant
  1537. * subset ("invariant characters") of the platform encoding. See utypes.h.
  1538. * If <TT>target</TT> is NULL, then the number of bytes required for
  1539. * <TT>target</TT> is returned.
  1540. * @return the output string length, not including the terminating NUL
  1541. * @stable ICU 2.0
  1542. */
  1543. int32_t extract(int32_t start,
  1544. int32_t startLength,
  1545. char *target,
  1546. uint32_t targetLength,
  1547. const char *codepage) const;
  1548. /**
  1549. * Convert the UnicodeString into a codepage string using an existing UConverter.
  1550. * The output string is NUL-terminated if possible.
  1551. *
  1552. * This function avoids the overhead of opening and closing a converter if
  1553. * multiple strings are extracted.
  1554. *
  1555. * @param dest destination string buffer, can be NULL if destCapacity==0
  1556. * @param destCapacity the number of chars available at dest
  1557. * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
  1558. * or NULL for the default converter
  1559. * @param errorCode normal ICU error code
  1560. * @return the length of the output string, not counting the terminating NUL;
  1561. * if the length is greater than destCapacity, then the string will not fit
  1562. * and a buffer of the indicated length would need to be passed in
  1563. * @stable ICU 2.0
  1564. */
  1565. int32_t extract(char *dest, int32_t destCapacity,
  1566. UConverter *cnv,
  1567. UErrorCode &errorCode) const;
  1568. #endif
  1569. /**
  1570. * Create a temporary substring for the specified range.
  1571. * Unlike the substring constructor and setTo() functions,
  1572. * the object returned here will be a read-only alias (using getBuffer())
  1573. * rather than copying the text.
  1574. * As a result, this substring operation is much faster but requires
  1575. * that the original string not be modified or deleted during the lifetime
  1576. * of the returned substring object.
  1577. * @param start offset of the first character visible in the substring
  1578. * @param length length of the substring
  1579. * @return a read-only alias UnicodeString object for the substring
  1580. * @stable ICU 4.4
  1581. */
  1582. UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
  1583. /**
  1584. * Create a temporary substring for the specified range.
  1585. * Same as tempSubString(start, length) except that the substring range
  1586. * is specified as a (start, limit) pair (with an exclusive limit index)
  1587. * rather than a (start, length) pair.
  1588. * @param start offset of the first character visible in the substring
  1589. * @param limit offset immediately following the last character visible in the substring
  1590. * @return a read-only alias UnicodeString object for the substring
  1591. * @stable ICU 4.4
  1592. */
  1593. inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
  1594. /**
  1595. * Convert the UnicodeString to UTF-8 and write the result
  1596. * to a ByteSink. This is called by toUTF8String().
  1597. * Unpaired surrogates are replaced with U+FFFD.
  1598. * Calls u_strToUTF8WithSub().
  1599. *
  1600. * @param sink A ByteSink to which the UTF-8 version of the string is written.
  1601. * sink.Flush() is called at the end.
  1602. * @stable ICU 4.2
  1603. * @see toUTF8String
  1604. */
  1605. void toUTF8(ByteSink &sink) const;
  1606. #if U_HAVE_STD_STRING
  1607. /**
  1608. * Append the UTF-8 form of this UnicodeString to a standard string.
  1609. * The conversion is delegated to toUTF8(), which writes through a
  1610. * StringByteSink wrapped around <code>result</code>.
  1611. * Unpaired surrogates are replaced with U+FFFD.
  1612. *
  1613. * @param result A standard string (or a compatible object)
  1614. * that receives the appended UTF-8 version of this string.
  1615. * @return <code>result</code>, the string object that was passed in.
  1616. * @stable ICU 4.2
  1617. * @see toUTF8
  1618. */
  1619. template<typename StringClass>
  1620. StringClass &toUTF8String(StringClass &result) const {
  1621. StringByteSink<StringClass> utf8Sink(&result);
  1622. toUTF8(utf8Sink);
  1623. return result;
  1624. }
  1625. #endif
  1626. /**
  1627. * Convert the UnicodeString to UTF-32.
  1628. * Unpaired surrogates are replaced with U+FFFD.
  1629. * Calls u_strToUTF32WithSub().
  1630. *
  1631. * @param utf32 destination string buffer, can be NULL if capacity==0
  1632. * @param capacity the number of UChar32s available at utf32
  1633. * @param errorCode Standard ICU error code. Its input value must
  1634. * pass the U_SUCCESS() test, or else the function returns
  1635. * immediately. Check for U_FAILURE() on output or use with
  1636. * function chaining. (See User Guide for details.)
  1637. * @return The length of the UTF-32 string.
  1638. * @see fromUTF32
  1639. * @stable ICU 4.2
  1640. */
  1641. int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
  1642. /* Length operations */
  1643. /**
  1644. * Return the length of the UnicodeString object.
  1645. * The length is the number of UChar code units in the UnicodeString.
  1646. * If you want the number of code points, please use countChar32().
  1647. * @return the length of the UnicodeString object
  1648. * @see countChar32
  1649. * @stable ICU 2.0
  1650. */
  1651. inline int32_t length(void) const;
  1652. /**
  1653. * Count Unicode code points in the length UChar code units of the string.
  1654. * A code point may occupy either one or two UChar code units.
  1655. * Counting code points involves reading all code units.
  1656. *
  1657. * This function is basically the inverse of moveIndex32().
  1658. *
  1659. * @param start the index of the first code unit to check
  1660. * @param length the number of UChar code units to check
  1661. * @return the number of code points in the specified code units
  1662. * @see length
  1663. * @stable ICU 2.0
  1664. */
  1665. int32_t
  1666. countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
  1667. /**
  1668. * Check if the length UChar code units of the string
  1669. * contain more Unicode code points than a certain number.
  1670. * This is more efficient than counting all code points in this part of the string
  1671. * and comparing that number with a threshold.
  1672. * This function may not need to scan the string at all if the length
  1673. * falls within a certain range, and
  1674. * never needs to count more than 'number+1' code points.
  1675. * Logically equivalent to (countChar32(start, length)>number).
  1676. * A Unicode code point may occupy either one or two UChar code units.
  1677. *
  1678. * @param start the index of the first code unit to check (0 for the entire string)
  1679. * @param length the number of UChar code units to check
  1680. * (use INT32_MAX for the entire string; remember that start/length
  1681. * values are pinned)
  1682. * @param number The number of code points in the (sub)string is compared against
  1683. * the 'number' parameter.
  1684. * @return Boolean value for whether the string contains more Unicode code points
  1685. * than 'number'. Same as (u_countChar32(s, length)>number).
  1686. * @see countChar32
  1687. * @see u_strHasMoreChar32Than
  1688. * @stable ICU 2.4
  1689. */
  1690. UBool
  1691. hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
  1692. /**
  1693. * Determine if this string is empty.
  1694. * @return TRUE if this string contains 0 characters, FALSE otherwise.
  1695. * @stable ICU 2.0
  1696. */
  1697. inline UBool isEmpty(void) const;
  1698. /**
  1699. * Return the capacity of the internal buffer of the UnicodeString object.
  1700. * This is useful together with the getBuffer functions.
  1701. * See there for details.
  1702. *
  1703. * @return the number of UChars available in the internal buffer
  1704. * @see getBuffer
  1705. * @stable ICU 2.0
  1706. */
  1707. inline int32_t getCapacity(void) const;
  1708. /* Other operations */
  1709. /**
  1710. * Generate a hash code for this object.
  1711. * @return The hash code of this UnicodeString.
  1712. * @stable ICU 2.0
  1713. */
  1714. inline int32_t hashCode(void) const;
  1715. /**
  1716. * Determine if this object contains a valid string.
  1717. * A bogus string has no value. It is different from an empty string,
  1718. * although in both cases isEmpty() returns TRUE and length() returns 0.
  1719. * setToBogus() and isBogus() can be used to indicate that no string value is available.
  1720. * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
  1721. * length() returns 0.
  1722. *
  1723. * @return TRUE if the string is bogus/invalid, FALSE otherwise
  1724. * @see setToBogus()
  1725. * @stable ICU 2.0
  1726. */
  1727. inline UBool isBogus(void) const;
  1728. //========================================
  1729. // Write operations
  1730. //========================================
  1731. /* Assignment operations */
  1732. /**
  1733. * Assignment operator. Replace the characters in this UnicodeString
  1734. * with the characters from <TT>srcText</TT>.
  1735. *
  1736. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1737. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1738. * By contrast, the fastCopyFrom() function implements the old,
  1739. * more efficient but less safe behavior
  1740. * of making this string also a readonly alias to the same buffer.
  1741. *
  1742. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1743. * then the copy is an empty string.
  1744. *
  1745. * @param srcText The text containing the characters to replace
  1746. * @return a reference to this
  1747. * @stable ICU 2.0
  1748. * @see fastCopyFrom
  1749. */
  1750. UnicodeString &operator=(const UnicodeString &srcText);
  1751. /**
  1752. * Almost the same as the assignment operator.
  1753. * Replace the characters in this UnicodeString
  1754. * with the characters from <code>srcText</code>.
  1755. *
  1756. * This function works the same as the assignment operator
  1757. * for all strings except for ones that are readonly aliases.
  1758. *
  1759. * Starting with ICU 2.4, the assignment operator and the copy constructor
  1760. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  1761. * This function implements the old, more efficient but less safe behavior
  1762. * of making this string also a readonly alias to the same buffer.
  1763. *
  1764. * The fastCopyFrom function must be used only if it is known that the lifetime of
  1765. * this UnicodeString does not exceed the lifetime of the aliased buffer
  1766. * including its contents, for example for strings from resource bundles
  1767. * or aliases to string constants.
  1768. *
  1769. * If the source object has an "open" buffer from getBuffer(minCapacity),
  1770. * then the copy is an empty string.
  1771. *
  1772. * @param src The text containing the characters to replace.
  1773. * @return a reference to this
  1774. * @stable ICU 2.4
  1775. */
  1776. UnicodeString &fastCopyFrom(const UnicodeString &src);
  1777. #if U_HAVE_RVALUE_REFERENCES
  1778. /**
  1779. * Move assignment operator, might leave src in bogus state.
  1780. * This string will have the same contents and state that the source string had.
  1781. * The behavior is undefined if *this and src are the same object.
  1782. * @param src source string
  1783. * @return *this
  1784. * @stable ICU 56
  1785. */
  1786. UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
  1787. return moveFrom(src);  // src is a named (lvalue) reference here, so it binds to moveFrom(UnicodeString &)
  1788. }
  1789. #endif
  1790. // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
  1791. /**
  1792. * Move assignment, might leave src in bogus state.
  1793. * This string will have the same contents and state that the source string had.
  1794. * The behavior is undefined if *this and src are the same object.
  1795. *
  1796. * Can be called explicitly, does not need C++11 support.
  1797. * @param src source string
  1798. * @return *this
  1799. * @draft ICU 56
  1800. */
  1801. UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
  1802. /**
  1803. * Swap strings.
  1804. * @param other other string
  1805. * @stable ICU 56
  1806. */
  1807. void swap(UnicodeString &other) U_NOEXCEPT;
  1808. /**
  1809. * Non-member UnicodeString swap function.
  1810. * @param s1 will get s2's contents and state
  1811. * @param s2 will get s1's contents and state
  1812. * @stable ICU 56
  1813. */
  1814. friend U_COMMON_API inline void U_EXPORT2
  1815. swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
  1816. s1.swap(s2);  // forward to the member swap(UnicodeString &)
  1817. }
  1818. /**
  1819. * Assignment operator. Replace the characters in this UnicodeString
  1820. * with the code unit <TT>ch</TT>.
  1821. * @param ch the code unit to replace
  1822. * @return a reference to this
  1823. * @stable ICU 2.0
  1824. */
  1825. inline UnicodeString& operator= (UChar ch);
  1826. /**
  1827. * Assignment operator. Replace the characters in this UnicodeString
  1828. * with the code point <TT>ch</TT>.
  1829. * @param ch the code point to replace
  1830. * @return a reference to this
  1831. * @stable ICU 2.0
  1832. */
  1833. inline UnicodeString& operator= (UChar32 ch);
  1834. /**
  1835. * Set the text in the UnicodeString object to the characters
  1836. * in <TT>srcText</TT> in the range
  1837. * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
  1838. * <TT>srcText</TT> is not modified.
  1839. * @param srcText the source for the new characters
  1840. * @param srcStart the offset into <TT>srcText</TT> where new characters
  1841. * will be obtained
  1842. * @return a reference to this
  1843. * @stable ICU 2.2
  1844. */
  1845. inline UnicodeString& setTo(const UnicodeString& srcText,
  1846. int32_t srcStart);
  1847. /**
  1848. * Set the text in the UnicodeString object to the characters
  1849. * in <TT>srcText</TT> in the range
  1850. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  1851. * <TT>srcText</TT> is not modified.
  1852. * @param srcText the source for the new characters
  1853. * @param srcStart the offset into <TT>srcText</TT> where new characters
  1854. * will be obtained
  1855. * @param srcLength the number of characters in <TT>srcText</TT> in the
  1856. * replace string.
  1857. * @return a reference to this
  1858. * @stable ICU 2.0
  1859. */
  1860. inline UnicodeString& setTo(const UnicodeString& srcText,
  1861. int32_t srcStart,
  1862. int32_t srcLength);
  1863. /**
  1864. * Set the text in the UnicodeString object to the characters in
  1865. * <TT>srcText</TT>.
  1866. * <TT>srcText</TT> is not modified.
  1867. * @param srcText the source for the new characters
  1868. * @return a reference to this
  1869. * @stable ICU 2.0
  1870. */
  1871. inline UnicodeString& setTo(const UnicodeString& srcText);
  1872. /**
  1873. * Set the characters in the UnicodeString object to the characters
  1874. * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
  1875. * @param srcChars the source for the new characters
  1876. * @param srcLength the number of Unicode characters in srcChars.
  1877. * @return a reference to this
  1878. * @stable ICU 2.0
  1879. */
  1880. inline UnicodeString& setTo(const UChar *srcChars,
  1881. int32_t srcLength);
  1882. /**
  1883. * Set the characters in the UnicodeString object to the code unit
  1884. * <TT>srcChar</TT>.
  1885. * @param srcChar the code unit which becomes the UnicodeString's character
  1886. * content
  1887. * @return a reference to this
  1888. * @stable ICU 2.0
  1889. */
  1890. UnicodeString& setTo(UChar srcChar);
  1891. /**
  1892. * Set the characters in the UnicodeString object to the code point
  1893. * <TT>srcChar</TT>.
  1894. * @param srcChar the code point which becomes the UnicodeString's character
  1895. * content
  1896. * @return a reference to this
  1897. * @stable ICU 2.0
  1898. */
  1899. UnicodeString& setTo(UChar32 srcChar);
  1900. /**
  1901. * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
  1902. * The text will be used for the UnicodeString object, but
  1903. * it will not be released when the UnicodeString is destroyed.
  1904. * This has copy-on-write semantics:
  1905. * When the string is modified, then the buffer is first copied into
  1906. * newly allocated memory.
  1907. * The aliased buffer is never modified.
  1908. *
  1909. * In an assignment to another UnicodeString, when using the copy constructor
  1910. * or the assignment operator, the text will be copied.
  1911. * When using fastCopyFrom(), the text will be aliased again,
  1912. * so that both strings then alias the same readonly-text.
  1913. *
  1914. * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  1915. * This must be true if <code>textLength==-1</code>.
  1916. * @param text The characters to alias for the UnicodeString.
  1917. * @param textLength The number of Unicode characters in <code>text</code> to alias.
  1918. * If -1, then this function will determine the length
  1919. * by calling <code>u_strlen()</code>.
  1920. * @return a reference to this
  1921. * @stable ICU 2.0
  1922. */
  1923. UnicodeString &setTo(UBool isTerminated,
  1924. const UChar *text,
  1925. int32_t textLength);
  1926. /**
  1927. * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
  1928. * The text will be used for the UnicodeString object, but
  1929. * it will not be released when the UnicodeString is destroyed.
  1930. * This has write-through semantics:
  1931. * For as long as the capacity of the buffer is sufficient, write operations
  1932. * will directly affect the buffer. When more capacity is necessary, then
  1933. * a new buffer will be allocated and the contents copied as with regularly
  1934. * constructed strings.
  1935. * In an assignment to another UnicodeString, the buffer will be copied.
  1936. * The extract(UChar *dst) function detects whether the dst pointer is the same
  1937. * as the string buffer itself and will in this case not copy the contents.
  1938. *
  1939. * @param buffer The characters to alias for the UnicodeString.
  1940. * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  1941. * @param buffCapacity The size of <code>buffer</code> in UChars.
  1942. * @return a reference to this
  1943. * @stable ICU 2.0
  1944. */
  1945. UnicodeString &setTo(UChar *buffer,
  1946. int32_t buffLength,
  1947. int32_t buffCapacity);
  1948. /**
  1949. * Make this UnicodeString object invalid.
  1950. * The string will test TRUE with isBogus().
  1951. *
  1952. * A bogus string has no value. It is different from an empty string.
  1953. * It can be used to indicate that no string value is available.
  1954. * getBuffer() and getTerminatedBuffer() return NULL, and
  1955. * length() returns 0.
  1956. *
  1957. * This utility function is used throughout the UnicodeString
  1958. * implementation to indicate that a UnicodeString operation failed,
  1959. * and may be used in other functions,
  1960. * especially but not exclusively when such functions do not
  1961. * take a UErrorCode for simplicity.
  1962. *
  1963. * The following methods, and no others, will clear a string object's bogus flag:
  1964. * - remove()
  1965. * - remove(0, INT32_MAX)
  1966. * - truncate(0)
  1967. * - operator=() (assignment operator)
  1968. * - setTo(...)
  1969. *
  1970. * The simplest way to turn a bogus string into an empty one
  1971. * is to use the remove() function.
  1972. * Examples for other functions that are equivalent to "set to empty string":
  1973. * \code
  1974. * if(s.isBogus()) {
  1975. * s.remove(); // set to an empty string (remove all), or
  1976. * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  1977. * s.truncate(0); // set to an empty string (complete truncation), or
  1978. * s=UnicodeString(); // assign an empty string, or
  1979. * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  1980. * static const UChar nul=0;
  1981. * s.setTo(&nul, 0); // set to an empty C Unicode string
  1982. * }
  1983. * \endcode
  1984. *
  1985. * @see isBogus()
  1986. * @stable ICU 2.0
  1987. */
  1988. void setToBogus();
  1989. /**
  1990. * Set the character at the specified offset to the specified character.
  1991. * @param offset A valid offset into the text of the character to set
  1992. * @param ch The new character
  1993. * @return A reference to this
  1994. * @stable ICU 2.0
  1995. */
  1996. UnicodeString& setCharAt(int32_t offset,
  1997. UChar ch);
  1998. /* Append operations */
  1999. /**
  2000. * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
  2001. * object.
  2002. * @param ch the code unit to be appended
  2003. * @return a reference to this
  2004. * @stable ICU 2.0
  2005. */
  2006. inline UnicodeString& operator+= (UChar ch);
  2007. /**
  2008. * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
  2009. * object.
  2010. * @param ch the code point to be appended
  2011. * @return a reference to this
  2012. * @stable ICU 2.0
  2013. */
  2014. inline UnicodeString& operator+= (UChar32 ch);
  2015. /**
  2016. * Append operator. Append the characters in <TT>srcText</TT> to the
  2017. * UnicodeString object. <TT>srcText</TT> is not modified.
  2018. * @param srcText the source for the new characters
  2019. * @return a reference to this
  2020. * @stable ICU 2.0
  2021. */
  2022. inline UnicodeString& operator+= (const UnicodeString& srcText);
  2023. /**
  2024. * Append the characters
  2025. * in <TT>srcText</TT> in the range
  2026. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
  2027. * UnicodeString object. <TT>srcText</TT>
  2028. * is not modified.
  2029. * @param srcText the source for the new characters
  2030. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2031. * will be obtained
  2032. * @param srcLength the number of characters in <TT>srcText</TT> in
  2033. * the append string
  2034. * @return a reference to this
  2035. * @stable ICU 2.0
  2036. */
  2037. inline UnicodeString& append(const UnicodeString& srcText,
  2038. int32_t srcStart,
  2039. int32_t srcLength);
  2040. /**
  2041. * Append the characters in <TT>srcText</TT> to the UnicodeString object.
  2042. * <TT>srcText</TT> is not modified.
  2043. * @param srcText the source for the new characters
  2044. * @return a reference to this
  2045. * @stable ICU 2.0
  2046. */
  2047. inline UnicodeString& append(const UnicodeString& srcText);
  2048. /**
  2049. * Append the characters in <TT>srcChars</TT> in the range
  2050. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
  2051. * object.
  2052. * <TT>srcChars</TT> is not modified.
  2053. * @param srcChars the source for the new characters
  2054. * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2055. * will be obtained
  2056. * @param srcLength the number of characters in <TT>srcChars</TT> in
  2057. * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
  2058. * @return a reference to this
  2059. * @stable ICU 2.0
  2060. */
  2061. inline UnicodeString& append(const UChar *srcChars,
  2062. int32_t srcStart,
  2063. int32_t srcLength);
  2064. /**
  2065. * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
  2066. * <TT>srcChars</TT> is not modified.
  2067. * @param srcChars the source for the new characters
  2068. * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
  2069. * can be -1 if <TT>srcChars</TT> is NUL-terminated
  2070. * @return a reference to this
  2071. * @stable ICU 2.0
  2072. */
  2073. inline UnicodeString& append(const UChar *srcChars,
  2074. int32_t srcLength);
  2075. /**
  2076. * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
  2077. * @param srcChar the code unit to append
  2078. * @return a reference to this
  2079. * @stable ICU 2.0
  2080. */
  2081. inline UnicodeString& append(UChar srcChar);
  2082. /**
  2083. * Append the code point <TT>srcChar</TT> to the UnicodeString object.
  2084. * @param srcChar the code point to append
  2085. * @return a reference to this
  2086. * @stable ICU 2.0
  2087. */
  2088. UnicodeString& append(UChar32 srcChar);
  2089. /* Insert operations */
  2090. /**
  2091. * Insert the characters in <TT>srcText</TT> in the range
  2092. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  2093. * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  2094. * @param start the offset where the insertion begins
  2095. * @param srcText the source for the new characters
  2096. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2097. * will be obtained
  2098. * @param srcLength the number of characters in <TT>srcText</TT> in
  2099. * the insert string
  2100. * @return a reference to this
  2101. * @stable ICU 2.0
  2102. */
  2103. inline UnicodeString& insert(int32_t start,
  2104. const UnicodeString& srcText,
  2105. int32_t srcStart,
  2106. int32_t srcLength);
  2107. /**
  2108. * Insert the characters in <TT>srcText</TT> into the UnicodeString object
  2109. * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
  2110. * @param start the offset where the insertion begins
  2111. * @param srcText the source for the new characters
  2112. * @return a reference to this
  2113. * @stable ICU 2.0
  2114. */
  2115. inline UnicodeString& insert(int32_t start,
  2116. const UnicodeString& srcText);
  2117. /**
  2118. * Insert the characters in <TT>srcChars</TT> in the range
  2119. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
  2120. * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  2121. * @param start the offset at which the insertion begins
  2122. * @param srcChars the source for the new characters
  2123. * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2124. * will be obtained
  2125. * @param srcLength the number of characters in <TT>srcChars</TT>
  2126. * in the insert string
  2127. * @return a reference to this
  2128. * @stable ICU 2.0
  2129. */
  2130. inline UnicodeString& insert(int32_t start,
  2131. const UChar *srcChars,
  2132. int32_t srcStart,
  2133. int32_t srcLength);
  2134. /**
  2135. * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
  2136. * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
  2137. * @param start the offset where the insertion begins
  2138. * @param srcChars the source for the new characters
  2139. * @param srcLength the number of Unicode characters in srcChars.
  2140. * @return a reference to this
  2141. * @stable ICU 2.0
  2142. */
  2143. inline UnicodeString& insert(int32_t start,
  2144. const UChar *srcChars,
  2145. int32_t srcLength);
  2146. /**
  2147. * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
  2148. * offset <TT>start</TT>.
  2149. * @param start the offset at which the insertion occurs
  2150. * @param srcChar the code unit to insert
  2151. * @return a reference to this
  2152. * @stable ICU 2.0
  2153. */
  2154. inline UnicodeString& insert(int32_t start,
  2155. UChar srcChar);
  2156. /**
  2157. * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
  2158. * offset <TT>start</TT>.
  2159. * @param start the offset at which the insertion occurs
  2160. * @param srcChar the code point to insert
  2161. * @return a reference to this
  2162. * @stable ICU 2.0
  2163. */
  2164. inline UnicodeString& insert(int32_t start,
  2165. UChar32 srcChar);
  2166. /* Replace operations */
  2167. /**
  2168. * Replace the characters in the range
  2169. * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2170. * <TT>srcText</TT> in the range
  2171. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
  2172. * <TT>srcText</TT> is not modified.
  2173. * @param start the offset at which the replace operation begins
  2174. * @param length the number of characters to replace. The character at
  2175. * <TT>start + length</TT> is not modified.
  2176. * @param srcText the source for the new characters
  2177. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2178. * will be obtained
  2179. * @param srcLength the number of characters in <TT>srcText</TT> in
  2180. * the replace string
  2181. * @return a reference to this
  2182. * @stable ICU 2.0
  2183. */
  2184. UnicodeString& replace(int32_t start,
  2185. int32_t length,
  2186. const UnicodeString& srcText,
  2187. int32_t srcStart,
  2188. int32_t srcLength);
  2189. /**
  2190. * Replace the characters in the range
  2191. * [<TT>start</TT>, <TT>start + length</TT>)
  2192. * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
  2193. * not modified.
  2194. * @param start the offset at which the replace operation begins
  2195. * @param length the number of characters to replace. The character at
  2196. * <TT>start + length</TT> is not modified.
  2197. * @param srcText the source for the new characters
  2198. * @return a reference to this
  2199. * @stable ICU 2.0
  2200. */
  2201. UnicodeString& replace(int32_t start,
  2202. int32_t length,
  2203. const UnicodeString& srcText);
  2204. /**
  2205. * Replace the characters in the range
  2206. * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2207. * <TT>srcChars</TT> in the range
  2208. * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
  2209. * is not modified.
  2210. * @param start the offset at which the replace operation begins
  2211. * @param length the number of characters to replace. The character at
  2212. * <TT>start + length</TT> is not modified.
  2213. * @param srcChars the source for the new characters
  2214. * @param srcStart the offset into <TT>srcChars</TT> where new characters
  2215. * will be obtained
  2216. * @param srcLength the number of characters in <TT>srcChars</TT>
  2217. * in the replace string
  2218. * @return a reference to this
  2219. * @stable ICU 2.0
  2220. */
  2221. UnicodeString& replace(int32_t start,
  2222. int32_t length,
  2223. const UChar *srcChars,
  2224. int32_t srcStart,
  2225. int32_t srcLength);
  2226. /**
  2227. * Replace the characters in the range
  2228. * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
  2229. * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
  2230. * @param start the offset at which the replace operation begins
  2231. * @param length number of characters to replace. The character at
  2232. * <TT>start + length</TT> is not modified.
  2233. * @param srcChars the source for the new characters
  2234. * @param srcLength the number of Unicode characters in srcChars
  2235. * @return a reference to this
  2236. * @stable ICU 2.0
  2237. */
  2238. inline UnicodeString& replace(int32_t start,
  2239. int32_t length,
  2240. const UChar *srcChars,
  2241. int32_t srcLength);
  2242. /**
  2243. * Replace the characters in the range
  2244. * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
  2245. * <TT>srcChar</TT>.
  2246. * @param start the offset at which the replace operation begins
  2247. * @param length the number of characters to replace. The character at
  2248. * <TT>start + length</TT> is not modified.
  2249. * @param srcChar the new code unit
  2250. * @return a reference to this
  2251. * @stable ICU 2.0
  2252. */
  2253. inline UnicodeString& replace(int32_t start,
  2254. int32_t length,
  2255. UChar srcChar);
  2256. /**
  2257. * Replace the characters in the range
  2258. * [<TT>start</TT>, <TT>start + length</TT>) with the code point
  2259. * <TT>srcChar</TT>.
  2260. * @param start the offset at which the replace operation begins
  2261. * @param length the number of characters to replace. The character at
  2262. * <TT>start + length</TT> is not modified.
  2263. * @param srcChar the new code point
  2264. * @return a reference to this
  2265. * @stable ICU 2.0
  2266. */
  2267. UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
  2268. /**
  2269. * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2270. * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
  2271. * @param start the offset at which the replace operation begins
  2272. * @param limit the offset immediately following the replace range
  2273. * @param srcText the source for the new characters
  2274. * @return a reference to this
  2275. * @stable ICU 2.0
  2276. */
  2277. inline UnicodeString& replaceBetween(int32_t start,
  2278. int32_t limit,
  2279. const UnicodeString& srcText);
  2280. /**
  2281. * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
  2282. * with the characters in <TT>srcText</TT> in the range
  2283. * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
  2284. * @param start the offset at which the replace operation begins
  2285. * @param limit the offset immediately following the replace range
  2286. * @param srcText the source for the new characters
  2287. * @param srcStart the offset into <TT>srcText</TT> where new characters
  2288. * will be obtained
  2289. * @param srcLimit the offset immediately following the range to copy
  2290. * in <TT>srcText</TT>
  2291. * @return a reference to this
  2292. * @stable ICU 2.0
  2293. */
  2294. inline UnicodeString& replaceBetween(int32_t start,
  2295. int32_t limit,
  2296. const UnicodeString& srcText,
  2297. int32_t srcStart,
  2298. int32_t srcLimit);
  2299. /**
  2300. * Replace a substring of this object with the given text.
  2301. * @param start the beginning index, inclusive; <code>0 <= start
  2302. * <= limit</code>.
  2303. * @param limit the ending index, exclusive; <code>start <= limit
  2304. * <= length()</code>.
  2305. * @param text the text to replace characters <code>start</code>
  2306. * to <code>limit - 1</code>
  2307. * @stable ICU 2.0
  2308. */
  2309. virtual void handleReplaceBetween(int32_t start,
  2310. int32_t limit,
  2311. const UnicodeString& text);
  2312. /**
  2313. * Replaceable API
  2314. * @return TRUE if it has MetaData
  2315. * @stable ICU 2.4
  2316. */
  2317. virtual UBool hasMetaData() const;
  2318. /**
  2319. * Copy a substring of this object, retaining attribute (out-of-band)
  2320. * information. This method is used to duplicate or reorder substrings.
  2321. * The destination index must not overlap the source range.
  2322. *
  2323. * @param start the beginning index, inclusive; <code>0 <= start <=
  2324. * limit</code>.
  2325. * @param limit the ending index, exclusive; <code>start <= limit <=
  2326. * length()</code>.
  2327. * @param dest the destination index. The characters from
  2328. * <code>start..limit-1</code> will be copied to <code>dest</code>.
  2329. * Implementations of this method may assume that <code>dest <= start ||
  2330. * dest >= limit</code>.
  2331. * @stable ICU 2.0
  2332. */
  2333. virtual void copy(int32_t start, int32_t limit, int32_t dest);
  2334. /* Search and replace operations */
  2335. /**
  2336. * Replace all occurrences of characters in oldText with the characters
  2337. * in newText
  2338. * @param oldText the text containing the search text
  2339. * @param newText the text containing the replacement text
  2340. * @return a reference to this
  2341. * @stable ICU 2.0
  2342. */
  2343. inline UnicodeString& findAndReplace(const UnicodeString& oldText,
  2344. const UnicodeString& newText);
  2345. /**
  2346. * Replace all occurrences of characters in oldText with characters
  2347. * in newText
  2348. * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2349. * @param start the start of the range in which replace will be performed
  2350. * @param length the length of the range in which replace will be performed
  2351. * @param oldText the text containing the search text
  2352. * @param newText the text containing the replacement text
  2353. * @return a reference to this
  2354. * @stable ICU 2.0
  2355. */
  2356. inline UnicodeString& findAndReplace(int32_t start,
  2357. int32_t length,
  2358. const UnicodeString& oldText,
  2359. const UnicodeString& newText);
  2360. /**
  2361. * Replace all occurrences of characters in oldText in the range
  2362. * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
  2363. * in newText in the range
  2364. * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
  2365. * in the range [<TT>start</TT>, <TT>start + length</TT>).
  2366. * @param start the start of the range in which replace will be performed
  2367. * @param length the length of the range in which replace will be performed
  2368. * @param oldText the text containing the search text
  2369. * @param oldStart the start of the search range in <TT>oldText</TT>
  2370. * @param oldLength the length of the search range in <TT>oldText</TT>
  2371. * @param newText the text containing the replacement text
  2372. * @param newStart the start of the replacement range in <TT>newText</TT>
  2373. * @param newLength the length of the replacement range in <TT>newText</TT>
  2374. * @return a reference to this
  2375. * @stable ICU 2.0
  2376. */
  2377. UnicodeString& findAndReplace(int32_t start,
  2378. int32_t length,
  2379. const UnicodeString& oldText,
  2380. int32_t oldStart,
  2381. int32_t oldLength,
  2382. const UnicodeString& newText,
  2383. int32_t newStart,
  2384. int32_t newLength);
  2385. /* Remove operations */
  2386. /**
  2387. * Remove all characters from the UnicodeString object.
  2388. * @return a reference to this
  2389. * @stable ICU 2.0
  2390. */
  2391. inline UnicodeString& remove(void);
  2392. /**
  2393. * Remove the characters in the range
  2394. * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
  2395. * @param start the offset of the first character to remove
  2396. * @param length the number of characters to remove
  2397. * @return a reference to this
  2398. * @stable ICU 2.0
  2399. */
  2400. inline UnicodeString& remove(int32_t start,
  2401. int32_t length = (int32_t)INT32_MAX);
  2402. /**
  2403. * Remove the characters in the range
  2404. * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
  2405. * @param start the offset of the first character to remove
  2406. * @param limit the offset immediately following the range to remove
  2407. * @return a reference to this
  2408. * @stable ICU 2.0
  2409. */
  2410. inline UnicodeString& removeBetween(int32_t start,
  2411. int32_t limit = (int32_t)INT32_MAX);
  2412. /**
  2413. * Retain only the characters in the range
  2414. * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
  2415. * Removes characters before <code>start</code> and at and after <code>limit</code>.
  2416. * @param start the offset of the first character to retain
  2417. * @param limit the offset immediately following the range to retain
  2418. * @return a reference to this
  2419. * @stable ICU 4.4
  2420. */
  2421. inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
  2422. /* Length operations */
  2423. /**
  2424. * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
  2425. * If the length of this UnicodeString is less than targetLength,
  2426. * targetLength - length() copies of padChar will be added to the
  2427. * beginning of this UnicodeString.
  2428. * @param targetLength the desired length of the string
  2429. * @param padChar the character to use for padding. Defaults to
  2430. * space (U+0020)
  2431. * @return TRUE if the text was padded, FALSE otherwise.
  2432. * @stable ICU 2.0
  2433. */
  2434. UBool padLeading(int32_t targetLength,
  2435. UChar padChar = 0x0020);
  2436. /**
  2437. * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
  2438. * If the length of this UnicodeString is less than targetLength,
  2439. * targetLength - length() copies of padChar will be added to the
  2440. * end of this UnicodeString.
  2441. * @param targetLength the desired length of the string
  2442. * @param padChar the character to use for padding. Defaults to
  2443. * space (U+0020)
  2444. * @return TRUE if the text was padded, FALSE otherwise.
  2445. * @stable ICU 2.0
  2446. */
  2447. UBool padTrailing(int32_t targetLength,
  2448. UChar padChar = 0x0020);
  2449. /**
  2450. * Truncate this UnicodeString to the <TT>targetLength</TT>.
  2451. * @param targetLength the desired length of this UnicodeString.
  2452. * @return TRUE if the text was truncated, FALSE otherwise
  2453. * @stable ICU 2.0
  2454. */
  2455. inline UBool truncate(int32_t targetLength);
  2456. /**
  2457. * Trims leading and trailing whitespace from this UnicodeString.
  2458. * @return a reference to this
  2459. * @stable ICU 2.0
  2460. */
  2461. UnicodeString& trim(void);
  2462. /* Miscellaneous operations */
  2463. /**
  2464. * Reverse this UnicodeString in place.
  2465. * @return a reference to this
  2466. * @stable ICU 2.0
  2467. */
  2468. inline UnicodeString& reverse(void);
  2469. /**
  2470. * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
  2471. * this UnicodeString.
  2472. * @param start the start of the range to reverse
  2473. * @param length the number of characters to reverse
  2474. * @return a reference to this
  2475. * @stable ICU 2.0
  2476. */
  2477. inline UnicodeString& reverse(int32_t start,
  2478. int32_t length);
  2479. /**
  2480. * Convert the characters in this to UPPER CASE following the conventions of
  2481. * the default locale.
  2482. * @return A reference to this.
  2483. * @stable ICU 2.0
  2484. */
  2485. UnicodeString& toUpper(void);
  2486. /**
  2487. * Convert the characters in this to UPPER CASE following the conventions of
  2488. * a specific locale.
  2489. * @param locale The locale containing the conventions to use.
  2490. * @return A reference to this.
  2491. * @stable ICU 2.0
  2492. */
  2493. UnicodeString& toUpper(const Locale& locale);
  2494. /**
  2495. * Convert the characters in this to lower case following the conventions of
  2496. * the default locale.
  2497. * @return A reference to this.
  2498. * @stable ICU 2.0
  2499. */
  2500. UnicodeString& toLower(void);
  2501. /**
  2502. * Convert the characters in this to lower case following the conventions of
  2503. * a specific locale.
  2504. * @param locale The locale containing the conventions to use.
  2505. * @return A reference to this.
  2506. * @stable ICU 2.0
  2507. */
  2508. UnicodeString& toLower(const Locale& locale);
  2509. #if !UCONFIG_NO_BREAK_ITERATION
  2510. /**
  2511. * Titlecase this string, convenience function using the default locale.
  2512. *
  2513. * Casing is locale-dependent and context-sensitive.
  2514. * Titlecasing uses a break iterator to find the first characters of words
  2515. * that are to be titlecased. It titlecases those characters and lowercases
  2516. * all others.
  2517. *
  2518. * The titlecase break iterator can be provided to customize for arbitrary
  2519. * styles, using rules and dictionaries beyond the standard iterators.
  2520. * It may be more efficient to always provide an iterator to avoid
  2521. * opening and closing one for each string.
  2522. * The standard titlecase iterator for the root locale implements the
  2523. * algorithm of Unicode TR 21.
  2524. *
  2525. * This function uses only the setText(), first() and next() methods of the
  2526. * provided break iterator.
  2527. *
  2528. * @param titleIter A break iterator to find the first characters of words
  2529. * that are to be titlecased.
  2530. * If none is provided (0), then a standard titlecase
  2531. * break iterator is opened.
  2532. * Otherwise the provided iterator is set to the string's text.
  2533. * @return A reference to this.
  2534. * @stable ICU 2.1
  2535. */
  2536. UnicodeString &toTitle(BreakIterator *titleIter);
  2537. /**
  2538. * Titlecase this string.
  2539. *
  2540. * Casing is locale-dependent and context-sensitive.
  2541. * Titlecasing uses a break iterator to find the first characters of words
  2542. * that are to be titlecased. It titlecases those characters and lowercases
  2543. * all others.
  2544. *
  2545. * The titlecase break iterator can be provided to customize for arbitrary
  2546. * styles, using rules and dictionaries beyond the standard iterators.
  2547. * It may be more efficient to always provide an iterator to avoid
  2548. * opening and closing one for each string.
  2549. * The standard titlecase iterator for the root locale implements the
  2550. * algorithm of Unicode TR 21.
  2551. *
  2552. * This function uses only the setText(), first() and next() methods of the
  2553. * provided break iterator.
  2554. *
  2555. * @param titleIter A break iterator to find the first characters of words
  2556. * that are to be titlecased.
  2557. * If none is provided (0), then a standard titlecase
  2558. * break iterator is opened.
  2559. * Otherwise the provided iterator is set to the string's text.
  2560. * @param locale The locale to consider.
  2561. * @return A reference to this.
  2562. * @stable ICU 2.1
  2563. */
  2564. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
  2565. /**
  2566. * Titlecase this string, with options.
  2567. *
  2568. * Casing is locale-dependent and context-sensitive.
  2569. * Titlecasing uses a break iterator to find the first characters of words
  2570. * that are to be titlecased. It titlecases those characters and lowercases
  2571. * all others. (This can be modified with options.)
  2572. *
  2573. * The titlecase break iterator can be provided to customize for arbitrary
  2574. * styles, using rules and dictionaries beyond the standard iterators.
  2575. * It may be more efficient to always provide an iterator to avoid
  2576. * opening and closing one for each string.
  2577. * The standard titlecase iterator for the root locale implements the
  2578. * algorithm of Unicode TR 21.
  2579. *
  2580. * This function uses only the setText(), first() and next() methods of the
  2581. * provided break iterator.
  2582. *
  2583. * @param titleIter A break iterator to find the first characters of words
  2584. * that are to be titlecased.
  2585. * If none is provided (0), then a standard titlecase
  2586. * break iterator is opened.
  2587. * Otherwise the provided iterator is set to the string's text.
  2588. * @param locale The locale to consider.
  2589. * @param options Options bit set, see ucasemap_open().
  2590. * @return A reference to this.
  2591. * @see U_TITLECASE_NO_LOWERCASE
  2592. * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
  2593. * @see ucasemap_open
  2594. * @stable ICU 3.8
  2595. */
  2596. UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
  2597. #endif
  2598. /**
  2599. * Case-folds the characters in this string.
  2600. *
  2601. * Case-folding is locale-independent and not context-sensitive,
  2602. * but there is an option for whether to include or exclude mappings for dotted I
  2603. * and dotless i that are marked with 'T' in CaseFolding.txt.
  2604. *
  2605. * The result may be longer or shorter than the original.
  2606. *
  2607. * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
  2608. * @return A reference to this.
  2609. * @stable ICU 2.0
  2610. */
  2611. UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
  2612. //========================================
  2613. // Access to the internal buffer
  2614. //========================================
  2615. /**
  2616. * Get a read/write pointer to the internal buffer.
  2617. * The buffer is guaranteed to be large enough for at least minCapacity UChars,
  2618. * writable, and is still owned by the UnicodeString object.
  2619. * Calls to getBuffer(minCapacity) must not be nested, and
  2620. * must be matched with calls to releaseBuffer(newLength).
  2621. * If the string buffer was read-only or shared,
  2622. * then it will be reallocated and copied.
  2623. *
  2624. * An attempted nested call will return 0, and will not further modify the
  2625. * state of the UnicodeString object.
  2626. * It also returns 0 if the string is bogus.
  2627. *
  2628. * The actual capacity of the string buffer may be larger than minCapacity.
  2629. * getCapacity() returns the actual capacity.
  2630. * For many operations, the full capacity should be used to avoid reallocations.
  2631. *
  2632. * While the buffer is "open" between getBuffer(minCapacity)
  2633. * and releaseBuffer(newLength), the following applies:
  2634. * - The string length is set to 0.
  2635. * - Any read API call on the UnicodeString object will behave like on a 0-length string.
  2636. * - Any write API call on the UnicodeString object is disallowed and will have no effect.
  2637. * - You can read from and write to the returned buffer.
  2638. * - The previous string contents will still be in the buffer;
  2639. * if you want to use it, then you need to call length() before getBuffer(minCapacity).
  2640. * If the length() was greater than minCapacity, then any contents after minCapacity
  2641. * may be lost.
  2642. * The buffer contents is not NUL-terminated by getBuffer().
  2643. * If length()<getCapacity() then you can terminate it by writing a NUL
  2644. * at index length().
  2645. * - You must call releaseBuffer(newLength) in order to
  2646. * return to normal UnicodeString operation.
  2647. *
  2648. * @param minCapacity the minimum number of UChars that are to be available
  2649. * in the buffer, starting at the returned pointer;
  2650. * default to the current string capacity if minCapacity==-1
  2651. * @return a writable pointer to the internal string buffer,
  2652. * or 0 if an error occurs (nested calls, out of memory)
  2653. *
  2654. * @see releaseBuffer
  2655. * @see getTerminatedBuffer()
  2656. * @stable ICU 2.0
  2657. */
  2658. UChar *getBuffer(int32_t minCapacity);
  2659. /**
  2660. * Release a read/write buffer on a UnicodeString object with an
  2661. * "open" getBuffer(minCapacity).
  2662. * This function must be called in a matched pair with getBuffer(minCapacity).
  2663. * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
  2664. *
  2665. * It will set the string length to newLength, at most to the current capacity.
  2666. * If newLength==-1 then it will set the length according to the
  2667. * first NUL in the buffer, or to the capacity if there is no NUL.
  2668. *
  2669. * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
  2670. *
  2671. * @param newLength the new length of the UnicodeString object;
  2672. * defaults to the current capacity if newLength is greater than that;
  2673. * if newLength==-1, it defaults to u_strlen(buffer) but not more than
  2674. * the current capacity of the string
  2675. *
  2676. * @see getBuffer(int32_t minCapacity)
  2677. * @stable ICU 2.0
  2678. */
  2679. void releaseBuffer(int32_t newLength=-1);
  2680. /**
  2681. * Get a read-only pointer to the internal buffer.
  2682. * This can be called at any time on a valid UnicodeString.
  2683. *
  2684. * It returns 0 if the string is bogus, or
  2685. * during an "open" getBuffer(minCapacity).
  2686. *
  2687. * It can be called as many times as desired.
  2688. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2689. * at which time the pointer is semantically invalidated and must not be used any more.
  2690. *
  2691. * The capacity of the buffer can be determined with getCapacity().
  2692. * The part after length() may or may not be initialized and valid,
  2693. * depending on the history of the UnicodeString object.
  2694. *
  2695. * The buffer contents is (probably) not NUL-terminated.
  2696. * You can check if it is with
  2697. * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
  2698. * (See getTerminatedBuffer().)
  2699. *
  2700. * The buffer may reside in read-only memory. Its contents must not
  2701. * be modified.
  2702. *
  2703. * @return a read-only pointer to the internal string buffer,
  2704. * or 0 if the string is bogus or has an "open" getBuffer(minCapacity)
  2705. *
  2706. * @see getBuffer(int32_t minCapacity)
  2707. * @see getTerminatedBuffer()
  2708. * @stable ICU 2.0
  2709. */
  2710. inline const UChar *getBuffer() const;
  2711. /**
  2712. * Get a read-only pointer to the internal buffer,
  2713. * making sure that it is NUL-terminated.
  2714. * This can be called at any time on a valid UnicodeString.
  2715. *
  2716. * It returns 0 if the string is bogus, or
  2717. * during an "open" getBuffer(minCapacity), or if the buffer cannot
  2718. * be NUL-terminated (because memory allocation failed).
  2719. *
  2720. * It can be called as many times as desired.
  2721. * The pointer that it returns will remain valid until the UnicodeString object is modified,
  2722. * at which time the pointer is semantically invalidated and must not be used any more.
  2723. *
  2724. * The capacity of the buffer can be determined with getCapacity().
  2725. * The part after length()+1 may or may not be initialized and valid,
  2726. * depending on the history of the UnicodeString object.
  2727. *
  2728. * The buffer contents is guaranteed to be NUL-terminated.
  2729. * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
  2730. * is written.
  2731. * For this reason, this function is not const, unlike getBuffer().
  2732. * Note that a UnicodeString may also contain NUL characters as part of its contents.
  2733. *
  2734. * The buffer may reside in read-only memory. Its contents must not
  2735. * be modified.
  2736. *
  2737. * @return a read-only pointer to the internal string buffer,
  2738. * or 0 if the string is bogus, has an "open" getBuffer(minCapacity), or cannot be NUL-terminated
  2739. *
  2740. * @see getBuffer(int32_t minCapacity)
  2741. * @see getBuffer()
  2742. * @stable ICU 2.2
  2743. */
  2744. const UChar *getTerminatedBuffer();
  2745. //========================================
  2746. // Constructors
  2747. //========================================
  2748. /** Construct an empty UnicodeString.
  2749. * @stable ICU 2.0
  2750. */
  2751. inline UnicodeString();
  2752. /**
  2753. * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
  2754. * @param capacity the number of UChars this UnicodeString should hold
  2755. * before a resize is necessary; if count is greater than 0 and count
  2756. * code points c take up more space than capacity, then capacity is adjusted
  2757. * accordingly.
  2758. * @param c is used to initially fill the string
  2759. * @param count specifies how many code points c are to be written in the
  2760. * string
  2761. * @stable ICU 2.0
  2762. */
  2763. UnicodeString(int32_t capacity, UChar32 c, int32_t count);
  2764. /**
  2765. * Single UChar (code unit) constructor.
  2766. *
  2767. * It is recommended to mark this constructor "explicit" by
  2768. * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  2769. * on the compiler command line or similar.
  2770. * @param ch the character to place in the UnicodeString
  2771. * @stable ICU 2.0
  2772. */
  2773. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
  2774. /**
  2775. * Single UChar32 (code point) constructor.
  2776. *
  2777. * It is recommended to mark this constructor "explicit" by
  2778. * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
  2779. * on the compiler command line or similar.
  2780. * @param ch the character to place in the UnicodeString
  2781. * @stable ICU 2.0
  2782. */
  2783. UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
  2784. /**
  2785. * UChar* constructor.
  2786. *
  2787. * It is recommended to mark this constructor "explicit" by
  2788. * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  2789. * on the compiler command line or similar.
  2790. * @param text The characters to place in the UnicodeString. <TT>text</TT>
  2791. * must be NUL (U+0000) terminated.
  2792. * @stable ICU 2.0
  2793. */
  2794. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
  2795. /**
  2796. * UChar* constructor.
  2797. * @param text The characters to place in the UnicodeString.
  2798. * @param textLength The number of Unicode characters in <TT>text</TT>
  2799. * to copy.
  2800. * @stable ICU 2.0
  2801. */
  2802. UnicodeString(const UChar *text,
  2803. int32_t textLength);
  2804. /**
  2805. * Readonly-aliasing UChar* constructor.
  2806. * The text will be used for the UnicodeString object, but
  2807. * it will not be released when the UnicodeString is destroyed.
  2808. * This has copy-on-write semantics:
  2809. * When the string is modified, then the buffer is first copied into
  2810. * newly allocated memory.
  2811. * The aliased buffer is never modified.
  2812. *
  2813. * In an assignment to another UnicodeString, when using the copy constructor
  2814. * or the assignment operator, the text will be copied.
  2815. * When using fastCopyFrom(), the text will be aliased again,
  2816. * so that both strings then alias the same readonly-text.
  2817. *
  2818. * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
  2819. * This must be true if <code>textLength==-1</code>.
  2820. * @param text The characters to alias for the UnicodeString.
  2821. * @param textLength The number of Unicode characters in <code>text</code> to alias.
  2822. * If -1, then this constructor will determine the length
  2823. * by calling <code>u_strlen()</code>.
  2824. * @stable ICU 2.0
  2825. */
  2826. UnicodeString(UBool isTerminated,
  2827. const UChar *text,
  2828. int32_t textLength);
  2829. /**
  2830. * Writable-aliasing UChar* constructor.
  2831. * The text will be used for the UnicodeString object, but
  2832. * it will not be released when the UnicodeString is destroyed.
  2833. * This has write-through semantics:
  2834. * For as long as the capacity of the buffer is sufficient, write operations
  2835. * will directly affect the buffer. When more capacity is necessary, then
  2836. * a new buffer will be allocated and the contents copied as with regularly
  2837. * constructed strings.
  2838. * In an assignment to another UnicodeString, the buffer will be copied.
  2839. * The extract(UChar *dst) function detects whether the dst pointer is the same
  2840. * as the string buffer itself and will in this case not copy the contents.
  2841. *
  2842. * @param buffer The characters to alias for the UnicodeString.
  2843. * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
  2844. * @param buffCapacity The size of <code>buffer</code> in UChars.
  2845. * @stable ICU 2.0
  2846. */
  2847. UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
  2848. #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
  2849. /**
  2850. * char* constructor.
  2851. * Uses the default converter (and thus depends on the ICU conversion code)
  2852. * unless U_CHARSET_IS_UTF8 is set to 1.
  2853. *
  2854. * For ASCII (really "invariant character") strings it is more efficient to use
  2855. * the constructor that takes a US_INV (for its enum EInvariant).
  2856. * For ASCII (invariant-character) string literals, see UNICODE_STRING and
  2857. * UNICODE_STRING_SIMPLE.
  2858. *
  2859. * It is recommended to mark this constructor "explicit" by
  2860. * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
  2861. * on the compiler command line or similar.
  2862. * @param codepageData an array of bytes, null-terminated,
  2863. * in the platform's default codepage.
  2864. * @stable ICU 2.0
  2865. * @see UNICODE_STRING
  2866. * @see UNICODE_STRING_SIMPLE
  2867. */
  2868. UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
  2869. /**
  2870. * char* constructor.
  2871. * Uses the default converter (and thus depends on the ICU conversion code)
  2872. * unless U_CHARSET_IS_UTF8 is set to 1.
  2873. * @param codepageData an array of bytes in the platform's default codepage.
  2874. * @param dataLength The number of bytes in <TT>codepageData</TT>.
  2875. * @stable ICU 2.0
  2876. */
  2877. UnicodeString(const char *codepageData, int32_t dataLength);
  2878. #endif
  2879. #if !UCONFIG_NO_CONVERSION
  2880. /**
  2881. * char* constructor.
  2882. * @param codepageData an array of bytes, null-terminated
  2883. * @param codepage the encoding of <TT>codepageData</TT>. The special
  2884. * value 0 for <TT>codepage</TT> indicates that the text is in the
  2885. * platform's default codepage.
  2886. *
  2887. * If <code>codepage</code> is an empty string (<code>""</code>),
  2888. * then a simple conversion is performed on the codepage-invariant
  2889. * subset ("invariant characters") of the platform encoding. See utypes.h.
  2890. * Recommendation: For invariant-character strings use the constructor
  2891. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2892. * because it avoids object code dependencies of UnicodeString on
  2893. * the conversion code.
  2894. *
  2895. * @stable ICU 2.0
  2896. */
  2897. UnicodeString(const char *codepageData, const char *codepage);
  2898. /**
  2899. * char* constructor.
  2900. * @param codepageData an array of bytes.
  2901. * @param dataLength The number of bytes in <TT>codepageData</TT>.
  2902. * @param codepage the encoding of <TT>codepageData</TT>. The special
  2903. * value 0 for <TT>codepage</TT> indicates that the text is in the
  2904. * platform's default codepage.
  2905. * If <code>codepage</code> is an empty string (<code>""</code>),
  2906. * then a simple conversion is performed on the codepage-invariant
  2907. * subset ("invariant characters") of the platform encoding. See utypes.h.
  2908. * Recommendation: For invariant-character strings use the constructor
  2909. * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
  2910. * because it avoids object code dependencies of UnicodeString on
  2911. * the conversion code.
  2912. *
  2913. * @stable ICU 2.0
  2914. */
  2915. UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
  2916. /**
  2917. * char * / UConverter constructor.
  2918. * This constructor uses an existing UConverter object to
  2919. * convert the codepage string to Unicode and construct a UnicodeString
  2920. * from that.
  2921. *
  2922. * The converter is reset at first.
  2923. * If the error code indicates a failure before this constructor is called,
  2924. * or if an error occurs during conversion or construction,
  2925. * then the string will be bogus.
  2926. *
  2927. * This function avoids the overhead of opening and closing a converter if
  2928. * multiple strings are constructed.
  2929. *
  2930. * @param src input codepage string
  2931. * @param srcLength length of the input string, can be -1 for NUL-terminated strings
  2932. * @param cnv converter object (ucnv_resetToUnicode() will be called),
  2933. * can be NULL for the default converter
  2934. * @param errorCode normal ICU error code
  2935. * @stable ICU 2.0
  2936. */
  2937. UnicodeString(
  2938. const char *src, int32_t srcLength,
  2939. UConverter *cnv,
  2940. UErrorCode &errorCode);
  2941. #endif
  2942. /**
  2943. * Constructs a Unicode string from an invariant-character char * string.
  2944. * About invariant characters see utypes.h.
  2945. * This constructor has no runtime dependency on conversion code and is
  2946. * therefore recommended over ones taking a charset name string
  2947. * (where the empty string "" indicates invariant-character conversion).
  2948. *
  2949. * Use the macro US_INV as the third, signature-distinguishing parameter.
  2950. *
  2951. * For example:
  2952. * \code
  2953. * void fn(const char *s) {
  2954. * UnicodeString ustr(s, -1, US_INV);
  2955. * // use ustr ...
  2956. * }
  2957. * \endcode
  2958. *
  2959. * @param src String using only invariant characters.
  2960. * @param length Length of src, or -1 if NUL-terminated.
  2961. * @param inv Signature-distinguishing parameter, use US_INV.
  2962. *
  2963. * @see US_INV
  2964. * @stable ICU 3.2
  2965. */
  2966. UnicodeString(const char *src, int32_t length, enum EInvariant inv);
  2967. /**
  2968. * Copy constructor.
  2969. *
  2970. * Starting with ICU 2.4, the assignment operator and the copy constructor
  2971. * allocate a new buffer and copy the buffer contents even for readonly aliases.
  2972. * By contrast, the fastCopyFrom() function implements the old,
  2973. * more efficient but less safe behavior
  2974. * of making this string also a readonly alias to the same buffer.
  2975. *
  2976. * If the source object has an "open" buffer from getBuffer(minCapacity),
  2977. * then the copy is an empty string.
  2978. *
  2979. * @param that The UnicodeString object to copy.
  2980. * @stable ICU 2.0
  2981. * @see fastCopyFrom
  2982. */
  2983. UnicodeString(const UnicodeString& that);
  2984. #if U_HAVE_RVALUE_REFERENCES
  2985. /**
  2986. * Move constructor, might leave src in bogus state.
  2987. * This string will have the same contents and state that the source string had.
  2988. * @param src source string
  2989. * @stable ICU 56
  2990. */
  2991. UnicodeString(UnicodeString &&src) U_NOEXCEPT;
  2992. #endif
  2993. /**
  2994. * 'Substring' constructor from tail of source string.
  2995. * @param src The UnicodeString object to copy.
  2996. * @param srcStart The offset into <tt>src</tt> at which to start copying.
  2997. * @stable ICU 2.2
  2998. */
  2999. UnicodeString(const UnicodeString& src, int32_t srcStart);
  3000. /**
  3001. * 'Substring' constructor from subrange of source string.
  3002. * @param src The UnicodeString object to copy.
  3003. * @param srcStart The offset into <tt>src</tt> at which to start copying.
  3004. * @param srcLength The number of characters from <tt>src</tt> to copy.
  3005. * @stable ICU 2.2
  3006. */
  3007. UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3008. /**
  3009. * Clone this object, an instance of a subclass of Replaceable.
  3010. * Clones can be used concurrently in multiple threads.
  3011. * If a subclass does not implement clone(), or if an error occurs,
  3012. * then NULL is returned.
  3013. * The clone functions in all subclasses return a pointer to a Replaceable
  3014. * because some compilers do not support covariant (same-as-this)
  3015. * return types; cast to the appropriate subclass if necessary.
  3016. * The caller must delete the clone.
  3017. *
  3018. * @return a clone of this object
  3019. *
  3020. * @see Replaceable::clone
  3021. * @see getDynamicClassID
  3022. * @stable ICU 2.6
  3023. */
  3024. virtual Replaceable *clone() const;
  3025. /** Destructor.
  3026. * @stable ICU 2.0
  3027. */
  3028. virtual ~UnicodeString();
  3029. /**
  3030. * Create a UnicodeString from a UTF-8 string.
  3031. * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3032. * Calls u_strFromUTF8WithSub().
  3033. *
  3034. * @param utf8 UTF-8 input string.
  3035. * Note that a StringPiece can be implicitly constructed
  3036. * from a std::string or a NUL-terminated const char * string.
  3037. * @return A UnicodeString with equivalent UTF-16 contents.
  3038. * @see toUTF8
  3039. * @see toUTF8String
  3040. * @stable ICU 4.2
  3041. */
  3042. static UnicodeString fromUTF8(StringPiece utf8);
  3043. /**
  3044. * Create a UnicodeString from a UTF-32 string.
  3045. * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
  3046. * Calls u_strFromUTF32WithSub().
  3047. *
  3048. * @param utf32 UTF-32 input string. Must not be NULL.
  3049. * @param length Length of the input string, or -1 if NUL-terminated.
  3050. * @return A UnicodeString with equivalent UTF-16 contents.
  3051. * @see toUTF32
  3052. * @stable ICU 4.2
  3053. */
  3054. static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
  3055. /* Miscellaneous operations */
  3056. /**
  3057. * Unescape a string of characters and return a string containing
  3058. * the result. The following escape sequences are recognized:
  3059. *
  3060. * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
  3061. * \\Uhhhhhhhh 8 hex digits
  3062. * \\xhh 1-2 hex digits
  3063. * \\ooo 1-3 octal digits; o in [0-7]
  3064. * \\cX control-X; X is masked with 0x1F
  3065. *
  3066. * as well as the standard ANSI C escapes:
  3067. *
  3068. * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
  3069. * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
  3070. * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
  3071. *
  3072. * Anything else following a backslash is generically escaped. For
  3073. * example, "[a\\-z]" returns "[a-z]".
  3074. *
  3075. * If an escape sequence is ill-formed, this method returns an empty
  3076. * string. An example of an ill-formed sequence is "\\u" followed by
  3077. * fewer than 4 hex digits.
  3078. *
  3079. * This function is similar to u_unescape() but not identical to it.
  3080. * The latter takes a source char*, so it does escape recognition
  3081. * and also invariant conversion.
  3082. *
  3083. * @return a string with backslash escapes interpreted, or an
  3084. * empty string on error.
  3085. * @see UnicodeString#unescapeAt()
  3086. * @see u_unescape()
  3087. * @see u_unescapeAt()
  3088. * @stable ICU 2.0
  3089. */
  3090. UnicodeString unescape() const;
  3091. /**
  3092. * Unescape a single escape sequence and return the represented
  3093. * character. See unescape() for a listing of the recognized escape
  3094. * sequences. The character at offset-1 is assumed (without
  3095. * checking) to be a backslash. If the escape sequence is
  3096. * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
  3097. * returned.
  3098. *
  3099. * @param offset an input output parameter. On input, it is the
  3100. * offset into this string where the escape sequence is located,
  3101. * after the initial backslash. On output, it is advanced after the
  3102. * last character parsed. On error, it is not advanced at all.
  3103. * @return the character represented by the escape sequence at
  3104. * offset, or U_SENTINEL=-1 on error.
  3105. * @see UnicodeString#unescape()
  3106. * @see u_unescape()
  3107. * @see u_unescapeAt()
  3108. * @stable ICU 2.0
  3109. */
  3110. UChar32 unescapeAt(int32_t &offset) const;
  3111. /**
  3112. * ICU "poor man's RTTI", returns a UClassID for this class.
  3113. *
  3114. * @stable ICU 2.2
  3115. */
  3116. static UClassID U_EXPORT2 getStaticClassID();
  3117. /**
  3118. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  3119. *
  3120. * @stable ICU 2.2
  3121. */
  3122. virtual UClassID getDynamicClassID() const;
  3123. //========================================
  3124. // Implementation methods
  3125. //========================================
  3126. protected:
  3127. /**
  3128. * Implement Replaceable::getLength() (see jitterbug 1027).
  3129. * @stable ICU 2.4
  3130. */
  3131. virtual int32_t getLength() const;
  3132. /**
  3133. * The change in Replaceable to use virtual getCharAt() allows
  3134. * UnicodeString::charAt() to be inline again (see jitterbug 709).
  3135. * @stable ICU 2.4
  3136. */
  3137. virtual UChar getCharAt(int32_t offset) const;
  3138. /**
  3139. * The change in Replaceable to use virtual getChar32At() allows
  3140. * UnicodeString::char32At() to be inline again (see jitterbug 709).
  3141. * @stable ICU 2.4
  3142. */
  3143. virtual UChar32 getChar32At(int32_t offset) const;
  3144. private:
  3145. // For char* constructors. Could be made public.
  3146. UnicodeString &setToUTF8(StringPiece utf8);
  3147. // For extract(char*).
  3148. // We could make a toUTF8(target, capacity, errorCode) public but not
  3149. // this version: New API will be cleaner if we make callers create substrings
  3150. // rather than having start+length on every method,
  3151. // and it should take a UErrorCode&.
  3152. int32_t
  3153. toUTF8(int32_t start, int32_t len,
  3154. char *target, int32_t capacity) const;
  3155. /**
  3156. * Internal string contents comparison, called by operator==.
  3157. * Requires: this & text not bogus and have same lengths.
  3158. */
  3159. UBool doEquals(const UnicodeString &text, int32_t len) const;
  3160. inline int8_t
  3161. doCompare(int32_t start,
  3162. int32_t length,
  3163. const UnicodeString& srcText,
  3164. int32_t srcStart,
  3165. int32_t srcLength) const;
  3166. int8_t doCompare(int32_t start,
  3167. int32_t length,
  3168. const UChar *srcChars,
  3169. int32_t srcStart,
  3170. int32_t srcLength) const;
  3171. inline int8_t
  3172. doCompareCodePointOrder(int32_t start,
  3173. int32_t length,
  3174. const UnicodeString& srcText,
  3175. int32_t srcStart,
  3176. int32_t srcLength) const;
  3177. int8_t doCompareCodePointOrder(int32_t start,
  3178. int32_t length,
  3179. const UChar *srcChars,
  3180. int32_t srcStart,
  3181. int32_t srcLength) const;
  3182. inline int8_t
  3183. doCaseCompare(int32_t start,
  3184. int32_t length,
  3185. const UnicodeString &srcText,
  3186. int32_t srcStart,
  3187. int32_t srcLength,
  3188. uint32_t options) const;
  3189. int8_t
  3190. doCaseCompare(int32_t start,
  3191. int32_t length,
  3192. const UChar *srcChars,
  3193. int32_t srcStart,
  3194. int32_t srcLength,
  3195. uint32_t options) const;
  3196. int32_t doIndexOf(UChar c,
  3197. int32_t start,
  3198. int32_t length) const;
  3199. int32_t doIndexOf(UChar32 c,
  3200. int32_t start,
  3201. int32_t length) const;
  3202. int32_t doLastIndexOf(UChar c,
  3203. int32_t start,
  3204. int32_t length) const;
  3205. int32_t doLastIndexOf(UChar32 c,
  3206. int32_t start,
  3207. int32_t length) const;
  3208. void doExtract(int32_t start,
  3209. int32_t length,
  3210. UChar *dst,
  3211. int32_t dstStart) const;
  3212. inline void doExtract(int32_t start,
  3213. int32_t length,
  3214. UnicodeString& target) const;
  3215. inline UChar doCharAt(int32_t offset) const;
  3216. UnicodeString& doReplace(int32_t start,
  3217. int32_t length,
  3218. const UnicodeString& srcText,
  3219. int32_t srcStart,
  3220. int32_t srcLength);
  3221. UnicodeString& doReplace(int32_t start,
  3222. int32_t length,
  3223. const UChar *srcChars,
  3224. int32_t srcStart,
  3225. int32_t srcLength);
  3226. UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
  3227. UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength);
  3228. UnicodeString& doReverse(int32_t start,
  3229. int32_t length);
  3230. // calculate hash code
  3231. int32_t doHashCode(void) const;
  3232. // get pointer to start of array
  3233. // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
  3234. inline UChar* getArrayStart(void);
  3235. inline const UChar* getArrayStart(void) const;
  3236. inline UBool hasShortLength() const;
  3237. inline int32_t getShortLength() const;
  3238. // A UnicodeString object (not necessarily its current buffer)
  3239. // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
  3240. inline UBool isWritable() const;
  3241. // Is the current buffer writable?
  3242. inline UBool isBufferWritable() const;
  3243. // None of the following does releaseArray().
  3244. inline void setZeroLength();
  3245. inline void setShortLength(int32_t len);
  3246. inline void setLength(int32_t len);
  3247. inline void setToEmpty();
  3248. inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags
  3249. // allocate the array; result may be the stack buffer
  3250. // sets refCount to 1 if appropriate
  3251. // sets fArray, fCapacity, and flags
  3252. // sets length to 0
  3253. // returns boolean for success or failure
  3254. UBool allocate(int32_t capacity);
  3255. // release the array if owned
  3256. void releaseArray(void);
  3257. // turn a bogus string into an empty one
  3258. void unBogus();
  3259. // implements assignment operator, copy constructor, and fastCopyFrom()
  3260. UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
  3261. // Copies just the fields without memory management.
  3262. void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
  3263. // Pin start (and length) to acceptable values.
  3264. inline void pinIndex(int32_t& start) const;
  3265. inline void pinIndices(int32_t& start,
  3266. int32_t& length) const;
  3267. #if !UCONFIG_NO_CONVERSION
  3268. /* Internal extract() using UConverter. */
  3269. int32_t doExtract(int32_t start, int32_t length,
  3270. char *dest, int32_t destCapacity,
  3271. UConverter *cnv,
  3272. UErrorCode &errorCode) const;
  3273. /*
  3274. * Real constructor for converting from codepage data.
  3275. * It assumes that it is called with !fRefCounted.
  3276. *
  3277. * If <code>codepage==0</code>, then the default converter
  3278. * is used for the platform encoding.
  3279. * If <code>codepage</code> is an empty string (<code>""</code>),
  3280. * then a simple conversion is performed on the codepage-invariant
  3281. * subset ("invariant characters") of the platform encoding. See utypes.h.
  3282. */
  3283. void doCodepageCreate(const char *codepageData,
  3284. int32_t dataLength,
  3285. const char *codepage);
  3286. /*
  3287. * Worker function for creating a UnicodeString from
  3288. * a codepage string using a UConverter.
  3289. */
  3290. void
  3291. doCodepageCreate(const char *codepageData,
  3292. int32_t dataLength,
  3293. UConverter *converter,
  3294. UErrorCode &status);
  3295. #endif
  3296. /*
  3297. * This function is called when write access to the array
  3298. * is necessary.
  3299. *
  3300. * We need to make a copy of the array if
  3301. * the buffer is read-only, or
  3302. * the buffer is refCounted (shared), and refCount>1, or
  3303. * the buffer is too small.
  3304. *
  3305. * Return FALSE if memory could not be allocated.
  3306. */
  3307. UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
  3308. int32_t growCapacity = -1,
  3309. UBool doCopyArray = TRUE,
  3310. int32_t **pBufferToDelete = 0,
  3311. UBool forceClone = FALSE);
  3312. /**
  3313. * Common function for UnicodeString case mappings.
  3314. * The stringCaseMapper has the same type UStringCaseMapper
  3315. * as in ustr_imp.h for ustrcase_map().
  3316. */
  3317. UnicodeString &
  3318. caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
  3319. // ref counting
  3320. void addRef(void);
  3321. int32_t removeRef(void);
  3322. int32_t refCount(void) const;
  3323. // constants
  3324. enum {
  3325. /**
  3326. * Size of stack buffer for short strings, in UChars: UNISTR_OBJECT_SIZE minus one pointer and 2 bytes, divided by U_SIZEOF_UCHAR.
  3327. * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
  3328. * @see UNISTR_OBJECT_SIZE
  3329. */
  3330. US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
  3331. kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
  3332. kInvalidHashCode=0, // invalid hash code
  3333. kEmptyHashCode=1, // hash code for empty string
  3334. // bit flag values for fLengthAndFlags (the low kLengthShift bits)
  3335. kIsBogus=1, // this string is bogus, i.e., not valid or NULL
  3336. kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
  3337. kRefCounted=4, // there is a refCount field before the characters in fArray
  3338. kBufferIsReadonly=8,// do not write to this buffer
  3339. kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
  3340. // and releaseBuffer(newLength) must be called
  3341. kAllStorageFlags=0x1f, // mask covering all five storage flag bits above
  3342. kLengthShift=5, // the 11 bits above the flags hold the short length; a negative fLengthAndFlags means the length is "long"
  3343. kLength1=1<<kLengthShift, // a short length of 1, already shifted into position
  3344. kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
  3345. kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
  3346. // combined values for convenience
  3347. kShortString=kUsingStackBuffer,
  3348. kLongString=kRefCounted,
  3349. kReadonlyAlias=kBufferIsReadonly,
  3350. kWritableAlias=0
  3351. };
  3352. friend class UnicodeStringAppendable;
  3353. union StackBufferOrFields; // forward declaration necessary before friend declaration
  3354. friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
  3355. /*
  3356. * The following are all the class fields that are stored
  3357. * in each UnicodeString object.
  3358. * Note that UnicodeString has virtual functions,
  3359. * therefore there is an implicit vtable pointer
  3360. * as the first real field.
  3361. * The fields should be aligned such that no padding is necessary.
  3362. * On 32-bit machines, the size should be 32 bytes,
  3363. * on 64-bit machines (8-byte pointers), it should be 40 bytes.
  3364. *
  3365. * We use a hack to achieve this.
  3366. *
  3367. * With at least some compilers, each of the following is forced to
  3368. * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
  3369. * rounded up with additional padding if the fields do not already fit that requirement:
  3370. * - sizeof(class UnicodeString)
  3371. * - offsetof(UnicodeString, fUnion)
  3372. * - sizeof(fUnion)
  3373. * - sizeof(fStackFields)
  3374. *
  3375. * We optimize for the longest possible internal buffer for short strings.
  3376. * fUnion.fStackFields begins with 2 bytes for storage flags
  3377. * and the length of relatively short strings,
  3378. * followed by the buffer for short string contents.
  3379. * There is no padding inside fStackFields.
  3380. *
  3381. * Heap-allocated and aliased strings use fUnion.fFields.
  3382. * Both fStackFields and fFields must begin with the same fields for flags and short length,
  3383. * that is, those must have the same memory offsets inside the object,
  3384. * because the flags must be inspected in order to decide which half of fUnion is being used.
  3385. * We assume that the compiler does not reorder the fields.
  3386. *
  3387. * (Padding at the end of fFields is ok:
  3388. * As long as it is no larger than fStackFields, it is not wasted space.)
  3389. *
  3390. * For some of the history of the UnicodeString class fields layout, see
  3391. * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
  3392. * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
  3393. * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
  3394. */
  3395. // (implicit) *vtable;
  3396. union StackBufferOrFields {
  3397. // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
  3398. // Each struct of the union must begin with fLengthAndFlags,
  3399. struct { // so that the flags can be read through either member before deciding which one is active.
  3400. int16_t fLengthAndFlags; // bit fields: see constants above
  3401. UChar fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
  3402. } fStackFields;
  3403. struct {
  3404. int16_t fLengthAndFlags; // bit fields: see constants above
  3405. int32_t fLength; // number of characters in fArray; only read when fLengthAndFlags is negative (see hasShortLength()/length()), else undefined
  3406. int32_t fCapacity; // capacity of fArray (in UChars)
  3407. // array pointer last to minimize padding for machines with P128 data model
  3408. // or pointer sizes that are not a power of 2
  3409. UChar *fArray; // the Unicode data
  3410. } fFields;
  3411. } fUnion;
  3412. };
  3413. /**
  3414. * Create a new UnicodeString with the concatenation of two others.
  3415. *
  3416. * @param s1 The first string to be copied to the new one.
  3417. * @param s2 The second string to be copied to the new one, after s1.
  3418. * @return UnicodeString(s1).append(s2)
  3419. * @stable ICU 2.8
  3420. */
  3421. U_COMMON_API UnicodeString U_EXPORT2
  3422. operator+ (const UnicodeString &s1, const UnicodeString &s2);
  3423. //========================================
  3424. // Inline members
  3425. //========================================
  3426. //========================================
  3427. // Privates
  3428. //========================================
  3429. inline void
  3430. UnicodeString::pinIndex(int32_t& start) const
  3431. {
  3432. // pin index
  3433. if(start < 0) {
  3434. start = 0;
  3435. } else if(start > length()) {
  3436. start = length();
  3437. }
  3438. }
  3439. inline void
  3440. UnicodeString::pinIndices(int32_t& start,
  3441. int32_t& _length) const
  3442. {
  3443. // pin indices
  3444. int32_t len = length();
  3445. if(start < 0) {
  3446. start = 0;
  3447. } else if(start > len) {
  3448. start = len;
  3449. }
  3450. if(_length < 0) {
  3451. _length = 0;
  3452. } else if(_length > (len - start)) {
  3453. _length = (len - start);
  3454. }
  3455. }
  3456. inline UChar*
  3457. UnicodeString::getArrayStart() {
  3458. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3459. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3460. }
  3461. inline const UChar*
  3462. UnicodeString::getArrayStart() const {
  3463. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3464. fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
  3465. }
  3466. //========================================
  3467. // Default constructor
  3468. //========================================
  3469. inline
  3470. UnicodeString::UnicodeString() {
  3471. fUnion.fStackFields.fLengthAndFlags=kShortString;
  3472. }
  3473. //========================================
  3474. // Read-only implementation methods
  3475. //========================================
  3476. inline UBool
  3477. UnicodeString::hasShortLength() const {
  3478. return fUnion.fFields.fLengthAndFlags>=0;
  3479. }
  3480. inline int32_t
  3481. UnicodeString::getShortLength() const {
  3482. // fLengthAndFlags must be non-negative -> short length >= 0
  3483. // and arithmetic or logical shift does not matter.
  3484. return fUnion.fFields.fLengthAndFlags>>kLengthShift;
  3485. }
  3486. inline int32_t
  3487. UnicodeString::length() const {
  3488. return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
  3489. }
  3490. inline int32_t
  3491. UnicodeString::getCapacity() const {
  3492. return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
  3493. US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
  3494. }
  3495. inline int32_t
  3496. UnicodeString::hashCode() const
  3497. { return doHashCode(); }
  3498. inline UBool
  3499. UnicodeString::isBogus() const
  3500. { return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
  3501. inline UBool
  3502. UnicodeString::isWritable() const
  3503. { return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
  3504. inline UBool
  3505. UnicodeString::isBufferWritable() const
  3506. {
  3507. return (UBool)(
  3508. !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
  3509. (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
  3510. }
  3511. inline const UChar *
  3512. UnicodeString::getBuffer() const {
  3513. if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
  3514. return 0;
  3515. } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
  3516. return fUnion.fStackFields.fBuffer;
  3517. } else {
  3518. return fUnion.fFields.fArray;
  3519. }
  3520. }
  3521. //========================================
  3522. // Read-only alias methods
  3523. //========================================
  3524. inline int8_t
  3525. UnicodeString::doCompare(int32_t start,
  3526. int32_t thisLength,
  3527. const UnicodeString& srcText,
  3528. int32_t srcStart,
  3529. int32_t srcLength) const
  3530. {
  3531. if(srcText.isBogus()) {
  3532. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3533. } else {
  3534. srcText.pinIndices(srcStart, srcLength);
  3535. return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3536. }
  3537. }
  3538. inline UBool
  3539. UnicodeString::operator== (const UnicodeString& text) const
  3540. {
  3541. if(isBogus()) {
  3542. return text.isBogus();
  3543. } else {
  3544. int32_t len = length(), textLength = text.length();
  3545. return !text.isBogus() && len == textLength && doEquals(text, len);
  3546. }
  3547. }
  3548. inline UBool
  3549. UnicodeString::operator!= (const UnicodeString& text) const
  3550. { return (! operator==(text)); }
  3551. inline UBool
  3552. UnicodeString::operator> (const UnicodeString& text) const
  3553. { return doCompare(0, length(), text, 0, text.length()) == 1; }
  3554. inline UBool
  3555. UnicodeString::operator< (const UnicodeString& text) const
  3556. { return doCompare(0, length(), text, 0, text.length()) == -1; }
  3557. inline UBool
  3558. UnicodeString::operator>= (const UnicodeString& text) const
  3559. { return doCompare(0, length(), text, 0, text.length()) != -1; }
  3560. inline UBool
  3561. UnicodeString::operator<= (const UnicodeString& text) const
  3562. { return doCompare(0, length(), text, 0, text.length()) != 1; }
  3563. inline int8_t
  3564. UnicodeString::compare(const UnicodeString& text) const
  3565. { return doCompare(0, length(), text, 0, text.length()); }
  3566. inline int8_t
  3567. UnicodeString::compare(int32_t start,
  3568. int32_t _length,
  3569. const UnicodeString& srcText) const
  3570. { return doCompare(start, _length, srcText, 0, srcText.length()); }
  3571. inline int8_t
  3572. UnicodeString::compare(const UChar *srcChars,
  3573. int32_t srcLength) const
  3574. { return doCompare(0, length(), srcChars, 0, srcLength); }
  3575. inline int8_t
  3576. UnicodeString::compare(int32_t start,
  3577. int32_t _length,
  3578. const UnicodeString& srcText,
  3579. int32_t srcStart,
  3580. int32_t srcLength) const
  3581. { return doCompare(start, _length, srcText, srcStart, srcLength); }
  3582. inline int8_t
  3583. UnicodeString::compare(int32_t start,
  3584. int32_t _length,
  3585. const UChar *srcChars) const
  3586. { return doCompare(start, _length, srcChars, 0, _length); }
  3587. inline int8_t
  3588. UnicodeString::compare(int32_t start,
  3589. int32_t _length,
  3590. const UChar *srcChars,
  3591. int32_t srcStart,
  3592. int32_t srcLength) const
  3593. { return doCompare(start, _length, srcChars, srcStart, srcLength); }
  3594. inline int8_t
  3595. UnicodeString::compareBetween(int32_t start,
  3596. int32_t limit,
  3597. const UnicodeString& srcText,
  3598. int32_t srcStart,
  3599. int32_t srcLimit) const
  3600. { return doCompare(start, limit - start,
  3601. srcText, srcStart, srcLimit - srcStart); }
  3602. inline int8_t
  3603. UnicodeString::doCompareCodePointOrder(int32_t start,
  3604. int32_t thisLength,
  3605. const UnicodeString& srcText,
  3606. int32_t srcStart,
  3607. int32_t srcLength) const
  3608. {
  3609. if(srcText.isBogus()) {
  3610. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3611. } else {
  3612. srcText.pinIndices(srcStart, srcLength);
  3613. return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
  3614. }
  3615. }
  3616. inline int8_t
  3617. UnicodeString::compareCodePointOrder(const UnicodeString& text) const
  3618. { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
  3619. inline int8_t
  3620. UnicodeString::compareCodePointOrder(int32_t start,
  3621. int32_t _length,
  3622. const UnicodeString& srcText) const
  3623. { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
  3624. inline int8_t
  3625. UnicodeString::compareCodePointOrder(const UChar *srcChars,
  3626. int32_t srcLength) const
  3627. { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
  3628. inline int8_t
  3629. UnicodeString::compareCodePointOrder(int32_t start,
  3630. int32_t _length,
  3631. const UnicodeString& srcText,
  3632. int32_t srcStart,
  3633. int32_t srcLength) const
  3634. { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
  3635. inline int8_t
  3636. UnicodeString::compareCodePointOrder(int32_t start,
  3637. int32_t _length,
  3638. const UChar *srcChars) const
  3639. { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
  3640. inline int8_t
  3641. UnicodeString::compareCodePointOrder(int32_t start,
  3642. int32_t _length,
  3643. const UChar *srcChars,
  3644. int32_t srcStart,
  3645. int32_t srcLength) const
  3646. { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
  3647. inline int8_t
  3648. UnicodeString::compareCodePointOrderBetween(int32_t start,
  3649. int32_t limit,
  3650. const UnicodeString& srcText,
  3651. int32_t srcStart,
  3652. int32_t srcLimit) const
  3653. { return doCompareCodePointOrder(start, limit - start,
  3654. srcText, srcStart, srcLimit - srcStart); }
  3655. inline int8_t
  3656. UnicodeString::doCaseCompare(int32_t start,
  3657. int32_t thisLength,
  3658. const UnicodeString &srcText,
  3659. int32_t srcStart,
  3660. int32_t srcLength,
  3661. uint32_t options) const
  3662. {
  3663. if(srcText.isBogus()) {
  3664. return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
  3665. } else {
  3666. srcText.pinIndices(srcStart, srcLength);
  3667. return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
  3668. }
  3669. }
  3670. inline int8_t
  3671. UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
  3672. return doCaseCompare(0, length(), text, 0, text.length(), options);
  3673. }
  3674. inline int8_t
  3675. UnicodeString::caseCompare(int32_t start,
  3676. int32_t _length,
  3677. const UnicodeString &srcText,
  3678. uint32_t options) const {
  3679. return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
  3680. }
  3681. inline int8_t
  3682. UnicodeString::caseCompare(const UChar *srcChars,
  3683. int32_t srcLength,
  3684. uint32_t options) const {
  3685. return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
  3686. }
  3687. inline int8_t
  3688. UnicodeString::caseCompare(int32_t start,
  3689. int32_t _length,
  3690. const UnicodeString &srcText,
  3691. int32_t srcStart,
  3692. int32_t srcLength,
  3693. uint32_t options) const {
  3694. return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
  3695. }
  3696. inline int8_t
  3697. UnicodeString::caseCompare(int32_t start,
  3698. int32_t _length,
  3699. const UChar *srcChars,
  3700. uint32_t options) const {
  3701. return doCaseCompare(start, _length, srcChars, 0, _length, options);
  3702. }
  3703. inline int8_t
  3704. UnicodeString::caseCompare(int32_t start,
  3705. int32_t _length,
  3706. const UChar *srcChars,
  3707. int32_t srcStart,
  3708. int32_t srcLength,
  3709. uint32_t options) const {
  3710. return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
  3711. }
  3712. inline int8_t
  3713. UnicodeString::caseCompareBetween(int32_t start,
  3714. int32_t limit,
  3715. const UnicodeString &srcText,
  3716. int32_t srcStart,
  3717. int32_t srcLimit,
  3718. uint32_t options) const {
  3719. return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
  3720. }
  3721. inline int32_t
  3722. UnicodeString::indexOf(const UnicodeString& srcText,
  3723. int32_t srcStart,
  3724. int32_t srcLength,
  3725. int32_t start,
  3726. int32_t _length) const
  3727. {
  3728. if(!srcText.isBogus()) {
  3729. srcText.pinIndices(srcStart, srcLength);
  3730. if(srcLength > 0) {
  3731. return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3732. }
  3733. }
  3734. return -1;
  3735. }
  3736. inline int32_t
  3737. UnicodeString::indexOf(const UnicodeString& text) const
  3738. { return indexOf(text, 0, text.length(), 0, length()); }
  3739. inline int32_t
  3740. UnicodeString::indexOf(const UnicodeString& text,
  3741. int32_t start) const {
  3742. pinIndex(start);
  3743. return indexOf(text, 0, text.length(), start, length() - start);
  3744. }
  3745. inline int32_t
  3746. UnicodeString::indexOf(const UnicodeString& text,
  3747. int32_t start,
  3748. int32_t _length) const
  3749. { return indexOf(text, 0, text.length(), start, _length); }
  3750. inline int32_t
  3751. UnicodeString::indexOf(const UChar *srcChars,
  3752. int32_t srcLength,
  3753. int32_t start) const {
  3754. pinIndex(start);
  3755. return indexOf(srcChars, 0, srcLength, start, length() - start);
  3756. }
  3757. inline int32_t
  3758. UnicodeString::indexOf(const UChar *srcChars,
  3759. int32_t srcLength,
  3760. int32_t start,
  3761. int32_t _length) const
  3762. { return indexOf(srcChars, 0, srcLength, start, _length); }
  3763. inline int32_t
  3764. UnicodeString::indexOf(UChar c,
  3765. int32_t start,
  3766. int32_t _length) const
  3767. { return doIndexOf(c, start, _length); }
  3768. inline int32_t
  3769. UnicodeString::indexOf(UChar32 c,
  3770. int32_t start,
  3771. int32_t _length) const
  3772. { return doIndexOf(c, start, _length); }
  3773. inline int32_t
  3774. UnicodeString::indexOf(UChar c) const
  3775. { return doIndexOf(c, 0, length()); }
  3776. inline int32_t
  3777. UnicodeString::indexOf(UChar32 c) const
  3778. { return indexOf(c, 0, length()); }
  3779. inline int32_t
  3780. UnicodeString::indexOf(UChar c,
  3781. int32_t start) const {
  3782. pinIndex(start);
  3783. return doIndexOf(c, start, length() - start);
  3784. }
  3785. inline int32_t
  3786. UnicodeString::indexOf(UChar32 c,
  3787. int32_t start) const {
  3788. pinIndex(start);
  3789. return indexOf(c, start, length() - start);
  3790. }
  3791. inline int32_t
  3792. UnicodeString::lastIndexOf(const UChar *srcChars,
  3793. int32_t srcLength,
  3794. int32_t start,
  3795. int32_t _length) const
  3796. { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
  3797. inline int32_t
  3798. UnicodeString::lastIndexOf(const UChar *srcChars,
  3799. int32_t srcLength,
  3800. int32_t start) const {
  3801. pinIndex(start);
  3802. return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
  3803. }
  3804. inline int32_t
  3805. UnicodeString::lastIndexOf(const UnicodeString& srcText,
  3806. int32_t srcStart,
  3807. int32_t srcLength,
  3808. int32_t start,
  3809. int32_t _length) const
  3810. {
  3811. if(!srcText.isBogus()) {
  3812. srcText.pinIndices(srcStart, srcLength);
  3813. if(srcLength > 0) {
  3814. return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
  3815. }
  3816. }
  3817. return -1;
  3818. }
  3819. inline int32_t
  3820. UnicodeString::lastIndexOf(const UnicodeString& text,
  3821. int32_t start,
  3822. int32_t _length) const
  3823. { return lastIndexOf(text, 0, text.length(), start, _length); }
  3824. inline int32_t
  3825. UnicodeString::lastIndexOf(const UnicodeString& text,
  3826. int32_t start) const {
  3827. pinIndex(start);
  3828. return lastIndexOf(text, 0, text.length(), start, length() - start);
  3829. }
  3830. inline int32_t
  3831. UnicodeString::lastIndexOf(const UnicodeString& text) const
  3832. { return lastIndexOf(text, 0, text.length(), 0, length()); }
  3833. inline int32_t
  3834. UnicodeString::lastIndexOf(UChar c,
  3835. int32_t start,
  3836. int32_t _length) const
  3837. { return doLastIndexOf(c, start, _length); }
  3838. inline int32_t
  3839. UnicodeString::lastIndexOf(UChar32 c,
  3840. int32_t start,
  3841. int32_t _length) const {
  3842. return doLastIndexOf(c, start, _length);
  3843. }
  3844. inline int32_t
  3845. UnicodeString::lastIndexOf(UChar c) const
  3846. { return doLastIndexOf(c, 0, length()); }
  3847. inline int32_t
  3848. UnicodeString::lastIndexOf(UChar32 c) const {
  3849. return lastIndexOf(c, 0, length());
  3850. }
  3851. inline int32_t
  3852. UnicodeString::lastIndexOf(UChar c,
  3853. int32_t start) const {
  3854. pinIndex(start);
  3855. return doLastIndexOf(c, start, length() - start);
  3856. }
  3857. inline int32_t
  3858. UnicodeString::lastIndexOf(UChar32 c,
  3859. int32_t start) const {
  3860. pinIndex(start);
  3861. return lastIndexOf(c, start, length() - start);
  3862. }
  3863. inline UBool
  3864. UnicodeString::startsWith(const UnicodeString& text) const
  3865. { return compare(0, text.length(), text, 0, text.length()) == 0; }
  3866. inline UBool
  3867. UnicodeString::startsWith(const UnicodeString& srcText,
  3868. int32_t srcStart,
  3869. int32_t srcLength) const
  3870. { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
  3871. inline UBool
  3872. UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
  3873. if(srcLength < 0) {
  3874. srcLength = u_strlen(srcChars);
  3875. }
  3876. return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
  3877. }
  3878. inline UBool
  3879. UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
  3880. if(srcLength < 0) {
  3881. srcLength = u_strlen(srcChars);
  3882. }
  3883. return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
  3884. }
  3885. inline UBool
  3886. UnicodeString::endsWith(const UnicodeString& text) const
  3887. { return doCompare(length() - text.length(), text.length(),
  3888. text, 0, text.length()) == 0; }
  3889. inline UBool
  3890. UnicodeString::endsWith(const UnicodeString& srcText,
  3891. int32_t srcStart,
  3892. int32_t srcLength) const {
  3893. srcText.pinIndices(srcStart, srcLength);
  3894. return doCompare(length() - srcLength, srcLength,
  3895. srcText, srcStart, srcLength) == 0;
  3896. }
  3897. inline UBool
  3898. UnicodeString::endsWith(const UChar *srcChars,
  3899. int32_t srcLength) const {
  3900. if(srcLength < 0) {
  3901. srcLength = u_strlen(srcChars);
  3902. }
  3903. return doCompare(length() - srcLength, srcLength,
  3904. srcChars, 0, srcLength) == 0;
  3905. }
  3906. inline UBool
  3907. UnicodeString::endsWith(const UChar *srcChars,
  3908. int32_t srcStart,
  3909. int32_t srcLength) const {
  3910. if(srcLength < 0) {
  3911. srcLength = u_strlen(srcChars + srcStart);
  3912. }
  3913. return doCompare(length() - srcLength, srcLength,
  3914. srcChars, srcStart, srcLength) == 0;
  3915. }
  3916. //========================================
  3917. // replace
  3918. //========================================
  3919. inline UnicodeString&
  3920. UnicodeString::replace(int32_t start,
  3921. int32_t _length,
  3922. const UnicodeString& srcText)
  3923. { return doReplace(start, _length, srcText, 0, srcText.length()); }
  3924. inline UnicodeString&
  3925. UnicodeString::replace(int32_t start,
  3926. int32_t _length,
  3927. const UnicodeString& srcText,
  3928. int32_t srcStart,
  3929. int32_t srcLength)
  3930. { return doReplace(start, _length, srcText, srcStart, srcLength); }
  3931. inline UnicodeString&
  3932. UnicodeString::replace(int32_t start,
  3933. int32_t _length,
  3934. const UChar *srcChars,
  3935. int32_t srcLength)
  3936. { return doReplace(start, _length, srcChars, 0, srcLength); }
  3937. inline UnicodeString&
  3938. UnicodeString::replace(int32_t start,
  3939. int32_t _length,
  3940. const UChar *srcChars,
  3941. int32_t srcStart,
  3942. int32_t srcLength)
  3943. { return doReplace(start, _length, srcChars, srcStart, srcLength); }
  3944. inline UnicodeString&
  3945. UnicodeString::replace(int32_t start,
  3946. int32_t _length,
  3947. UChar srcChar)
  3948. { return doReplace(start, _length, &srcChar, 0, 1); }
  3949. inline UnicodeString&
  3950. UnicodeString::replaceBetween(int32_t start,
  3951. int32_t limit,
  3952. const UnicodeString& srcText)
  3953. { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
  3954. inline UnicodeString&
  3955. UnicodeString::replaceBetween(int32_t start,
  3956. int32_t limit,
  3957. const UnicodeString& srcText,
  3958. int32_t srcStart,
  3959. int32_t srcLimit)
  3960. { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
  3961. inline UnicodeString&
  3962. UnicodeString::findAndReplace(const UnicodeString& oldText,
  3963. const UnicodeString& newText)
  3964. { return findAndReplace(0, length(), oldText, 0, oldText.length(),
  3965. newText, 0, newText.length()); }
  3966. inline UnicodeString&
  3967. UnicodeString::findAndReplace(int32_t start,
  3968. int32_t _length,
  3969. const UnicodeString& oldText,
  3970. const UnicodeString& newText)
  3971. { return findAndReplace(start, _length, oldText, 0, oldText.length(),
  3972. newText, 0, newText.length()); }
  3973. // ============================
  3974. // extract
  3975. // ============================
  3976. inline void
  3977. UnicodeString::doExtract(int32_t start,
  3978. int32_t _length,
  3979. UnicodeString& target) const
  3980. { target.replace(0, target.length(), *this, start, _length); }
  3981. inline void
  3982. UnicodeString::extract(int32_t start,
  3983. int32_t _length,
  3984. UChar *target,
  3985. int32_t targetStart) const
  3986. { doExtract(start, _length, target, targetStart); }
  3987. inline void
  3988. UnicodeString::extract(int32_t start,
  3989. int32_t _length,
  3990. UnicodeString& target) const
  3991. { doExtract(start, _length, target); }
  3992. #if !UCONFIG_NO_CONVERSION
  3993. inline int32_t
  3994. UnicodeString::extract(int32_t start,
  3995. int32_t _length,
  3996. char *dst,
  3997. const char *codepage) const
  3998. {
  3999. // This dstSize value will be checked explicitly
  4000. return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
  4001. }
  4002. #endif
  4003. inline void
  4004. UnicodeString::extractBetween(int32_t start,
  4005. int32_t limit,
  4006. UChar *dst,
  4007. int32_t dstStart) const {
  4008. pinIndex(start);
  4009. pinIndex(limit);
  4010. doExtract(start, limit - start, dst, dstStart);
  4011. }
  4012. inline UnicodeString
  4013. UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
  4014. return tempSubString(start, limit - start);
  4015. }
  4016. inline UChar
  4017. UnicodeString::doCharAt(int32_t offset) const
  4018. {
  4019. if((uint32_t)offset < (uint32_t)length()) {
  4020. return getArrayStart()[offset];
  4021. } else {
  4022. return kInvalidUChar;
  4023. }
  4024. }
  4025. inline UChar
  4026. UnicodeString::charAt(int32_t offset) const
  4027. { return doCharAt(offset); }
  4028. inline UChar
  4029. UnicodeString::operator[] (int32_t offset) const
  4030. { return doCharAt(offset); }
  4031. inline UBool
  4032. UnicodeString::isEmpty() const {
  4033. // Arithmetic or logical right shift does not matter: only testing for 0.
  4034. return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
  4035. }
  4036. //========================================
  4037. // Write implementation methods
  4038. //========================================
  4039. inline void
  4040. UnicodeString::setZeroLength() {
  4041. fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
  4042. }
  4043. inline void
  4044. UnicodeString::setShortLength(int32_t len) {
  4045. // requires 0 <= len <= kMaxShortLength
  4046. fUnion.fFields.fLengthAndFlags =
  4047. (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
  4048. }
  4049. inline void
  4050. UnicodeString::setLength(int32_t len) {
  4051. if(len <= kMaxShortLength) {
  4052. setShortLength(len);
  4053. } else {
  4054. fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
  4055. fUnion.fFields.fLength = len;
  4056. }
  4057. }
  4058. inline void
  4059. UnicodeString::setToEmpty() {
  4060. fUnion.fFields.fLengthAndFlags = kShortString;
  4061. }
  4062. inline void
  4063. UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
  4064. setLength(len);
  4065. fUnion.fFields.fArray = array;
  4066. fUnion.fFields.fCapacity = capacity;
  4067. }
  4068. inline UnicodeString&
  4069. UnicodeString::operator= (UChar ch)
  4070. { return doReplace(0, length(), &ch, 0, 1); }
  4071. inline UnicodeString&
  4072. UnicodeString::operator= (UChar32 ch)
  4073. { return replace(0, length(), ch); }
  4074. inline UnicodeString&
  4075. UnicodeString::setTo(const UnicodeString& srcText,
  4076. int32_t srcStart,
  4077. int32_t srcLength)
  4078. {
  4079. unBogus();
  4080. return doReplace(0, length(), srcText, srcStart, srcLength);
  4081. }
  4082. inline UnicodeString&
  4083. UnicodeString::setTo(const UnicodeString& srcText,
  4084. int32_t srcStart)
  4085. {
  4086. unBogus();
  4087. srcText.pinIndex(srcStart);
  4088. return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
  4089. }
  4090. inline UnicodeString&
  4091. UnicodeString::setTo(const UnicodeString& srcText)
  4092. {
  4093. return copyFrom(srcText);
  4094. }
  4095. inline UnicodeString&
  4096. UnicodeString::setTo(const UChar *srcChars,
  4097. int32_t srcLength)
  4098. {
  4099. unBogus();
  4100. return doReplace(0, length(), srcChars, 0, srcLength);
  4101. }
  4102. inline UnicodeString&
  4103. UnicodeString::setTo(UChar srcChar)
  4104. {
  4105. unBogus();
  4106. return doReplace(0, length(), &srcChar, 0, 1);
  4107. }
  4108. inline UnicodeString&
  4109. UnicodeString::setTo(UChar32 srcChar)
  4110. {
  4111. unBogus();
  4112. return replace(0, length(), srcChar);
  4113. }
  4114. inline UnicodeString&
  4115. UnicodeString::append(const UnicodeString& srcText,
  4116. int32_t srcStart,
  4117. int32_t srcLength)
  4118. { return doAppend(srcText, srcStart, srcLength); }
  4119. inline UnicodeString&
  4120. UnicodeString::append(const UnicodeString& srcText)
  4121. { return doAppend(srcText, 0, srcText.length()); }
  4122. inline UnicodeString&
  4123. UnicodeString::append(const UChar *srcChars,
  4124. int32_t srcStart,
  4125. int32_t srcLength)
  4126. { return doAppend(srcChars, srcStart, srcLength); }
  4127. inline UnicodeString&
  4128. UnicodeString::append(const UChar *srcChars,
  4129. int32_t srcLength)
  4130. { return doAppend(srcChars, 0, srcLength); }
  4131. inline UnicodeString&
  4132. UnicodeString::append(UChar srcChar)
  4133. { return doAppend(&srcChar, 0, 1); }
  4134. inline UnicodeString&
  4135. UnicodeString::operator+= (UChar ch)
  4136. { return doAppend(&ch, 0, 1); }
  4137. inline UnicodeString&
  4138. UnicodeString::operator+= (UChar32 ch) {
  4139. return append(ch);
  4140. }
  4141. inline UnicodeString&
  4142. UnicodeString::operator+= (const UnicodeString& srcText)
  4143. { return doAppend(srcText, 0, srcText.length()); }
  4144. inline UnicodeString&
  4145. UnicodeString::insert(int32_t start,
  4146. const UnicodeString& srcText,
  4147. int32_t srcStart,
  4148. int32_t srcLength)
  4149. { return doReplace(start, 0, srcText, srcStart, srcLength); }
  4150. inline UnicodeString&
  4151. UnicodeString::insert(int32_t start,
  4152. const UnicodeString& srcText)
  4153. { return doReplace(start, 0, srcText, 0, srcText.length()); }
  4154. inline UnicodeString&
  4155. UnicodeString::insert(int32_t start,
  4156. const UChar *srcChars,
  4157. int32_t srcStart,
  4158. int32_t srcLength)
  4159. { return doReplace(start, 0, srcChars, srcStart, srcLength); }
  4160. inline UnicodeString&
  4161. UnicodeString::insert(int32_t start,
  4162. const UChar *srcChars,
  4163. int32_t srcLength)
  4164. { return doReplace(start, 0, srcChars, 0, srcLength); }
  4165. inline UnicodeString&
  4166. UnicodeString::insert(int32_t start,
  4167. UChar srcChar)
  4168. { return doReplace(start, 0, &srcChar, 0, 1); }
  4169. inline UnicodeString&
  4170. UnicodeString::insert(int32_t start,
  4171. UChar32 srcChar)
  4172. { return replace(start, 0, srcChar); }
  4173. inline UnicodeString&
  4174. UnicodeString::remove()
  4175. {
  4176. // remove() of a bogus string makes the string empty and non-bogus
  4177. if(isBogus()) {
  4178. setToEmpty();
  4179. } else {
  4180. setZeroLength();
  4181. }
  4182. return *this;
  4183. }
  4184. inline UnicodeString&
  4185. UnicodeString::remove(int32_t start,
  4186. int32_t _length)
  4187. {
  4188. if(start <= 0 && _length == INT32_MAX) {
  4189. // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
  4190. return remove();
  4191. }
  4192. return doReplace(start, _length, NULL, 0, 0);
  4193. }
  4194. inline UnicodeString&
  4195. UnicodeString::removeBetween(int32_t start,
  4196. int32_t limit)
  4197. { return doReplace(start, limit - start, NULL, 0, 0); }
  4198. inline UnicodeString &
  4199. UnicodeString::retainBetween(int32_t start, int32_t limit) {
  4200. truncate(limit);
  4201. return doReplace(0, start, NULL, 0, 0);
  4202. }
  4203. inline UBool
  4204. UnicodeString::truncate(int32_t targetLength)
  4205. {
  4206. if(isBogus() && targetLength == 0) {
  4207. // truncate(0) of a bogus string makes the string empty and non-bogus
  4208. unBogus();
  4209. return FALSE;
  4210. } else if((uint32_t)targetLength < (uint32_t)length()) {
  4211. setLength(targetLength);
  4212. return TRUE;
  4213. } else {
  4214. return FALSE;
  4215. }
  4216. }
  4217. inline UnicodeString&
  4218. UnicodeString::reverse()
  4219. { return doReverse(0, length()); }
  4220. inline UnicodeString&
  4221. UnicodeString::reverse(int32_t start,
  4222. int32_t _length)
  4223. { return doReverse(start, _length); }
  4224. U_NAMESPACE_END
  4225. #endif