ICU 76.1  76.1
brkiter.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ********************************************************************************
5 * Copyright (C) 1997-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ********************************************************************************
8 *
9 * File brkiter.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 02/18/97 aliu Added typedef for TextCount. Made DONE const.
15 * 05/07/97 aliu Fixed DLL declaration.
16 * 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
17 * 08/11/98 helena Sync-up JDK1.2.
18 * 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
19 ********************************************************************************
20 */
21 
22 #ifndef BRKITER_H
23 #define BRKITER_H
24 
25 #include "unicode/utypes.h"
26 
32 #include "unicode/utypes.h"
33 
34 #if U_SHOW_CPLUSPLUS_API
35 
36 #if UCONFIG_NO_BREAK_ITERATION
37 
38 U_NAMESPACE_BEGIN
39 
40 /*
41  * Allow the declaration of APIs with pointers to BreakIterator
42  * even when break iteration is removed from the build.
43  */
44 class BreakIterator;
45 
46 U_NAMESPACE_END
47 
48 #else
49 
50 #include "unicode/uobject.h"
51 #include "unicode/unistr.h"
52 #include "unicode/chariter.h"
53 #include "unicode/locid.h"
54 #include "unicode/ubrk.h"
55 #include "unicode/strenum.h"
56 #include "unicode/utext.h"
57 #include "unicode/umisc.h"
58 
59 U_NAMESPACE_BEGIN
60 
107 public:
112  virtual ~BreakIterator(); // Destructor; virtual so a derived iterator can be destroyed through a BreakIterator pointer.
113 
127  virtual bool operator==(const BreakIterator&) const = 0; // Returns true if the other object is semantically equal to this one (pure virtual).
128 
135  bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); } // Complement of operator==; delegates to the virtual operator==.
136 
142  virtual BreakIterator* clone() const = 0; // Returns a polymorphic copy of this object (pure virtual).
143 
149  virtual UClassID getDynamicClassID() const override = 0; // Returns a polymorphic class ID for this object; overrides a base declaration and stays pure virtual here.
150 
155  virtual CharacterIterator& getText() const = 0; // Returns a CharacterIterator over the text being analyzed.
156 
171  virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0; // Gets a UText for the text being analyzed; errors are reported through status.
172 
184  virtual void setText(const UnicodeString &text) = 0; // Changes the text over which this iterator operates.
185 
204  virtual void setText(UText *text, UErrorCode &status) = 0; // Resets the break iterator to operate over the text represented by the UText.
205 
214  virtual void adoptText(CharacterIterator* it) = 0; // Changes the text over which this iterator operates. NOTE(review): the name suggests ownership of `it` is taken - confirm.
215 
216  enum { // Unnamed enum used to define a class-scope integer constant.
222  DONE = static_cast<int32_t>(-1) // Value -1. NOTE(review): presumably the sentinel returned by the iteration methods when no boundary remains - confirm.
223  };
224 
230  virtual int32_t first() = 0; // Sets the current iteration position to the beginning of the text, position zero.
231 
237  virtual int32_t last() = 0; // Sets the iterator position to the index immediately beyond the last character in the text.
238 
245  virtual int32_t previous() = 0; // Sets the iterator position to the boundary preceding the current boundary.
246 
253  virtual int32_t next() = 0; // Advances the iterator to the boundary following the current boundary.
254 
260  virtual int32_t current() const = 0; // Returns the character index of the current iterator position within the text.
261 
270  virtual int32_t following(int32_t offset) = 0; // Advances the iterator to the first boundary following the specified offset.
271 
280  virtual int32_t preceding(int32_t offset) = 0; // Sets the iterator position to the first boundary preceding the specified offset.
281 
290  virtual UBool isBoundary(int32_t offset) = 0; // Returns true if the specified position is a boundary position.
291 
301  virtual int32_t next(int32_t n) = 0; // Sets the iterator position to the nth boundary from the current boundary.
302 
316  virtual int32_t getRuleStatus() const; // For RuleBasedBreakIterators: returns the status tag from the break rule that determined the boundary. Not pure virtual, so this class supplies a default.
317 
346  virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status); // For RuleBasedBreakIterators: gets the status (tag) values from the break rule(s) that determined the boundary. Not pure virtual.
347 
367  static BreakIterator* U_EXPORT2
368  createWordInstance(const Locale& where, UErrorCode& status); // Creates a BreakIterator for word-breaks using the given locale.
369 
391  static BreakIterator* U_EXPORT2
392  createLineInstance(const Locale& where, UErrorCode& status); // Creates a BreakIterator for line-breaks using the specified locale.
393 
413  static BreakIterator* U_EXPORT2
414  createCharacterInstance(const Locale& where, UErrorCode& status); // Creates a BreakIterator for character-breaks using the specified locale.
415 
434  static BreakIterator* U_EXPORT2
435  createSentenceInstance(const Locale& where, UErrorCode& status); // Creates a BreakIterator for sentence-breaks using the specified locale.
436 
437 #ifndef U_HIDE_DEPRECATED_API
460  static BreakIterator* U_EXPORT2
461  createTitleInstance(const Locale& where, UErrorCode& status); // Creates a BreakIterator for title-casing breaks using the specified locale. Deprecated API: compiled out when U_HIDE_DEPRECATED_API is defined.
462 #endif /* U_HIDE_DEPRECATED_API */
463 
473  static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); // Gets the set of Locales for which TextBoundaries are installed. NOTE(review): `count` is presumably an output giving the array length - confirm.
474 
484  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
485  const Locale& displayLocale,
486  UnicodeString& name); // Gets the name of the object for the desired Locale (objectLocale), in the desired language (displayLocale).
487 
496  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
497  UnicodeString& name); // Gets the name of the object for the desired Locale, in the language of the default locale.
498 
499 #ifndef U_FORCE_HIDE_DEPRECATED_API
519  virtual BreakIterator * createBufferClone(void *stackBuffer,
520  int32_t &BufferSize,
521  UErrorCode &status) = 0; // Deprecated functionality; removed from the class only when U_FORCE_HIDE_DEPRECATED_API is defined.
522 #endif // U_FORCE_HIDE_DEPRECATED_API
523 
524 #ifndef U_HIDE_DEPRECATED_API
525 
532  inline UBool isBufferClone(); // Deprecated API (inside the U_HIDE_DEPRECATED_API guard); the inline definition later in this header always returns false.
533 
534 #endif /* U_HIDE_DEPRECATED_API */
535 
536 #if !UCONFIG_NO_SERVICE
552  static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
553  const Locale& locale,
554  UBreakIteratorType kind,
555  UErrorCode& status); // Registers a new break iterator of the indicated kind, to use in the given locale. Available only when UCONFIG_NO_SERVICE is 0.
556 
569  static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status); // Unregisters a previously-registered BreakIterator using the key returned from the register call.
570 
578 #endif
579 
586 
587 #ifndef U_HIDE_INTERNAL_API
594  const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const; // Gets the locale for this break iterator object. Internal API: hidden when U_HIDE_INTERNAL_API is defined.
595 #endif /* U_HIDE_INTERNAL_API */
596 
622  virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0; // Sets the subject text the break iterator is operating on without changing other aspects of the iterator.
623 
624  private:
625  static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status); // Private static helper taking a string `type`. NOTE(review): its exact role is not visible in this header - see the implementation.
626  static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status); // Private static helper selecting by integer `kind`. NOTE(review): presumably backs the public create*Instance factories - confirm.
627  static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status); // Private static helper with the same signature as createInstance. NOTE(review): how the two differ is not visible here.
628 
629  friend class ICUBreakIteratorFactory; // Friend: may access this class's private members, including the private static helpers.
630  friend class ICUBreakIteratorService; // Friend: may access this class's private members, including the private static helpers.
631 
632 protected:
633  // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
634  // or else the compiler will create public ones.
638  BreakIterator (const BreakIterator &other); // Protected copy constructor; deliberately kept outside the U_HIDE_INTERNAL_API guard (see the note above).
639 #ifndef U_HIDE_INTERNAL_API
641  BreakIterator (const Locale& valid, const Locale &actual); // Protected internal constructor taking the valid and actual locales. NOTE(review): presumably stored in validLocale/actualLocale - confirm.
643  BreakIterator &operator = (const BreakIterator &other); // Protected copy assignment; internal API, hidden when U_HIDE_INTERNAL_API is defined.
644 #endif /* U_HIDE_INTERNAL_API */
645 
646 private:
647 
649  char actualLocale[ULOC_FULLNAME_CAPACITY]; // Fixed-size buffer sized for a full locale ID. NOTE(review): presumably holds the "actual" locale ID - confirm.
650  char validLocale[ULOC_FULLNAME_CAPACITY]; // Fixed-size buffer sized for a full locale ID. NOTE(review): presumably holds the "valid" locale ID - confirm.
651  char requestLocale[ULOC_FULLNAME_CAPACITY]; // Fixed-size buffer sized for a full locale ID. NOTE(review): presumably holds the requested locale ID - confirm.
652 };
653 
654 #ifndef U_HIDE_DEPRECATED_API
655 
656 inline UBool BreakIterator::isBufferClone() // Deprecated API (inside the U_HIDE_DEPRECATED_API guard).
657 {
658  return false; // Unconditionally false.
659 }
660 
661 #endif /* U_HIDE_DEPRECATED_API */
662 
663 U_NAMESPACE_END
664 
665 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
666 
667 #endif /* U_SHOW_CPLUSPLUS_API */
668 
669 #endif // BRKITER_H
670 //eof
C++ API: Character Iterator.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:106
virtual bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
virtual ~BreakIterator()
destructor
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
BreakIterator(const Locale &valid, const Locale &actual)
BreakIterator(const BreakIterator &other)
static BreakIterator * createSentenceInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for sentence-breaks using specified locale. Returns an instance of a BreakIterato...
virtual int32_t previous()=0
Set the iterator position to the boundary preceding the current boundary.
static BreakIterator * createWordInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for word-breaks using the given locale.
virtual int32_t getRuleStatus() const
For RuleBasedBreakIterators, return the status tag from the break rule that determined the boundary a...
virtual BreakIterator * clone() const =0
Return a polymorphic copy of this object.
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which TextBoundaries are installed.
virtual void adoptText(CharacterIterator *it)=0
Change the text over which this operates.
virtual void setText(UText *text, UErrorCode &status)=0
Reset the break iterator to operate over the text represented by the UText.
virtual int32_t last()=0
Set the iterator position to the index immediately BEYOND the last character in the text being scanne...
static BreakIterator * createTitleInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for title-casing breaks using the specified locale. Returns an instance of a Brea...
virtual int32_t next()=0
Advance the iterator to the boundary following the current boundary.
virtual int32_t first()=0
Sets the current iteration position to the beginning of the text, position zero.
const char * getLocaleID(ULocDataLocaleType type, UErrorCode &status) const
Get the locale for this break iterator object.
Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const
Returns the locale for this break iterator.
bool operator!=(const BreakIterator &rhs) const
Returns the complement of the result of operator==.
Definition: brkiter.h:135
virtual BreakIterator & refreshInputText(UText *input, UErrorCode &status)=0
Set the subject text string upon which the break iterator is operating without changing any other asp...
virtual void setText(const UnicodeString &text)=0
Change the text over which this operates.
virtual int32_t next(int32_t n)=0
Set the iterator position to the nth boundary from the current boundary.
virtual int32_t preceding(int32_t offset)=0
Set the iterator position to the first boundary preceding the specified offset.
virtual CharacterIterator & getText() const =0
Return a CharacterIterator over the text being analyzed.
virtual int32_t current() const =0
Return character index of the current iterator position within the text.
virtual UBool isBoundary(int32_t offset)=0
Return true if the specified position is a boundary position.
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
virtual UClassID getDynamicClassID() const override=0
Return a polymorphic class ID for this object.
virtual BreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status)=0
Deprecated functionality.
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered BreakIterator using the key returned from the register call.
static BreakIterator * createLineInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for line-breaks using specified locale.
static BreakIterator * createCharacterInstance(const Locale &where, UErrorCode &status)
Create BreakIterator for character-breaks using specified locale. Returns an instance of a BreakIterat...
static StringEnumeration * getAvailableLocales()
Return a StringEnumeration over the locales available at the time of the call, including registered l...
virtual int32_t following(int32_t offset)=0
Advance the iterator to the first boundary following the specified offset.
static URegistryKey registerInstance(BreakIterator *toAdopt, const Locale &locale, UBreakIteratorType kind, UErrorCode &status)
Register a new break iterator of the indicated kind, to use in the given locale.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status)
For RuleBasedBreakIterators, get the status (tag) values from the break rule(s) that determined the b...
virtual UText * getUText(UText *fillIn, UErrorCode &status) const =0
Get a UText for the text being analyzed.
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:61
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
C++ API: Locale ID object.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C++ API: String Enumeration.
UText struct.
Definition: utext.h:1328
C API: BreakIterator.
UBreakIteratorType
The possible types of text boundaries.
Definition: ubrk.h:102
#define ULOC_FULLNAME_CAPACITY
Useful constant for the maximum size of the whole locale ID (including the terminating NULL and all k...
Definition: uloc.h:264
ULocDataLocaleType
Constants for *_getLocale(). Allow user to select whether she wants information on requested,...
Definition: uloc.h:338
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
C API: Miscellaneous definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition: umisc.h:57
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:315