uniset.h

Go to the documentation of this file.
00001 /*
00002 ***************************************************************************
00003 * Copyright (C) 1999-2008, International Business Machines Corporation
00004 * and others. All Rights Reserved.
00005 ***************************************************************************
00006 *   Date        Name        Description
00007 *   10/20/99    alan        Creation.
00008 ***************************************************************************
00009 */
00010 
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013 
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017 
00023 U_NAMESPACE_BEGIN
00024 
00025 class BMPSet;                // forward declaration; used below only as the pointer member bmpSet
00026 class ParsePosition;         // forward declaration; used below only as ParsePosition& parameters
00027 class SymbolTable;           // forward declaration; used below only as const SymbolTable* parameters
00028 class UnicodeSetStringSpan;  // forward declaration; used below only as the pointer member stringSpan
00029 class UVector;               // forward declaration; used below only as the pointer member strings
00030 class RuleCharacterIterator; // forward declaration; used below only as RuleCharacterIterator& parameters
00031 
00272 class U_COMMON_API UnicodeSet : public UnicodeFilter {
          // Exported (U_COMMON_API) class publicly derived from UnicodeFilter.
          // Judging by the members below, it holds UChar32 values in `list`
          // and, separately, a UVector of strings in `strings`.
          // NOTE(review): the behavioural notes in the comments added to this
          // class are inferred from the declarations only -- confirm them
          // against the ICU API reference before relying on them.
00273 
00274     int32_t len; // length of list used; 0 <= len <= capacity
00275     int32_t capacity; // capacity of list
00276     UChar32* list; // MUST be terminated with HIGH
00277     BMPSet *bmpSet; // The set is frozen iff either bmpSet or stringSpan is not NULL.
00278     UChar32* buffer; // internal buffer, may be NULL
00279     int32_t bufferCapacity; // capacity of buffer
00280     int32_t patLen; // presumably the length of pat -- TODO confirm
00281 
00291     UChar *pat; // presumably the pattern text handled by setPattern()/releasePattern() -- confirm
00292     UVector* strings; // maintained in sorted order
00293     UnicodeSetStringSpan *stringSpan; // tested together with bmpSet by isFrozen()
00294 
00295 private:
00296     enum { // constants
00297         kIsBogus = 1       // This set is bogus (i.e. not valid)
00298     };
00299     uint8_t fFlags;         // Bit flag (see constants above)
00300 public:
          // Reports whether the kIsBogus bit is set in fFlags (see the inline
          // definition after the class).
00310     inline UBool isBogus(void) const;
00311     
          // Presumably puts the set into the bogus state that isBogus() reports
          // -- confirm in the implementation file.
00328     void setToBogus();
00329 
00330 public:
00331 
          // Named bounds: MIN_VALUE is 0 and MAX_VALUE is 0x10ffff, which is the
          // highest Unicode code point.
00332     enum {
00337         MIN_VALUE = 0,
00338 
00343         MAX_VALUE = 0x10ffff
00344     };
00345 
00346     //----------------------------------------------------------------
00347     // Constructors &c
00348     //----------------------------------------------------------------
00349 
00350 public:
00351 
          // Default constructor (presumably yields an empty set -- confirm).
00356     UnicodeSet();
00357 
          // Constructs from the code point pair (start, end); presumably an
          // inclusive range -- confirm.
00366     UnicodeSet(UChar32 start, UChar32 end);
00367 
          // Constructs from a pattern string. `status` is a non-const UErrorCode
          // reference, presumably used to report parse failures -- confirm.
00376     UnicodeSet(const UnicodeString& pattern,
00377                UErrorCode& status);
00378 
          // As above, with an additional `options` bit mask and an optional
          // SymbolTable pointer (meaning of both: see ICU docs -- not shown here).
00391     UnicodeSet(const UnicodeString& pattern,
00392                uint32_t options,
00393                const SymbolTable* symbols,
00394                UErrorCode& status);
00395 
          // As above, additionally taking a non-const ParsePosition reference;
          // presumably parsing starts at `pos` and `pos` is advanced -- confirm.
00409     UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00410                uint32_t options,
00411                const SymbolTable* symbols,
00412                UErrorCode& status);
00413 
          // Copy constructor.
00418     UnicodeSet(const UnicodeSet& o);
00419 
          // Virtual destructor.
00424     virtual ~UnicodeSet();
00425 
          // Copy assignment operator.
00431     UnicodeSet& operator=(const UnicodeSet& o);
00432 
          // Virtual equality comparison against another UnicodeSet.
00444     virtual UBool operator==(const UnicodeSet& o) const;
00445 
          // Defined inline after the class as the negation of operator==.
00451     UBool operator!=(const UnicodeSet& o) const;
00452 
          // Virtual clone; note the declared return type is UnicodeFunctor*.
00462     virtual UnicodeFunctor* clone() const;
00463 
          // Virtual hash code accessor.
00471     virtual int32_t hashCode(void) const;
00472 
00473     //----------------------------------------------------------------
00474     // Freezable API
00475     //----------------------------------------------------------------
00476 
          // Defined inline after the class: true when bmpSet or stringSpan is
          // non-NULL.
00485     inline UBool isFrozen() const;
00486 
          // Presumably freezes this set and returns it as a UnicodeFunctor*
          // -- confirm.
00500     UnicodeFunctor *freeze();
00501 
          // Const method returning a UnicodeFunctor*; presumably a new, unfrozen
          // copy -- confirm ownership of the result.
00510     UnicodeFunctor *cloneAsThawed() const;
00511 
00512     //----------------------------------------------------------------
00513     // Public API
00514     //----------------------------------------------------------------
00515 
          // Takes a (start, end) code point pair and returns a UnicodeSet&
          // (presumably *this, for chaining -- confirm).
00526     UnicodeSet& set(UChar32 start, UChar32 end);
00527 
          // Static check on `pattern` at index `pos`; presumably tests whether a
          // set pattern appears to start there -- confirm.
00533     static UBool resemblesPattern(const UnicodeString& pattern,
00534                                   int32_t pos);
00535 
          // applyPattern overloads mirror the three pattern constructors above:
          // (pattern, status), plus (options, symbols), plus a ParsePosition.
00548     UnicodeSet& applyPattern(const UnicodeString& pattern,
00549                              UErrorCode& status);
00550 
00567     UnicodeSet& applyPattern(const UnicodeString& pattern,
00568                              uint32_t options,
00569                              const SymbolTable* symbols,
00570                              UErrorCode& status);
00571 
00603     UnicodeSet& applyPattern(const UnicodeString& pattern,
00604                              ParsePosition& pos,
00605                              uint32_t options,
00606                              const SymbolTable* symbols,
00607                              UErrorCode& status);
00608 
          // Writes into and returns `result`; escapeUnprintable defaults to FALSE.
00622     virtual UnicodeString& toPattern(UnicodeString& result,
00623                              UBool escapeUnprintable = FALSE) const;
00624 
          // Takes a UProperty and an integer value; `ec` is a non-const
          // UErrorCode reference (presumably the error output -- confirm).
00647     UnicodeSet& applyIntPropertyValue(UProperty prop,
00648                                       int32_t value,
00649                                       UErrorCode& ec);
00650 
          // Same idea with the property and value given as strings.
00680     UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00681                                    const UnicodeString& value,
00682                                    UErrorCode& ec);
00683 
          // Virtual size query, returned as int32_t.
00692     virtual int32_t size(void) const;
00693 
          // Virtual emptiness query.
00700     virtual UBool isEmpty(void) const;
00701 
          // contains / containsAll / containsNone / containsSome are overloaded
          // for a single code point, a (start, end) pair, a string, or another
          // set, depending on the method. Every containsSome overload is defined
          // inline after the class as the negation of the matching containsNone.
00709     virtual UBool contains(UChar32 c) const;
00710 
00719     virtual UBool contains(UChar32 start, UChar32 end) const;
00720 
00728     UBool contains(const UnicodeString& s) const;
00729 
00737     virtual UBool containsAll(const UnicodeSet& c) const;
00738 
00746     UBool containsAll(const UnicodeString& s) const;
00747 
00756     UBool containsNone(UChar32 start, UChar32 end) const;
00757 
00765     UBool containsNone(const UnicodeSet& c) const;
00766 
00774     UBool containsNone(const UnicodeString& s) const;
00775 
00784     inline UBool containsSome(UChar32 start, UChar32 end) const;
00785 
00793     inline UBool containsSome(const UnicodeSet& s) const;
00794 
00802     inline UBool containsSome(const UnicodeString& s) const;
00803 
          // Span functions take a text pointer, a length and a USetSpanCondition
          // and return an int32_t. The first two take UChar text, the UTF8
          // variants take char text. Exact return semantics: see the ICU docs.
00822     int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00823 
00841     int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00842 
00861     int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00862 
00880     int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00881 
          // Virtual matcher over a Replaceable; `offset` is a non-const
          // reference, so it is presumably updated on a match -- confirm.
00886     virtual UMatchDegree matches(const Replaceable& text,
00887                          int32_t& offset,
00888                          int32_t limit,
00889                          UBool incremental);
00890 
00891 private:
          // Private static helper taking the text, a start/limit pair and a
          // string `s` (presumably used by matches() -- confirm).
00913     static int32_t matchRest(const Replaceable& text,
00914                              int32_t start, int32_t limit,
00915                              const UnicodeString& s);
00916 
          // Private lookup for code point `c`; presumably returns an index into
          // `list` -- confirm.
00926     int32_t findCodePoint(UChar32 c) const;
00927 
00928 public:
00929 
          // Virtual; takes the destination set by non-const reference.
00937     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00938 
          // indexOf maps a code point to an int32_t index and charAt maps an
          // index to a UChar32; presumably inverse operations -- confirm.
00947     int32_t indexOf(UChar32 c) const;
00948 
00958     UChar32 charAt(int32_t index) const;
00959 
          // add overloads: a (start, end) pair (virtual), a single code point,
          // or a string. Each returns a UnicodeSet&.
00974     virtual UnicodeSet& add(UChar32 start, UChar32 end);
00975 
00983     UnicodeSet& add(UChar32 c);
00984 
00996     UnicodeSet& add(const UnicodeString& s);
00997 
00998  private:
          // Private static helper mapping a string to an int32_t; presumably
          // the single code point it consists of, if any -- confirm.
01004     static int32_t getSingleCP(const UnicodeString& s);
01005 
          // Private string-adding helper with no return value.
01006     void _add(const UnicodeString& s);
01007 
01008  public:
          // String-argument forms of addAll / retainAll / complementAll /
          // removeAll; the UnicodeSet-argument forms are declared further below.
01017     UnicodeSet& addAll(const UnicodeString& s);
01018 
01027     UnicodeSet& retainAll(const UnicodeString& s);
01028 
01037     UnicodeSet& complementAll(const UnicodeString& s);
01038 
01047     UnicodeSet& removeAll(const UnicodeString& s);
01048 
          // Static factories returning a UnicodeSet*; presumably heap-allocated
          // and owned by the caller -- confirm.
01057     static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01058 
01059 
01067     static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01068 
          // retain / remove / complement follow the same overload pattern as
          // add: the (start, end) forms are virtual, the single-argument forms
          // are not.
01082     virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01083 
01084 
01090     UnicodeSet& retain(UChar32 c);
01091 
01105     virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01106 
01114     UnicodeSet& remove(UChar32 c);
01115 
01125     UnicodeSet& remove(const UnicodeString& s);
01126 
01134     virtual UnicodeSet& complement(void);
01135 
01150     virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01151 
01159     UnicodeSet& complement(UChar32 c);
01160 
01171     UnicodeSet& complement(const UnicodeString& s);
01172 
          // Virtual operations taking another UnicodeSet as the argument.
01185     virtual UnicodeSet& addAll(const UnicodeSet& c);
01186 
01198     virtual UnicodeSet& retainAll(const UnicodeSet& c);
01199 
01211     virtual UnicodeSet& removeAll(const UnicodeSet& c);
01212 
01223     virtual UnicodeSet& complementAll(const UnicodeSet& c);
01224 
          // Virtual; presumably empties the set -- confirm.
01231     virtual UnicodeSet& clear(void);
01232 
          // `attribute` is an int32_t; its accepted values are not visible in
          // this header (presumably constants from uset.h -- confirm).
01258     UnicodeSet& closeOver(int32_t attribute);
01259 
          // Virtual; presumably drops the contents of `strings` -- confirm.
01266     virtual UnicodeSet &removeAllStrings();
01267 
          // Range accessors: a count, and the start/end code point of the range
          // at a given index.
01275     virtual int32_t getRangeCount(void) const;
01276 
01284     virtual UChar32 getRangeStart(int32_t index) const;
01285 
01293     virtual UChar32 getRangeEnd(int32_t index) const;
01294 
          // Writes into a caller-supplied uint16_t buffer of destCapacity units
          // and returns an int32_t (presumably the length needed -- confirm).
01343     int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01344 
          // Virtual; presumably trims unused storage -- confirm.
01351     virtual UnicodeSet& compact();
01352 
          // Static and dynamic class ID accessors.
01364     static UClassID U_EXPORT2 getStaticClassID(void);
01365 
01374     virtual UClassID getDynamicClassID(void) const;
01375 
01376 private:
01377 
01378     // Private API for the USet API
01379 
          // USetAccess is a friend, so it can reach the two private accessors
          // below.
01380     friend class USetAccess;
01381 
01382     int32_t getStringCount() const;
01383 
01384     const UnicodeString* getString(int32_t index) const;
01385 
01386     //----------------------------------------------------------------
01387     // RuleBasedTransliterator support
01388     //----------------------------------------------------------------
01389 
01390 private:
01391 
          // Private virtual taking a uint8_t value.
01397     virtual UBool matchesIndexValue(uint8_t v) const;
01398 
01399 private:
01400 
01401     //----------------------------------------------------------------
01402     // Implementation: Clone as thawed (see ICU4J Freezable)
01403     //----------------------------------------------------------------
01404 
          // Private constructor; the unnamed UBool only distinguishes this
          // overload from the public copy constructor.
01405     UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
01406 
01407     //----------------------------------------------------------------
01408     // Implementation: Pattern parsing
01409     //----------------------------------------------------------------
01410 
          // Private parsing overload working on a RuleCharacterIterator and
          // taking `rebuiltPat` by non-const reference.
01411     void applyPattern(RuleCharacterIterator& chars,
01412                       const SymbolTable* symbols,
01413                       UnicodeString& rebuiltPat,
01414                       uint32_t options,
01415                       UErrorCode& ec);
01416 
01417     //----------------------------------------------------------------
01418     // Implementation: Utility methods
01419     //----------------------------------------------------------------
01420 
          // Capacity helpers, presumably for `list` and `buffer` respectively
          // -- confirm.
01421     void ensureCapacity(int32_t newLen, UErrorCode& ec);
01422 
01423     void ensureBufferCapacity(int32_t newLen, UErrorCode& ec);
01424 
01425     void swapBuffers(void);
01426 
01427     UBool allocateStrings(UErrorCode &status);
01428 
          // Pattern-generation helpers; unlike toPattern(), escapeUnprintable
          // has no default value here.
01429     UnicodeString& _toPattern(UnicodeString& result,
01430                               UBool escapeUnprintable) const;
01431 
01432     UnicodeString& _generatePattern(UnicodeString& result,
01433                                     UBool escapeUnprintable) const;
01434 
01435     static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01436 
01437     static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01438 
01439     //----------------------------------------------------------------
01440     // Implementation: Fundamental operators
01441     //----------------------------------------------------------------
01442 
          // Each takes a raw UChar32 array with its length and an int8_t
          // polarity (meaning of polarity not shown here).
01443     void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01444 
01445     void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01446 
01447     void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01448 
          // Private static checks, in string/index and iterator forms.
01454     static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01455                                           int32_t pos);
01456 
01457     static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01458                                           int32_t iterOpts);
01459 
          // Private property-pattern parsing, in string/ParsePosition and
          // iterator forms.
01498     UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01499                                      ParsePosition& ppos,
01500                                      UErrorCode &ec);
01501 
01502     void applyPropertyPattern(RuleCharacterIterator& chars,
01503                               UnicodeString& rebuiltPat,
01504                               UErrorCode& ec);
01505 
          // Static accessor returning a const UnicodeSet* for an integer `src`.
01506     static const UnicodeSet* getInclusions(int32_t src, UErrorCode &status);
01507 
          // Predicate callback type: takes a code point and an opaque context
          // pointer and returns UBool.
01512     typedef UBool (*Filter)(UChar32 codePoint, void* context);
01513 
          // Takes a Filter together with the context pointer for it; `src` has
          // the same type as getInclusions()'s parameter (presumably forwarded
          // to it -- confirm).
01523     void applyFilter(Filter filter,
01524                      void* context,
01525                      int32_t src,
01526                      UErrorCode &status);
01527 
          // Pattern storage helpers; presumably manage pat/patLen -- confirm.
01531     void setPattern(const UnicodeString& newPat);
01535     void releasePattern();
01536 
          // UnicodeSetIterator is a friend and can access private members.
01537     friend class UnicodeSetIterator;
01538 };
01539 
01540 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
          // Inequality is simply the logical negation of operator==.
01541     return (UBool)(!this->operator==(o));
01542 }
01543 
01544 inline UBool UnicodeSet::isFrozen() const {
          // Not frozen only when both bmpSet and stringSpan are NULL.
01545     return (UBool)(!(bmpSet==NULL && stringSpan==NULL));
01546 }
01547 
01548 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
          // Logical negation of containsNone() for the same (start, end) pair.
01549     return (UBool)(!this->containsNone(start, end));
01550 }
01551 
01552 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
          // Logical negation of containsNone() for the same set argument.
01553     return (UBool)(!this->containsNone(s));
01554 }
01555 
01556 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
          // Logical negation of containsNone() for the same string argument.
01557     return (UBool)(!this->containsNone(s));
01558 }
01559 
01560 inline UBool UnicodeSet::isBogus() const {
          // kIsBogus is 1, so testing the masked bit against zero yields the same 0/1 result.
01561     return (UBool)((fFlags & kIsBogus) != 0);
01562 }
01563 
01564 U_NAMESPACE_END
01565 
01566 #endif

Generated on Sat Oct 3 23:25:34 2009 for ICU 4.0 by  doxygen 1.4.7