/*
 * ISO:      WG21/N0359
 * ANSI:     93-0152
 * Author:   John Max Skaller
 * Date:     11/9/93
 * Reply to: maxtal@suphys.physics.us.oz.au
 *
 * Proposed charset class for the Standard Library
 * -----------------------------------------------
 *
 * The charset class is a value oriented class that provides all the
 * properties of a set of unsigned char with a set-like notation.
 * In addition, specialised methods provide conversions to and from
 * strings and char pointers.
 *
 * The class is similar to bits<256>, but provides functionality and
 * interface tailored to characters. Appropriate modifications will be
 * required if a char does not have 8 bits.
 *
 * The class is intended to replace the use of C functions from ctype.h.
 *
 * The header file of the class is given below. If the class is approved
 * the exact semantics can be easily defined.
 */
//-----------------------------------------------------------
/* File : charset.h */
#ifndef charset_hpp
#define charset_hpp

class string;
class charset;

// NOTE(review): the original text had a bare "#include" directive here whose
// header name was lost (most likely stripped together with its angle
// brackets). Nothing declared below needs a header: the class only uses
// built-in types and the forward-declared `string`. Restore the directive
// once the intended header is confirmed.

// ISO charset class
//
// concrete class representing a set of char
// exceptions thrown - none
//
class charset {
  // REPRESENTATION
  // 1 bit for each of the 256 unsigned characters '\x00' .. '\xFF'
  // (32 bytes * 8 bits; assumes an 8-bit char, see the note in the preamble)
  unsigned char bits[32];

public:
  // CONSTRUCTORS AND CONVERSIONS

  // default constructor: empty set
  charset();

  // copy constructor - compiler generated
  // assignment operator - compiler generated
  // destructor - none required

  // conversion from char : set containing that char only
  charset(unsigned char ch);

  // conversion from subrange of char : set containing all chars in range
  // x in charset if lo<=x and x<=hi
  charset(unsigned char lo, unsigned char hi);

  // conversion from string : set containing all chars in the string
  charset(const string &x);

  // conversion from char*: set containing all chars in string to null.
  // The pointee is const because the constructor only reads the string;
  // a plain `unsigned char*` argument is still accepted.
  charset(const unsigned char *x);

  // same conversion for plain char strings, so that `char*` variables and
  // string literals can be used directly
  charset(const char *x);

  // chars meeting a condition : {x| f(x)}
  charset(int (*f)(char));

  // chars meeting a condition : {x| f(x)}, for predicates declared as
  // int f(int) -- the signature used by the classification functions
  // of ctype.h, which this class is intended to replace
  charset(int (*f)(int));

  // conversion to string of all chars in set, in ascii order
  string collate() const;

  // conversion to string of all chars in set, in ascii order
  // in C literal format, including trailing quotes
  string C_literal() const;

  // conversion to int : number of chars in the set
  // (available both as unary plus and as a named function)
  int operator +() const;
  int count() const;

  // membership : s.contains(c) means char c in the set s
  int contains(char x) const;

  // COMPARISON

  // equality
  int operator ==(const charset &x) const;

  // inequality
  int operator !=(const charset &x) const;

  // improper subset
  int operator <=(const charset &x) const;

  // proper subset
  int operator <(const charset &x) const;

  // improper superset
  int operator >=(const charset &x) const;

  // proper superset
  int operator >(const charset &x) const;

  // MODIFIERS

  // add (assignment by union)
  charset& operator |= (const charset &x);

  // mask (assignment by intersection)
  charset& operator &= (const charset &x);

  // symmetric difference (assignment with symmetric difference)
  charset& operator ^= (const charset &x);

  // remove from (assignment by set difference)
  charset& operator -= (const charset &x);

  // OPERATORS

  // complement
  charset operator ~() const;

  // union
  charset operator | (const charset &x) const;

  // intersection
  charset operator & (const charset &x) const;

  // symmetric difference (union - intersection)
  charset operator ^ (const charset &x) const;

  // set difference
  charset operator - (const charset &x) const;

  // ITERATION
  // result is -1 if no such char exists
  // otherwise the char is returned
  // x.succ(-1) == x.first()
  // x.prec(-1) == x.last()
  int first() const;
  int last() const;
  int succ(const int) const;
  int prec(const int) const;

  // PREDEFINED SETS
  // named constant sets; their exact membership is not specified
  // in this header
  static const charset
    all,
    ascii,
    uppercase,
    lowercase,
    whitespace,
    letter,
    digit,
    hexdigit,
    octdigit,
    punct,
    alphanumeric,
    underscore_or_alpha,
    underscore_or_alphanumeric,
    cntrl,
    space,
    printable;
};

#endif
/* End charset.h */