// $Id: lookup.h,v 1.5 1999/02/12 14:39:13 shields Exp $ copyright notice #ifndef lookup_INCLUDED #define lookup_INCLUDED #include "config.h" #include <wchar.h> #include <string.h> #include <sys/stat.h> #include <time.h> #include "tuple.h" #include "long.h" #include "double.h" class Symbol; class PackageSymbol; class TypeSymbol; class MethodSymbol; class MethodShadowSymbol; class BlockSymbol; class VariableSymbol; class VariableShadowSymbol; class LabelSymbol; class LiteralSymbol; class NameSymbol; class PathSymbol; class DirectorySymbol; class FileSymbol; class ShadowSymbol; class LiteralValue; class IntLiteralValue; class LongLiteralValue; class FloatLiteralValue; class DoubleLiteralValue; class Utf8LiteralValue; class Utf8LiteralTable; class NameLookupTable; class LiteralLookupTable; class AstBinaryExpression; class AstExpression; class Hash { public: // // HASH takes as argument a pointer to a character string // and its length which it hashes it into a location in the name // hash table. // inline static unsigned Function(wchar_t *head, int len) { sum the last 5 characters using the polynomial (ci == char[len+i-5]) ((((((((c0 * 128)+c1)*128)+c2)*128)+c3)*128)*c4) unsigned long hash_value = head[len >> 1]; // start with center (or unique) letter wchar_t *tail = &head[len - 1]; for (int i = 0; i < 5 && head < tail; i++) { unsigned k = *tail--; hash_value += ((k << 7) + *head++); } return hash_value; } // // Same as above function for a regular "char" string. // inline static unsigned Function(char *head, int len) { sum the last 5 characters using the polynomial (ci == char[len+i-5]) ((((((((c0 * 128)+c1)*128)+c2)*128)+c3)*128)*c4) unsigned long hash_value = head[len >> 1]; // start with center (or unique) letter char *tail = &head[len - 1]; for (int i = 0; i < 5 && head < tail; i++) { unsigned k = *tail--; hash_value += ((k << 7) + *head++); } return hash_value; } inline static unsigned Function(IEEEfloat value) { return value.Word(); } inline static unsigned Function(IEEEdouble value) { unsigned result = value.HighWord() + value.LowWord(); return result; } }; class DirectoryEntry { public: DirectoryEntry *next; int length; char *name; DirectoryEntry() : name(NULL), directory(NULL), next(NULL), length(0), mtime_(0) { image = this; } virtual ~DirectoryEntry() { delete [] name; } inline void Initialize(DirectorySymbol *directory_, char *name_, int length_) { directory = directory_; length = length_; name = new char[length + 1]; memmove(name, name_, length * sizeof(char)); name[length] = U_NULL; return; } inline void Initialize(DirectoryEntry *entry, char *name_, int length_) { Initialize(entry -> directory, name_, length_); } time_t Mtime(); bool IsDummy() { return this != image; } // // See FoldedDirectoryEntry for an explanation of the use of this function // virtual DirectoryEntry *Image() { return this; } protected: DirectorySymbol *directory; DirectoryEntry *image; time_t mtime_; }; #ifdef WIN32_FILE_SYSTEM // // This object is needed only for systems such as Windows NT/95/98 that // treat filenames in a case-insensitive fashion. // class FoldedDirectoryEntry : public DirectoryEntry { public: FoldedDirectoryEntry(DirectoryEntry *image_) { DirectoryEntry::image = image_; } virtual ~FoldedDirectoryEntry() {} virtual DirectoryEntry *Image() { return image; } }; #endif class DirectoryTable { public: Tuple<DirectoryEntry *> entry_pool; DirectoryTable(int estimate = 1024); ~DirectoryTable(); DirectoryEntry *FindEntry(char *, int); DirectoryEntry *InsertEntry(DirectorySymbol *, char *, int); #ifdef WIN32_FILE_SYSTEM // // See FoldedDirectoryEntry for an explanation of the use of this function // DirectoryEntry *FindCaseInsensitiveEntry(char *, int); void InsertCaseInsensitiveEntry(DirectoryEntry *); #endif private: enum { DEFAULT_HASH_SIZE = 1021, MAX_HASH_SIZE = 8191 }; DirectoryEntry **base; int hash_size; static int primes[]; int prime_index; inline static unsigned Hash(char *head, int len) { return Hash::Function(head, len); } void Rehash(); }; class Symbol { public: Symbol *next; enum SymbolKind { NONE, NAME, PACKAGE, TYPE, // class or interface METHOD, BLOCK, VARIABLE, LABEL, LITERAL, PATH, _DIRECTORY, _FILE, _num_kinds }; SymbolKind Kind() { return _kind; } virtual wchar_t *Name() { return (wchar_t *) NULL; } virtual int NameLength() { return 0; } virtual NameSymbol *Identity() { return (NameSymbol *) NULL; } PackageSymbol *PackageCast() { return (PackageSymbol *) (_kind == PACKAGE ? this : NULL); } TypeSymbol *TypeCast() { return (TypeSymbol *) (_kind == TYPE ? this : NULL); } MethodSymbol *MethodCast() { return (MethodSymbol *) (_kind == METHOD ? this : NULL); } BlockSymbol *BlockCast() { return (BlockSymbol *) (_kind == BLOCK ? this : NULL); } VariableSymbol *VariableCast() { return (VariableSymbol *) (_kind == VARIABLE ? this : NULL); } LabelSymbol *LabelCast() { return (LabelSymbol *) (_kind == LABEL ? this : NULL); } LiteralSymbol *LiteralCast() { return (LiteralSymbol *) (_kind == LITERAL ? this : NULL); } NameSymbol *NameCast() { return (NameSymbol *) (_kind == NAME ? this : NULL); } PathSymbol *PathCast() { return (PathSymbol *) (_kind == PATH ? this : NULL); } DirectorySymbol *DirectoryCast() { return (DirectorySymbol *) (_kind == _DIRECTORY ? this : NULL); } FileSymbol *FileCast() { return (FileSymbol *) (_kind == _FILE ? this : NULL); } virtual ~Symbol() {} protected: SymbolKind _kind; }; class LiteralValue { public: LiteralValue *next; int constant_pool_index, constant_pool_class; virtual ~LiteralValue() {} }; class IntLiteralValue : public LiteralValue { public: int value; virtual ~IntLiteralValue() {} void Initialize(int value_) { value = value_; LiteralValue::constant_pool_index = 0; LiteralValue::constant_pool_class = 0; } }; class LongLiteralValue : public LiteralValue { public: LongInt value; virtual ~LongLiteralValue() {} void Initialize(LongInt value_) { value = value_; LiteralValue::constant_pool_index = 0; LiteralValue::constant_pool_class = 0; } }; class FloatLiteralValue : public LiteralValue { public: IEEEfloat value; virtual ~FloatLiteralValue() {} void Initialize(IEEEfloat value_) { value = value_; LiteralValue::constant_pool_index = 0; LiteralValue::constant_pool_class = 0; } }; class DoubleLiteralValue : public LiteralValue { public: IEEEdouble value; virtual ~DoubleLiteralValue() {} void Initialize(IEEEdouble value_) { value = value_; LiteralValue::constant_pool_index = 0; LiteralValue::constant_pool_class = 0; } }; class Utf8LiteralValue : public LiteralValue { public: char *value; int length; int constant_pool_index_String, // index when used as String constant_pool_index_Class; // index when used as Class Utf8LiteralValue() : value(NULL) {} virtual ~Utf8LiteralValue() { delete [] value; } void Initialize(char *value_, int length_, unsigned hash_address_) { LiteralValue::constant_pool_index = 0; LiteralValue::constant_pool_class = 0; constant_pool_index_String = 0; constant_pool_index_Class = 0; length = length_; value = new char[length + 1]; memmove(value, value_, length * sizeof(char)); value[length] = U_NULL; hash_address = hash_address_; } private: friend class Utf8LiteralTable; unsigned hash_address; }; class NameSymbol : public Symbol { NameSymbols are internal symbol representations. NameSymbols are hashed into a NameLookupTable using the method NameLookupTable::FindOrInsertName. They are linked in a linear chain data structure for searching using the field "next". The FindOrInsertName method automatically sets both the "next" and hash_address fields. Each NameSymbol has: name_ a wchar string length and it's length hash_address this is a hash index into the symbol hash table. Searching for a matching symbol starts with this entry and walks the chain of next pointers. index this is the position in the symbol_pool, the list of all symbols. next an inherited pointer to the next Symbol object all Symbols are singly linked thru this field. _kind an enumerated type SymbolKind from Symbol _kind is used to determine the success of casting this Symbol to the various subtypes. Utf8_literal method Name() method NameLength() method NameSymbol() method Utf8Name() method UtfNameLength() method PackageCast() method TypeCast() method BlockCast() method VariableCast() method LabelCast() method LiteralCast() method NameCast() method PathCast() method DirectoryCast() method FileCast() public: int index; Utf8LiteralValue *Utf8_literal; virtual wchar_t *Name() { return name_; } virtual int NameLength() { return length; } virtual NameSymbol *Identity() { return this; } char *Utf8Name() { return (char *) (Utf8_literal ? Utf8_literal -> value : NULL); } int Utf8NameLength() { return (Utf8_literal ? Utf8_literal -> length : 0); } NameSymbol() : name_(NULL) {} virtual ~NameSymbol() { delete [] name_; } inline void Initialize(wchar_t *str, int length_, unsigned hash_address_, int index_) { Symbol::_kind = NAME; hash_address = hash_address_; index = index_; length = length_; name_ = new wchar_t[length + 1]; memmove(name_, str, length * sizeof(wchar_t)); name_[length] = U_NULL; Utf8_literal = NULL; return; } private: friend class NameLookupTable; wchar_t *name_; int length; unsigned hash_address; }; class NameLookupTable { NameLookupTable is a combination of a linear linked list and a hash table. The hash table is stored in "base" and is an array of NameSymbol objects. Character strings representing symbols are given to the FindOrInsertName method which will hash into the base array and starts a linear search for the NameSymbol. If it is found, the NameSymbol is returned, else we do two things: we create a new NameSymbol which we chain to the end of the symbol_pool list and we create a hash entry in the base array. The newly created NameSymbol is returned. A NameLookupTable consists of: symbol_pool a Tuple of NameSymbol objects base an array of NameSymbol objects hash_size the current length of the base array constructor NameLookupTable method FindOrInsertName lookup and maybe create a NameSymbol object method Hash returns a hash code, given a string or number method Rehash which resizes the base array if > 50% full public: Tuple<NameSymbol *> symbol_pool; NameLookupTable(int estimate = 16384); ~NameLookupTable(); NameSymbol *FindOrInsertName(wchar_t *, int); private: enum { DEFAULT_HASH_SIZE = 4093, MAX_HASH_SIZE = 32771 }; NameSymbol **base; int hash_size; static int primes[]; int prime_index; inline static unsigned Hash(wchar_t *head, int len) { return Hash::Function(head, len); } void Rehash(); }; class LiteralSymbol : public Symbol { public: LiteralValue *value; virtual wchar_t *Name() { return name_; } virtual int NameLength() { return length; } virtual NameSymbol *Identity() { return (NameSymbol *) NULL; } LiteralSymbol() : name_(NULL) {} virtual ~LiteralSymbol() { delete [] name_; } void Initialize(wchar_t *str, unsigned hash_address_, int length_) { Symbol::_kind = LITERAL; hash_address = hash_address_; length = length_; name_ = new wchar_t[length + 1]; memmove(name_, str, length * sizeof(wchar_t)); name_[length] = U_NULL; value = NULL; } private: friend class LiteralLookupTable; wchar_t *name_; int length; unsigned hash_address; }; class LiteralLookupTable { public: Tuple<LiteralSymbol *> symbol_pool; LiteralLookupTable(); ~LiteralLookupTable(); LiteralSymbol *FindOrInsertLiteral(wchar_t *, int); private: enum { DEFAULT_HASH_SIZE = 1021, MAX_HASH_SIZE = 8191 }; LiteralSymbol **base; int hash_size; static int primes[]; int prime_index; inline static unsigned Hash(wchar_t *head, int len) { return Hash::Function(head, len); } void Rehash(); }; class IntLiteralTable { public: Tuple<IntLiteralValue *> symbol_pool; IntLiteralTable(LiteralValue *); ~IntLiteralTable(); LiteralValue *FindOrInsertNull() { return FindOrInsert(0); } LiteralValue *FindOrInsertChar(LiteralSymbol *); LiteralValue *FindOrInsertInt(LiteralSymbol *); LiteralValue *FindOrInsertHexInt(LiteralSymbol *); LiteralValue *FindOrInsertOctalInt(LiteralSymbol *); LiteralValue *FindOrInsertNegativeInt(LiteralSymbol *); IntLiteralValue *FindOrInsert(int); #ifdef TEST // // To prevent arithmentic conversion to allow illegal calls inadvertently. // void FindOrInsert(LongInt) {} void FindOrInsert(float) {} void FindOrInsert(double) {} #endif private: enum { DEFAULT_HASH_SIZE = 4093, MAX_HASH_SIZE = 32771 }; IntLiteralValue **base; int hash_size; static int primes[]; int prime_index; static int int32_limit; LiteralValue *bad_value; void Rehash(); }; class LongLiteralTable { public: Tuple<LongLiteralValue *> symbol_pool; LongLiteralTable(LiteralValue *); ~LongLiteralTable(); LiteralValue *FindOrInsertLong(LiteralSymbol *); LiteralValue *FindOrInsertHexLong(LiteralSymbol *); LiteralValue *FindOrInsertOctalLong(LiteralSymbol *); LiteralValue *FindOrInsertNegativeLong(LiteralSymbol *); LongLiteralValue *FindOrInsert(LongInt); #ifdef TEST // // To prevent arithmentic conversion to allow illegal calls inadvertently. // void FindOrInsert(int) {} void FindOrInsert(float) {} void FindOrInsert(double) {} #endif private: enum { DEFAULT_HASH_SIZE = 1021, MAX_HASH_SIZE = 8191 }; LongLiteralValue **base; int hash_size; static int primes[]; int prime_index; static LongInt int64_limit; LiteralValue *bad_value; void Rehash(); }; class FloatLiteralTable { public: Tuple<FloatLiteralValue *> symbol_pool; FloatLiteralTable(LiteralValue *); ~FloatLiteralTable(); LiteralValue *FindOrInsertFloat(LiteralSymbol *); FloatLiteralValue *FindOrInsert(IEEEfloat); #ifdef TEST // // To prevent arithmentic conversion to allow illegal calls inadvertently. // void FindOrInsert(int) {} void FindOrInsert(LongInt) {} void FindOrInsert(double) {} #endif private: enum { DEFAULT_HASH_SIZE = 1021, MAX_HASH_SIZE = 8191 }; FloatLiteralValue **base; int hash_size; static int primes[]; int prime_index; LiteralValue *bad_value; inline static unsigned Hash(IEEEfloat value) { return Hash::Function(value); } void Rehash(); }; class DoubleLiteralTable { public: Tuple<DoubleLiteralValue *> symbol_pool; DoubleLiteralTable(LiteralValue *); ~DoubleLiteralTable(); LiteralValue *FindOrInsertDouble(LiteralSymbol *); DoubleLiteralValue *FindOrInsert(IEEEdouble); #ifdef TEST // // To prevent arithmentic conversion to allow illegal calls inadvertently. // void FindOrInsert(int) {} void FindOrInsert(LongInt) {} void FindOrInsert(float) {} #endif private: enum { DEFAULT_HASH_SIZE = 1021, MAX_HASH_SIZE = 8191 }; DoubleLiteralValue **base; int hash_size; static int primes[]; int prime_index; LiteralValue *bad_value; inline static unsigned Hash(IEEEdouble value) { return Hash::Function(value); } void Rehash(); }; class Utf8LiteralTable { public: Tuple<Utf8LiteralValue *> symbol_pool; Utf8LiteralTable(LiteralValue *); ~Utf8LiteralTable(); LiteralValue *FindOrInsertString(LiteralSymbol *); Utf8LiteralValue *FindOrInsert(char *, int); Utf8LiteralValue *FindOrInsert(wchar_t); void CheckStringConstant(AstExpression *expr); private: Tuple<AstExpression *> *expr; bool IsConstant(AstExpression *); enum { DEFAULT_HASH_SIZE = 4093, MAX_HASH_SIZE = 32771 }; Utf8LiteralValue **base; int hash_size; static int primes[]; int prime_index; LiteralValue *bad_value; inline static unsigned Hash(char *head, int len) { return Hash::Function(head, len); } void Rehash(); }; #endif