// $Id: stream.h,v 1.3 1999/01/25 20:00:31 shields Exp $ copyright notice #ifndef stream_INCLUDED #define stream_INCLUDED #include "config.h" #include <sys/stat.h> #include <limits.h> #include <iostream.h> #include <stddef.h> #include <stdio.h> #include "javadef.h" #include "javasym.h" #include "tuple.h" #include "bool.h" #include "tab.h" #include "lookup.h" #include "unicode.h" class Control; class Input_info; class Scanner; class Symbol; class FileSymbol; class ZipFile; class StreamError { public: enum StreamErrorKind { BAD_TOKEN, BAD_OCTAL_CONSTANT, EMPTY_CHARACTER_CONSTANT, UNTERMINATED_CHARACTER_CONSTANT, UNTERMINATED_COMMENT, UNTERMINATED_STRING_CONSTANT, INVALID_HEX_CONSTANT, INVALID_FLOATING_CONSTANT_EXPONENT, INVALID_UNICODE_ESCAPE }; unsigned start_location, end_location; StreamErrorKind kind; void Initialize(StreamErrorKind kind_, unsigned start_location_, unsigned end_location_) { kind = kind_; start_location = start_location_; end_location = end_location_; return; } }; // // LexStream holds a stream of tokens generated from an input and // provides methods to retrieve information from the stream. // class LexStream { public: typedef int TypeIndex; typedef int TokenIndex; typedef int CommentIndex; enum { INFINITY = INT_MAX }; // the largest possible value for TokenIndex FileSymbol *file_symbol; inline TokenIndex Next(TokenIndex i) { return (++i < token_stream.Length() ? i : token_stream.Length() - 1); } inline TokenIndex Previous(TokenIndex i) { return (i <= 0 ? 0 : i - 1); } inline TokenIndex Peek() { return Next(index); } inline void Reset(TokenIndex i = 1) { index = Previous(i); } inline TokenIndex Gettoken() { return index = Next(index); } inline TokenIndex Gettoken(TokenIndex end_token) { return index = (index < end_token ? Next(index) : token_stream.Length() - 1); } inline TokenIndex Badtoken() { return 0; } inline unsigned Kind(TokenIndex i) { return tokens[i].Kind(); } inline unsigned Location(TokenIndex i) { return tokens[i].Location(); } inline unsigned Line(TokenIndex i) { return FindLine(tokens[i].Location()); } inline unsigned Column(TokenIndex i) { return columns ? columns[i] : (input_buffer ? FindColumn(tokens[i].Location()) : 0); } inline bool AfterEol(TokenIndex i) { return (i < 1 ? true : Line(i - 1) < Line(i)); } inline TokenIndex MatchingBrace(TokenIndex i) { return tokens[i].additional_info.right_brace; } inline Symbol *NameSymbol(TokenIndex i) { return (Kind(i) != TK_LBRACE ? tokens[i].additional_info.symbol : (Symbol *) NULL); } wchar_t *Name(TokenIndex i) { return (NameSymbol(i) ? tokens[i].additional_info.symbol -> Name() : KeywordName(tokens[i].Kind())); } int NameLength(TokenIndex i) { return (NameSymbol(i) ? tokens[i].additional_info.symbol -> NameLength() : wcslen(KeywordName(tokens[i].Kind()))); } char *FileName(); int FileNameLength(); inline int LineLength(unsigned line_no) { return locations[line_no + 1] - locations[line_no]; } inline int LineStart(unsigned line_no) { return locations[line_no]; } inline int LineEnd(unsigned line_no) { return locations[line_no + 1] - 1; } inline int LineSegmentLength(TokenIndex i) { return Tab::Wcslen(input_buffer, tokens[i].Location(), LineEnd(Line(i))); } wchar_t *InputBuffer() { return input_buffer; } size_t InputBufferLength() { return input_buffer_length; } CommentIndex FirstComment(TokenIndex); inline int NumTypes() { return type_index.Length(); } inline TokenIndex Type(int i) { return types[i]; } inline int NumTokens() { return token_stream.Length(); } inline int NumComments() { return comment_stream.Length(); } inline TokenIndex PrecedingToken(CommentIndex i) { return comments[i].previous_token; } inline unsigned CommentLocation(CommentIndex i) { return comments[i].location; } inline wchar_t *CommentString(CommentIndex i) { return comments[i].string; } inline int CommentStringLength(CommentIndex i) { return comments[i].length; } inline int NumBadTokens() { return bad_tokens.Length(); } #ifdef TEST int file_read; #endif //* //* Constructors and Destructor. //* LexStream(Control &control_, FileSymbol *file_symbol_) : control(control_), file_symbol(file_symbol_), initial_reading_of_input(true), input_buffer(NULL), input_buffer_length(0), comment_buffer(NULL), token_stream(12, 16), tokens(NULL), columns(NULL), comment_stream(10, 8), comments(NULL), line_location(12, 8), locations(NULL) #ifdef TEST , file_read(0) #endif {} bool ComputeColumns() { RereadInput(); if (input_buffer) InitializeColumns(); DestroyInput(); return (columns != NULL); } void RereadInput(); ~LexStream(); void DestroyInput() { delete [] input_buffer; input_buffer = NULL; delete [] comment_buffer; comment_buffer = NULL; } void SortMessages(); void PrintMessages(); void PrintEmacsMessage(int); void PrintSmallSource(int); void PrintLargeSource(int); void PrintMessage(StreamError::StreamErrorKind); void SetUpComments() { RereadInput(); // // Calculate the length of the string required to save the comments. // Allocate the buffer, save the comments in the buffer and update their // respective "string" pointer. // int length = 0, i; for (i = 1; i < comment_stream.Length(); i++) length += (comments[i].length + 1); comment_buffer = new wchar_t[length]; wchar_t *ptr = comment_buffer; for (i = 1; i < comment_stream.Length(); i++) { memmove(ptr, &(input_buffer[comments[i].location]), comments[i].length * sizeof(wchar_t)); comments[i].string = ptr; ptr += comments[i].length; *ptr++ = U_NULL; } return; } #ifdef TEST void LexStream::Dump(); // temporary function used to dump token stream. #endif // // Return the total size of space allocated for the tokens. // size_t TokenSpaceAllocated(void) { return token_stream.Length() * sizeof(Token); } // // Return the total size of space allocated for the comments. // size_t CommentSpaceAllocated(void) { return comment_stream.Length() * sizeof(Comment); } private: friend class Scanner; class Token { public: unsigned info; union { Symbol *symbol; TokenIndex right_brace; } additional_info; inline void SetLocation(unsigned location) { info = (info & 0x000000FF) | (location << 8); } inline unsigned Location() { return (info >> 8); } inline void SetKind(unsigned kind) { info = (info & 0xFFFFFF80) | kind; } inline unsigned Kind() { return (info & 0x0000007F); } // // Note that the bit 0x00000080 is currently unused. It might be used in the future // either to record some boolean fact about a token, To allow the number of token // kinds to be expanded from 127 to 255 or to allow the number of input locations // to be expanded from 16M to 32M. // inline void SetSymbol(Symbol *symbol) { additional_info.symbol = symbol; } inline void SetRightBrace(TokenIndex right_brace) { additional_info.right_brace = right_brace; } }; class Comment { public: TokenIndex previous_token; unsigned location; unsigned length; wchar_t *string; }; Tuple<StreamError> bad_tokens; TokenIndex index; Token *tokens; unsigned short *columns; ConvertibleArray<Token> token_stream; Comment *comments; ConvertibleArray<Comment> comment_stream; unsigned *locations; ConvertibleArray<unsigned> line_location; TokenIndex *types; ConvertibleArray<TokenIndex> type_index; void InitializeColumns(); void CompressSpace(); bool initial_reading_of_input; wchar_t *input_buffer; size_t input_buffer_length; wchar_t *comment_buffer; Control &control; void ReadInput(); void ProcessInput(char *, long); wchar_t *KeywordName(int); unsigned FindLine(unsigned location); unsigned FindColumn(unsigned location) { assert(locations); return Tab::Wcslen(input_buffer, locations[FindLine(location)], location); } }; #endif