scanner.cpp

// $Id: scanner.cpp,v 1.5 1999/02/17 19:07:57 shields Exp $
copyright notice

#include "config.h"
#include "scanner.h"
#include "control.h"
#include "error.h"

//
// Keyword recognizers indexed by identifier length (0 through 12).
// Lengths 0, 1 and 11 are routed to ScanKeyword0, which always answers
// TK_Identifier.
//
int (*Scanner::scan_keyword[13]) (wchar_t *p1) =
{
    ScanKeyword0,  ScanKeyword0,  ScanKeyword2,  ScanKeyword3,    // lengths  0 ..  3
    ScanKeyword4,  ScanKeyword5,  ScanKeyword6,  ScanKeyword7,    // lengths  4 ..  7
    ScanKeyword8,  ScanKeyword9,  ScanKeyword10, ScanKeyword0,    // lengths  8 .. 11
    ScanKeyword12                                                 // length  12
};


//
// Construct a scanner bound to CONTROL_ and fill in CLASSIFY_TOKEN, the
// table that maps the first character of a token to the member function
// that scans and classifies that token.
//
Scanner::Scanner(Control &control_) : control(control_)
{
    //
    // The Token structure in stream.h must be redesigned if this
    // assertion ever fails !!!
    //
    assert(NUM_TERMINALS < 128);

    //
    // -------------------------------------------------------------------------------
    // Historical note: the check below was pulled out because we were tired of
    // defending it. We thought it was obvious that either $ should not have been
    // used for compiler generated variables or that users should not be allowed
    // to use it in variable names...
    // -------------------------------------------------------------------------------
    //
    // For version 1.1 or above a $ may not be used as part of an identifier name
    // unless the user specifically requests that it be allowed.
    //
    //    if (control.option.one_one && (! control.option.dollar))
    //        Code::SetBadCode(U_DOLLAR);
    //

    //
    // Defaults for the ASCII range: every character starts out as a bad
    // token; alphabetic characters begin an identifier and digits begin
    // a numeric literal. Entry 128 is shared by all non-ASCII characters.
    //
    for (int ch = 0; ch < 128; ch++)
    {
        classify_token[ch] = &Scanner::ClassifyBadToken;

        if (Code::IsAlpha(ch))
            classify_token[ch] = &Scanner::ClassifyId;
        else if (Code::IsDigit(ch))
            classify_token[ch] = &Scanner::ClassifyNumericLiteral;
    }
    classify_token[128] = &Scanner::ClassifyNonAsciiUnicode;

    //
    // Letters that are routed to the keyword-aware identifier routine.
    //
    static const int keyword_initials[] =
    {
        U_a, U_b, U_c, U_d, U_e, U_f, U_g, U_i,
        U_l, U_n, U_p, U_r, U_s, U_t, U_v, U_w
    };
    const int num_keyword_initials = sizeof(keyword_initials) / sizeof(keyword_initials[0]);
    for (int k = 0; k < num_keyword_initials; k++)
        classify_token[keyword_initials[k]] = &Scanner::ClassifyIdOrKeyword;

    //
    // Literal openers.
    //
    classify_token[U_SINGLE_QUOTE] = &Scanner::ClassifyCharLiteral;
    classify_token[U_DOUBLE_QUOTE] = &Scanner::ClassifyStringLiteral;

    //
    // Operators.
    //
    classify_token[U_PLUS]        = &Scanner::ClassifyPlus;
    classify_token[U_MINUS]       = &Scanner::ClassifyMinus;
    classify_token[U_STAR]        = &Scanner::ClassifyStar;
    classify_token[U_SLASH]       = &Scanner::ClassifySlash;
    classify_token[U_PERCENT]     = &Scanner::ClassifyMod;
    classify_token[U_EXCLAMATION] = &Scanner::ClassifyNot;
    classify_token[U_TILDE]       = &Scanner::ClassifyComplement;
    classify_token[U_AMPERSAND]   = &Scanner::ClassifyAnd;
    classify_token[U_BAR]         = &Scanner::ClassifyOr;
    classify_token[U_CARET]       = &Scanner::ClassifyXor;
    classify_token[U_LESS]        = &Scanner::ClassifyLess;
    classify_token[U_GREATER]     = &Scanner::ClassifyGreater;
    classify_token[U_EQUAL]       = &Scanner::ClassifyEqual;
    classify_token[U_QUESTION]    = &Scanner::ClassifyQuestion;
    classify_token[U_COLON]       = &Scanner::ClassifyColon;

    //
    // Brackets and separators.
    //
    classify_token[U_LEFT_PARENTHESIS]  = &Scanner::ClassifyLparen;
    classify_token[U_RIGHT_PARENTHESIS] = &Scanner::ClassifyRparen;
    classify_token[U_LEFT_BRACE]        = &Scanner::ClassifyLbrace;
    classify_token[U_RIGHT_BRACE]       = &Scanner::ClassifyRbrace;
    classify_token[U_LEFT_BRACKET]      = &Scanner::ClassifyLbracket;
    classify_token[U_RIGHT_BRACKET]     = &Scanner::ClassifyRbracket;
    classify_token[U_SEMICOLON]         = &Scanner::ClassifySemicolon;
    classify_token[U_COMMA]             = &Scanner::ClassifyComma;
    classify_token[U_DOT]               = &Scanner::ClassifyPeriod;
}


//
// Create a new LexStream for FILE_SYMBOL, reset it, and seed it with its
// 0th token, its 0th comment (only when control.option.comments is set)
// and the starting location of line 0.
//
void Scanner::Initialize(FileSymbol *file_symbol)
{
    lex = new LexStream(control, file_symbol);
    lex -> Reset();

    //
    // The 0th token: kind 0, location 0, no symbol.
    //
    LexStream::Token &zeroth_token = lex -> token_stream.Next();
    zeroth_token.SetKind(0);
    zeroth_token.SetLocation(0);
    zeroth_token.SetSymbol(NULL);

    //
    // The 0th comment, present only when comments are being kept.
    //
    if (control.option.comments)
    {
        LexStream::Comment &zeroth_comment = lex -> comment_stream.Next();
        zeroth_comment.string = NULL;
        zeroth_comment.length = 0;
        zeroth_comment.previous_token = -1; // no token comes before this comment
        zeroth_comment.location = 0;
    }

    //
    // Line 0 starts at offset 0.
    //
    lex -> line_location.Next() = 0;
}


//
// Prepare a lexical stream for FILE_SYMBOL without reading or scanning
// any input: the stream is initialized, its space is compressed, and it
// is then attached to FILE_SYMBOL as its lex_stream.
//
void Scanner::SetUp(FileSymbol *file_symbol)
{
    Initialize(file_symbol);

    lex -> CompressSpace();

    file_symbol -> lex_stream = lex;
}


//
// Main entry point of the lexical analyser for one file. The input of
// FILE_SYMBOL is read and, when an input buffer is available, scanned into
// a stream of tokens. On return FILE_SYMBOL's lex_stream is the resulting
// LexStream, or NULL when no input buffer could be obtained.
//
void Scanner::Scan(FileSymbol *file_symbol)
{
    Initialize(file_symbol);

    lex -> ReadInput();
    cursor = lex -> InputBuffer();

    if (! cursor)
    {
        //
        // Nothing to scan: discard the stream.
        //
        delete lex;
        lex = NULL;
    }
    else
    {
        Scan();
        lex -> CompressSpace();

        //
        // When requested, emit every recorded bad token message, sorted.
        //
        if (control.option.dump_errors)
        {
            lex -> SortMessages();

            int message = 0;
            while (message < lex -> bad_tokens.Length())
            {
                lex -> PrintEmacsMessage(message);
                message++;
            }
            cout.flush();
        }

        lex -> DestroyInput(); // the input buffer is no longer needed
    }

    file_symbol -> lex_stream = lex;
}


//
// Scan the InputBuffer() and process all tokens and comments.
//
void Scanner::Scan()
{
    wchar_t *input_buffer_tail = &cursor[lex -> InputBufferLength()];

    //
    // CURSOR is assumed to point to the next character to be scanned.
    // Using CURSOR,we jump to the proper classification function
    // which scans and classifies the token and returns the location of
    // the character immediately following it.
    //
    do
    {
        SkipSpaces();
        (this ->* classify_token[*cursor < 128 ? *cursor : 128])();
    } while (cursor < input_buffer_tail);

    //
    // Add a a gate after the last line.
    //
    lex -> line_location.Next() = input_buffer_tail - lex -> InputBuffer();

    //
    // If the brace_stack is not empty, then there are unmatched left
    // braces in the input. Each unmatched left brace should point to 
    // the EOF token as a substitute for a matching right brace.
    //
    for (LexStream::TokenIndex left_brace = brace_stack.Top(); left_brace; left_brace = brace_stack.Top())
    {
        lex -> token_stream[left_brace].SetRightBrace(lex -> token_stream.Length() - 1);
        brace_stack.Pop();
    }

    return;
}


//
// CURSOR points to the starting position of a comment.  Scan the      
// the comment and return the location of the character immediately   
// following it. CURSOR may be destroyed.                              
//
void Scanner::ScanStarComment()
{
    LexStream::Comment *current_comment = (control.option.comments ? &(lex -> comment_stream.Next()) : new LexStream::Comment());
    current_comment -> string = NULL;
    current_comment -> previous_token = lex -> token_stream.Length() - 2; // the token that precedes this comment
    current_comment -> location = cursor - lex -> InputBuffer();

    cursor += 2;

    for (;;)
    {
        while (*cursor != U_STAR && (! Code::IsNewline(*cursor)) && *cursor != U_CTL_Z)
            cursor++;

        if (*cursor == U_STAR) // Potential comment closer 
        {
            while (*++cursor == U_STAR)
                ;
            if (*cursor == U_SLASH)
            {
                cursor++;
                current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
                if (! control.option.comments)
                    delete current_comment;
                return;
            }
        }
        else if (Code::IsNewline(*cursor)) // Record new line 
        {
            cursor++;
            lex -> line_location.Next() = cursor - lex -> InputBuffer();
        }
        else break;
    }

    lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_COMMENT,
                                        current_comment -> location,
                                        (unsigned) (cursor - lex -> InputBuffer()) - 1);

    current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;

    if (! control.option.comments)
        delete current_comment;

    return;
}


//
//
//
void Scanner::ScanSlashComment()
{
    if (control.option.comments)
    {
        LexStream::Comment *current_comment = &(lex -> comment_stream.Next());
        current_comment -> string = NULL;
        current_comment -> previous_token = lex -> token_stream.Length() - 2;  // the token that precedes this comment
        current_comment -> location = cursor - lex -> InputBuffer();
        for (cursor += 2; ! Code::IsNewline(*cursor); cursor++)  // skip all until \n
            ;
        current_comment -> length = (cursor - lex -> InputBuffer()) - current_comment -> location;
    }
    else
    {
        for (cursor += 2; ! Code::IsNewline(*cursor); cursor++)  // skip all until \n
            ;
    }

    return;
}


//
// Skip white space, line breaks and comments. On entry CURSOR points to
// the next character to be scanned. On return CURSOR designates the first
// character at or after its initial position that is neither a space nor
// part of a comment. The start of every new line met on the way is
// recorded in line_location.
//
inline void Scanner::SkipSpaces()
{
    for (;;)
    {
        //
        // Blanks and line breaks.
        //
        for (;;)
        {
            while (Code::IsSpaceButNotNewline(*cursor))
                cursor++;

            if (! Code::IsNewline(*cursor))
                break;

            cursor++;                   // a new line starts here
            lex -> line_location.Next() = cursor - lex -> InputBuffer();
        }

        //
        // Consecutive comments. A slash that opens no comment is left
        // for the token classifier.
        //
        while (*cursor == U_SLASH)
        {
            const wchar_t follower = cursor[1];

            if (follower == U_STAR)
                 ScanStarComment();
            else if (follower == U_SLASH)
                 ScanSlashComment();
            else break;
        }

        //
        // Go around again only if a comment was followed by more space.
        //
        if (! Code::IsSpace(*cursor))
            break;
    }
}


/**********************************************************************/
/**********************************************************************/
/**                                                                  **/
/**                           scan_keyword(i):                       **/
/**                                                                  **/
/**********************************************************************/
/**********************************************************************/
/**                                                                  **/
/** Scan an identifier of length I and determine if it is a keyword. **/
/**                                                                  **/
/**********************************************************************/
/**********************************************************************/
//
// Fallback recognizer: answers TK_Identifier without looking at P1. It is
// the scan_keyword entry for lengths 0, 1 and 11.
//
int Scanner::ScanKeyword0(wchar_t *p1)
{
    return TK_Identifier;
}

//
// Two-character names: answers TK_do or TK_if on a match and
// TK_Identifier otherwise.
//
int Scanner::ScanKeyword2(wchar_t *p1)
{
    switch (p1[0])
    {
        case U_d: // do
            if (p1[1] == U_o)
                return TK_do;
            break;
        case U_i: // if
            if (p1[1] == U_f)
                return TK_if;
            break;
    }

    return TK_Identifier;
}

//
// Three-character names: answers TK_for, TK_int, TK_new or TK_try on a
// match and TK_Identifier otherwise.
//
int Scanner::ScanKeyword3(wchar_t *p1)
{
    int kind = TK_Identifier;

    switch (p1[0])
    {
        case U_f: // for
            if (p1[1] == U_o && p1[2] == U_r)
                kind = TK_for;
            break;
        case U_i: // int
            if (p1[1] == U_n && p1[2] == U_t)
                kind = TK_int;
            break;
        case U_n: // new
            if (p1[1] == U_e && p1[2] == U_w)
                kind = TK_new;
            break;
        case U_t: // try
            if (p1[1] == U_r && p1[2] == U_y)
                kind = TK_try;
            break;
    }

    return kind;
}

//
// Four-character names: answers the matching keyword kind (TK_byte,
// TK_case, TK_char, TK_else, TK_goto, TK_long, TK_null, TK_this, TK_true
// or TK_void) and TK_Identifier when nothing matches.
//
int Scanner::ScanKeyword4(wchar_t *p1)
{
    int kind = TK_Identifier;

    switch (p1[0])
    {
        case U_b: // byte
            if (p1[1] == U_y && p1[2] == U_t && p1[3] == U_e)
                kind = TK_byte;
            break;
        case U_c: // case, char
            if (p1[1] == U_a && p1[2] == U_s && p1[3] == U_e)
                kind = TK_case;
            else if (p1[1] == U_h && p1[2] == U_a && p1[3] == U_r)
                kind = TK_char;
            break;
        case U_e: // else
            if (p1[1] == U_l && p1[2] == U_s && p1[3] == U_e)
                kind = TK_else;
            break;
        case U_g: // goto
            if (p1[1] == U_o && p1[2] == U_t && p1[3] == U_o)
                kind = TK_goto;
            break;
        case U_l: // long
            if (p1[1] == U_o && p1[2] == U_n && p1[3] == U_g)
                kind = TK_long;
            break;
        case U_n: // null
            if (p1[1] == U_u && p1[2] == U_l && p1[3] == U_l)
                kind = TK_null;
            break;
        case U_t: // this, true
            if (p1[1] == U_h && p1[2] == U_i && p1[3] == U_s)
                kind = TK_this;
            else if (p1[1] == U_r && p1[2] == U_u && p1[3] == U_e)
                kind = TK_true;
            break;
        case U_v: // void
            if (p1[1] == U_o && p1[2] == U_i && p1[3] == U_d)
                kind = TK_void;
            break;
    }

    return kind;
}

int Scanner::ScanKeyword5(wchar_t *p1)
{
    switch (*p1)
    {
        case U_b:
            if (p1[1] == U_r && p1[2] == U_e &&
                p1[3] == U_a && p1[4] == U_k)
                return TK_break;
            break;
        case U_c:
            if (p1[1] == U_a && p1[2] == U_t &&
                p1[3] == U_c && p1[4] == U_h)
                return TK_catch;
            else if (p1[1] == U_l && p1[2] == U_a &&
                     p1[3] == U_s && p1[4] == U_s)
                return TK_class;
            else if (p1[1] == U_o && p1[2] == U_n &&
                     p1[3] == U_s && p1[4] == U_t)
                return TK_const;
            break;
        case U_f:
            if (p1[1] == U_a && p1[2] == U_l &&
                p1[3] == U_s && p1[4] == U_e)
                return TK_false;
            else if (p1[1] == U_i && p1[2] == U_n &&
                     p1[3] == U_a && p1[4] == U_l)
                return TK_final;
            else if (p1[1] == U_l && p1[2] == U_o &&
                     p1[3] == U_a && p1[4] == U_t)
                return TK_float;
            break;
        case U_s:
            if (p1[1] == U_h && p1[2] == U_o &&
                p1[3] == U_r && p1[4] == U_t)
                return TK_short;
            else if (p1[1] == U_u && p1[2] == U_p &&
                     p1[3] == U_e && p1[4] == U_r)
                return TK_super;
            break;
        case U_t:
            if (p1[1] == U_h && p1[2] == U_r &&
                p1[3] == U_o && p1[4] == U_w)
                return TK_throw;
            break;
        case U_w:
            if (p1[1] == U_h && p1[2] == U_i &&
                p1[3] == U_l && p1[4] == U_e)
                return TK_while;
            break;
    }

    return TK_Identifier;
}

//
// Six-character names: answers the matching keyword kind (TK_double,
// TK_import, TK_native, TK_public, TK_return, TK_static, TK_switch or
// TK_throws) and TK_Identifier when nothing matches.
//
int Scanner::ScanKeyword6(wchar_t *p1)
{
    int kind = TK_Identifier;

    switch (p1[0])
    {
        case U_d: // double
            if (p1[1] == U_o && p1[2] == U_u && p1[3] == U_b &&
                p1[4] == U_l && p1[5] == U_e)
                kind = TK_double;
            break;
        case U_i: // import
            if (p1[1] == U_m && p1[2] == U_p && p1[3] == U_o &&
                p1[4] == U_r && p1[5] == U_t)
                kind = TK_import;
            break;
        case U_n: // native
            if (p1[1] == U_a && p1[2] == U_t && p1[3] == U_i &&
                p1[4] == U_v && p1[5] == U_e)
                kind = TK_native;
            break;
        case U_p: // public
            if (p1[1] == U_u && p1[2] == U_b && p1[3] == U_l &&
                p1[4] == U_i && p1[5] == U_c)
                kind = TK_public;
            break;
        case U_r: // return
            if (p1[1] == U_e && p1[2] == U_t && p1[3] == U_u &&
                p1[4] == U_r && p1[5] == U_n)
                kind = TK_return;
            break;
        case U_s: // static, switch
            if (p1[1] == U_t && p1[2] == U_a && p1[3] == U_t &&
                p1[4] == U_i && p1[5] == U_c)
                kind = TK_static;
            else if (p1[1] == U_w && p1[2] == U_i && p1[3] == U_t &&
                     p1[4] == U_c && p1[5] == U_h)
                kind = TK_switch;
            break;
        case U_t: // throws
            if (p1[1] == U_h && p1[2] == U_r && p1[3] == U_o &&
                p1[4] == U_w && p1[5] == U_s)
                kind = TK_throws;
            break;
    }

    return kind;
}

//
// Seven-character names: answers TK_boolean, TK_default, TK_extends,
// TK_finally, TK_package or TK_private when P1 spells that keyword, and
// TK_Identifier otherwise.
//
// P1 points to the first of the seven characters to examine.
//
int Scanner::ScanKeyword7(wchar_t *p1)
{
    switch(*p1)
    {
        case U_b:
            if (p1[1] == U_o && p1[2] == U_o && p1[3] == U_l &&
                p1[4] == U_e && p1[5] == U_a && p1[6] == U_n)
                return TK_boolean;
            //
            // This break is required: without it a name that starts with
            // 'b' but is not "boolean" falls into the next case and is
            // compared against the tail of "default", so that a name such
            // as "befault" would be reported as TK_default.
            //
            break;
        case U_d:
            if (p1[1] == U_e && p1[2] == U_f && p1[3] == U_a &&
                p1[4] == U_u && p1[5] == U_l && p1[6] == U_t)
                return TK_default;
            break;
        case U_e:
            if (p1[1] == U_x && p1[2] == U_t && p1[3] == U_e &&
                p1[4] == U_n && p1[5] == U_d && p1[6] == U_s)
                return TK_extends;
            break;
        case U_f:
            if (p1[1] == U_i && p1[2] == U_n && p1[3] == U_a &&
                p1[4] == U_l && p1[5] == U_l && p1[6] == U_y)
                return TK_finally;
            break;
        case U_p:
            if (p1[1] == U_a && p1[2] == U_c && p1[3] == U_k &&
                p1[4] == U_a && p1[5] == U_g && p1[6] == U_e)
                return TK_package;
            else if (p1[1] == U_r && p1[2] == U_i && p1[3] == U_v &&
                     p1[4] == U_a && p1[5] == U_t && p1[6] == U_e)
                return TK_private;
            break;
    }

    return TK_Identifier;
}

//
// Eight-character names: answers TK_abstract, TK_continue, TK_strictfp or
// TK_volatile on a match and TK_Identifier otherwise.
//
int Scanner::ScanKeyword8(wchar_t *p1)
{
    int kind = TK_Identifier;

    switch (p1[0])
    {
        case U_a: // abstract
            if (p1[1] == U_b && p1[2] == U_s && p1[3] == U_t && p1[4] == U_r &&
                p1[5] == U_a && p1[6] == U_c && p1[7] == U_t)
                kind = TK_abstract;
            break;
        case U_c: // continue
            if (p1[1] == U_o && p1[2] == U_n && p1[3] == U_t && p1[4] == U_i &&
                p1[5] == U_n && p1[6] == U_u && p1[7] == U_e)
                kind = TK_continue;
            break;
        case U_s: // strictfp
            if (p1[1] == U_t && p1[2] == U_r && p1[3] == U_i && p1[4] == U_c &&
                p1[5] == U_t && p1[6] == U_f && p1[7] == U_p)
                kind = TK_strictfp;
            break;
        case U_v: // volatile
            if (p1[1] == U_o && p1[2] == U_l && p1[3] == U_a && p1[4] == U_t &&
                p1[5] == U_i && p1[6] == U_l && p1[7] == U_e)
                kind = TK_volatile;
            break;
    }

    return kind;
}

//
// Nine-character names: answers TK_interface, TK_protected or
// TK_transient on a match and TK_Identifier otherwise.
//
int Scanner::ScanKeyword9(wchar_t *p1)
{
    int kind = TK_Identifier;

    switch (p1[0])
    {
        case U_i: // interface
            if (p1[1] == U_n && p1[2] == U_t && p1[3] == U_e && p1[4] == U_r &&
                p1[5] == U_f && p1[6] == U_a && p1[7] == U_c && p1[8] == U_e)
                kind = TK_interface;
            break;
        case U_p: // protected
            if (p1[1] == U_r && p1[2] == U_o && p1[3] == U_t && p1[4] == U_e &&
                p1[5] == U_c && p1[6] == U_t && p1[7] == U_e && p1[8] == U_d)
                kind = TK_protected;
            break;
        case U_t: // transient
            if (p1[1] == U_r && p1[2] == U_a && p1[3] == U_n && p1[4] == U_s &&
                p1[5] == U_i && p1[6] == U_e && p1[7] == U_n && p1[8] == U_t)
                kind = TK_transient;
            break;
    }

    return kind;
}

//
// Ten-character names: answers TK_implements or TK_instanceof on a match
// and TK_Identifier otherwise. Both keywords share the same first
// character, so anything else is rejected up front.
//
int Scanner::ScanKeyword10(wchar_t *p1)
{
    if (p1[0] != U_i)
        return TK_Identifier;

    // implements
    if (p1[1] == U_m && p1[2] == U_p && p1[3] == U_l &&
        p1[4] == U_e && p1[5] == U_m && p1[6] == U_e &&
        p1[7] == U_n && p1[8] == U_t && p1[9] == U_s)
        return TK_implements;

    // instanceof
    if (p1[1] == U_n && p1[2] == U_s && p1[3] == U_t &&
        p1[4] == U_a && p1[5] == U_n && p1[6] == U_c &&
        p1[7] == U_e && p1[8] == U_o && p1[9] == U_f)
        return TK_instanceof;

    return TK_Identifier;
}

int Scanner::ScanKeyword12(wchar_t *p1)
{
    if (p1[0] == U_s && p1[1] == U_y && p1[2] == U_n &&
        p1[3] == U_c && p1[4] == U_h && p1[5] == U_r &&
        p1[6] == U_o && p1[7] == U_n && p1[8] == U_i &&
        p1[9] == U_z && p1[10] == U_e&& p1[11] == U_d)
        return TK_synchronized;

    return TK_Identifier;
}

//
// CheckOctalLiteral:
//
// Verify that the token occupying [CURSOR, TAIL) is a legal octal literal.
// The check only applies when the token starts with '0' and that zero is
// not followed by 'x' or 'X'. If an '8' or a '9' appears after the leading
// zero, a BAD_OCTAL_CONSTANT error spanning the whole token is recorded.
//
inline void Scanner::CheckOctalLiteral(wchar_t *cursor, wchar_t *tail)
{
    //
    // Not an octal candidate: nothing to verify.
    //
    if (cursor[0] != U_0 || cursor[1] == U_x || cursor[1] == U_X)
        return;

    //
    // Look for the first digit that cannot appear in an octal literal.
    //
    wchar_t *digit = cursor + 1;
    while (digit < tail && *digit != U_8 && *digit != U_9)
        digit++;

    if (digit < tail)
    {
        lex -> bad_tokens.Next().Initialize(StreamError::BAD_OCTAL_CONSTANT,
                                            (unsigned) (cursor - lex -> InputBuffer()),
                                            (unsigned) (tail - lex -> InputBuffer()) - 1);
    }
}


/**********************************************************************/
/*                      ClassifyCharLiteral:                          */
/**********************************************************************/
/* This procedure is invoked to scan a character literal or a large   */
/* character literal. A large character literal is preceded by the    */
/* letter L (capital L). After the character literal has been scanned */
/* and classified, it is entered in the table without its closing     */
/* quote but with the opening quote (preceded by L if it's a large    */
/* character literal).                                                */
/**********************************************************************/
void Scanner::ClassifyCharLiteral()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_CharacterLiteral);

    wchar_t *ptr = cursor + 1;

    while (*ptr != U_SINGLE_QUOTE && (! Code::IsNewline(*ptr)))
    {
        if (*ptr++ == U_BACKSLASH)   // In any case, skip the character
        {                            // If it was a backslash, 
            if (! Code::IsNewline(*ptr)) // if the next char is not eol, skip it.
                ptr++;
        }
    }

    int len = ptr - cursor;
    if (*ptr == U_SINGLE_QUOTE)
    {
        if (len == 1)
            lex -> bad_tokens.Next().Initialize(StreamError::EMPTY_CHARACTER_CONSTANT,
                                                current_token -> Location(),
                                                (unsigned) (ptr - lex -> InputBuffer()));
        ptr++;
    }
    else
    {
        if (len == 1) /* Definitely, an isolated quote */
            current_token -> SetKind(0);
        lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_CHARACTER_CONSTANT,
                                            current_token -> Location(),
                                            (unsigned) (ptr - lex -> InputBuffer()) - 1);
    }

    current_token -> SetSymbol(control.char_table.FindOrInsertLiteral(cursor, ptr - cursor));

    cursor = ptr;
    return;
}


/**********************************************************************/
/*                     CLASSIFY_STRINGLITERAL:                        */
/**********************************************************************/
/* This procedure is invoked to scan a string literal or a large      */
/* string literal. A large string literal is preceded by the letter   */
/* L (capital L). After the string literal has been scanned and       */
/* classified, it is entered in the table without its closing double  */
/* quote but with the opening quote (preceded by L if it's a large    */
/* string literal).                                                   */
/**********************************************************************/
void Scanner::ClassifyStringLiteral()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_StringLiteral);

    wchar_t *ptr = cursor + 1;

    while (*ptr != U_DOUBLE_QUOTE && (! Code::IsNewline(*ptr)))
    {
        if (*ptr++ == U_BACKSLASH)   // In any case, skip the character
        {                            // If it was a backslash, 
            if (! Code::IsNewline(*ptr)) // if the next char is not eol, skip it.
                ptr++;
        }
    }

    if (*ptr == U_DOUBLE_QUOTE)
        ptr++;
    else
    {
        if ((ptr - cursor) == 1) /* Definitely, an isolated double quote */
            current_token -> SetKind(0);
        lex -> bad_tokens.Next().Initialize(StreamError::UNTERMINATED_STRING_CONSTANT,
                                            current_token -> Location(),
                                            (unsigned) (ptr - lex -> InputBuffer()) - 1);
    }

    current_token -> SetSymbol(control.string_table.FindOrInsertLiteral(cursor, ptr - cursor));

    cursor = ptr;
    return;
}


/**********************************************************************/
/*                     CLASSIFYIDORKEYWORD:                        */
/**********************************************************************/
/* This procedure is invoked when CURSOR points to one of the         */
/* following characters:                                              */
/*                                                                    */
/*      'a'                                                           */
/*      'b'                                                           */
/*      'c'                                                           */
/*      'd'                                                           */
/*      'e'                                                           */
/*      'f'                                                           */
/*      'g'                                                           */
/*      'i'                                                           */
/*      'l'                                                           */
/*      'n'                                                           */
/*      'o'                                                           */
/*      'p'                                                           */
/*      'r'                                                           */
/*      's'                                                           */
/*      't'                                                           */
/*      'v'                                                           */
/*      'w'                                                           */
/*                                                                    */
/* It scans the identifier and checks whether or not it is a keyword. */
/*                                                                    */
/* NOTE that the use of that check is a time-optimization that is not */
/* required for correctness.                                          */
/**********************************************************************/
void Scanner::ClassifyIdOrKeyword()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    wchar_t *ptr = cursor + 1;

    while (Code::IsAlnum(*ptr))
        ptr++;
    int len = ptr - cursor;

    current_token -> SetKind(len < 13 ? (scan_keyword[len])(cursor) : TK_Identifier);
    if (current_token -> Kind() == TK_Identifier)
    {
        current_token -> SetSymbol((NameSymbol *) control.FindOrInsertName(cursor, len));
        for (int i = 0; i < control.option.keyword_map.Length(); i++)
        {
            if (control.option.keyword_map[i].length == len && wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
                current_token -> SetKind(control.option.keyword_map[i].key);
        }
    }
    else
    {
        current_token -> SetSymbol(NULL);
        if (current_token -> Kind() == TK_class || current_token -> Kind() == TK_interface)
        {
            if (brace_stack.Size() == 0) // This type keyword is not nested.
                lex -> type_index.Next() = lex -> token_stream.Length() - 1;
        }
    }

    cursor = ptr;
    return;
}

/**********************************************************************/
/*                             CLASSIFY_ID:                           */
/**********************************************************************/
/* This procedure is invoked when CURSOR points to an alphabetic      */
/* character other than the ones identified above or '$' or '_'.      */
/* A token that starts with one of these letters is an identifier.    */
/**********************************************************************/
void Scanner::ClassifyId()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    wchar_t *ptr = cursor + 1;

    while (Code::IsAlnum(*ptr))
        ptr++;

    int len = ptr - cursor;

    current_token -> SetKind(TK_Identifier);
    current_token -> SetSymbol(control.FindOrInsertName(cursor, len));

    for (int i = 0; i < control.option.keyword_map.Length(); i++)
    {
        if (control.option.keyword_map[i].length == len && wcsncmp(cursor, control.option.keyword_map[i].name, len) == 0)
            current_token -> SetKind(control.option.keyword_map[i].key);
    }

    cursor = ptr;
    return;
}


/**********************************************************************/
/*                     CLASSIFY_NUMERICLITERAL:                       */
/**********************************************************************/
/* This procedure is invoked when CURSOR points directly to one of    */
/* the characters below or to a '.' followed by one of the characters */
/* below:                                                             */
/*                                                                    */
/*        case '0': case '1': case '2': case '3': case '4':           */
/*        case '5': case '6': case '7': case '8': case '9':           */
/*                                                                    */
/* Such a token is classified as a numeric literal:                   */
/*                                                                    */
/*   TK_LongLiteral, TK_IntegerLiteral,                               */
/*   TK_DOUBLELiteral, TK_FloatingPointLiteral                        */
/**********************************************************************/
void Scanner::ClassifyNumericLiteral()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    /******************************************************************/
    /* Scan the initial sequence of digits if any. The sequence is    */
    /* empty when CURSOR points to a '.' (see ClassifyPeriod).        */
    /******************************************************************/
    wchar_t *ptr;
    for (ptr = cursor; Code::IsDigit(*ptr); ptr++)
        ;

    /******************************************************************/
    /* We now take an initial crack at classifying the numeric token. */
    /* We have two cases to consider.                                 */
    /*                                                                */
    /* 1) If the initial (perhaps empty) sequence of digits is        */
    /*    followed by a period ('.'), we have a floating-constant.    */
    /*    We scan the sequence of digits (if any) that follows the    */
    /*    period.                                                     */
    /*                                                                */
    /* 2) Otherwise, we have an integer literal.                      */
    /*                                                                */
    /*    If the initial (can't be empty) sequence of digits starts   */
    /*    with "0x" or "0X" we have a hexadecimal constant:           */
    /*    continue scanning all hex-digits that follow the 'x'.       */
    /******************************************************************/
    if (*ptr == U_DOT)
    {
        current_token -> SetKind(TK_DoubleLiteral);
        for (ptr++; Code::IsDigit(*ptr); ptr++)
            ;
    }
    else
    {
        current_token -> SetKind(TK_IntegerLiteral);
        if (*cursor == U_0 && (cursor[1] == U_x || cursor[1] == U_X))
        {
            ptr = cursor + 2;
            wchar_t *first_hex_digit = ptr;

            //
            // isxdigit() is only defined for arguments representable as
            // an unsigned char (or EOF), whereas the buffer holds wide
            // characters. All hexadecimal digits are ASCII, so anything
            // outside the ASCII range is rejected before the call.
            //
            while (((unsigned) *ptr) < 128 && isxdigit(*ptr))
                ptr++;

            //
            // "0x" must be followed by at least one hexadecimal digit.
            //
            if (ptr == first_hex_digit)
                lex -> bad_tokens.Next().Initialize(StreamError::INVALID_HEX_CONSTANT,
                                                    current_token -> Location(),
                                                    (unsigned) (ptr - lex -> InputBuffer()) - 1);
        }
    }

    /******************************************************************/
    /* If the initial numeric token is followed by an exponent, then  */
    /* it is a floating-constant. If that's the case, the literal is  */
    /* reclassified and the exponent is scanned.                      */
    /*                                                                */
    /* NOTE that as 'E' and 'e' are legitimate hexadecimal digits, we */
    /* don't have to worry about a hexadecimal constant being used as */
    /* the prefix of a floating-constant. E.g., 0x123e12 is tokenized */
    /* as a single hexadecimal number. The string 0x123e+12 gets      */
    /* broken down as the hex number 0x123e, the operator '+' and the */
    /* decimal constant 12.                                           */
    /******************************************************************/
    if (*ptr == U_e || *ptr == U_E)
    {
        current_token -> SetKind(TK_DoubleLiteral);

        ptr++; /* Skip the 'e' or 'E' */

        if (*ptr == U_PLUS || *ptr == U_MINUS)
            ptr++; /* Skip the '+' or '-' */

        if (Code::IsDigit(*ptr))
        {
            for (ptr++; Code::IsDigit(*ptr); ptr++)
                ;
        }
        else lex -> bad_tokens.Next().Initialize(StreamError::INVALID_FLOATING_CONSTANT_EXPONENT,
                                                 current_token -> Location(),
                                                 (unsigned) (ptr - lex -> InputBuffer()) - 1);
    }

    /******************************************************************/
    /* A numeric constant may be suffixed by a letter that further    */
    /* qualifies what kind of a constant it is. We check for these    */
    /* suffixes here. At this point CURSOR points to the first        */
    /* character of the token and PTR to the character that follows   */
    /* what has been scanned so far. In every branch the complete     */
    /* spelling (suffix included) is entered in the literal table     */
    /* that corresponds to the final classification. For integer and  */
    /* long literals the spelling is also passed to CheckOctalLiteral.*/
    /******************************************************************/
    int len;

    if (*ptr == U_f || *ptr == U_F)
    {
        ptr++; /* Skip the 'f' or 'F' */
        len = ptr - cursor;
        current_token -> SetSymbol(control.float_table.FindOrInsertLiteral(cursor, len));
        current_token -> SetKind(TK_FloatingPointLiteral);
    }
    else if (*ptr == U_d || *ptr == U_D)
    {
        ptr++; /* Skip the 'd' or 'D' */
        len = ptr - cursor;
        current_token -> SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
        current_token -> SetKind(TK_DoubleLiteral);
    }
    else if (current_token -> Kind() == TK_IntegerLiteral)
    {
        if (*ptr == U_l || *ptr == U_L)
        {
            ptr++; /* Skip the 'l' or 'L' */
            len = ptr - cursor;
            current_token -> SetSymbol(control.long_table.FindOrInsertLiteral(cursor, len));
            current_token -> SetKind(TK_LongLiteral);
        }
        else
        {
            len = ptr - cursor;
            current_token -> SetSymbol(control.int_table.FindOrInsertLiteral(cursor, len));
        }

        CheckOctalLiteral(cursor, ptr);
    }
    else
    {
        len = ptr - cursor;
        current_token -> SetSymbol(control.double_table.FindOrInsertLiteral(cursor, len));
        current_token -> SetKind(TK_DoubleLiteral);
    }

    /******************************************************************/
    /* The complete token has been scanned and classified; resume     */
    /* scanning at the character that immediately follows it.         */
    /******************************************************************/
    cursor = ptr;
    return;
}


/**********************************************************************/
/*                         CLASSIFY_COLON:                            */
/**********************************************************************/
void Scanner::ClassifyColon()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_COLON);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                          CLASSIFY_PLUS:                            */
/**********************************************************************/
void Scanner::ClassifyPlus()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_PLUS)
    {
        cursor++;
        current_token -> SetKind(TK_PLUS_PLUS);
    }
    else if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_PLUS_EQUAL);
    }
    else current_token -> SetKind(TK_PLUS);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                         CLASSIFY_MINUS:                            */
/**********************************************************************/
void Scanner::ClassifyMinus()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_MINUS)
    {
        cursor++;
        current_token -> SetKind(TK_MINUS_MINUS);
    }
    else if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_MINUS_EQUAL);
    }
    else current_token -> SetKind(TK_MINUS);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                          CLASSIFY_STAR:                            */
/**********************************************************************/
void Scanner::ClassifyStar()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_MULTIPLY_EQUAL);
    }
    else current_token -> SetKind(TK_MULTIPLY);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                         CLASSIFY_SLASH:                            */
/**********************************************************************/
void Scanner::ClassifySlash()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_DIVIDE_EQUAL);
    }
    else current_token -> SetKind(TK_DIVIDE);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                         CLASSIFY_LESS:                             */
/**********************************************************************/
void Scanner::ClassifyLess()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_LESS_EQUAL);
    }
    else if (*cursor == U_LESS)
    {
        cursor++;

        if (*cursor == U_EQUAL)
        {
            cursor++;
            current_token -> SetKind(TK_LEFT_SHIFT_EQUAL);
        }
        else current_token -> SetKind(TK_LEFT_SHIFT);
    }
    else current_token -> SetKind(TK_LESS);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                        CLASSIFY_GREATER:                           */
/**********************************************************************/
void Scanner::ClassifyGreater()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_GREATER_EQUAL);
    }
    else if (*cursor == U_GREATER)
    {
        cursor++;

        if (*cursor == U_EQUAL)
        {
            cursor++;
            current_token -> SetKind(TK_RIGHT_SHIFT_EQUAL);
        }
        else if (*cursor == U_GREATER)
        {
            cursor++;

            if (*cursor == U_EQUAL)
            {
                cursor++;
                current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT_EQUAL);
            }
            else current_token -> SetKind(TK_UNSIGNED_RIGHT_SHIFT);
        }
        else current_token -> SetKind(TK_RIGHT_SHIFT);
    }
    else current_token -> SetKind(TK_GREATER);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                          CLASSIFY_AND:                             */
/**********************************************************************/
void Scanner::ClassifyAnd()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_AMPERSAND)
    {
        cursor++;
        current_token -> SetKind(TK_AND_AND);
    }
    else if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_AND_EQUAL);
    }
    else current_token -> SetKind(TK_AND);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                          CLASSIFY_OR:                              */
/**********************************************************************/
void Scanner::ClassifyOr()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_BAR)
    {
        cursor++;
        current_token -> SetKind(TK_OR_OR);
    }
    else if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_OR_EQUAL);
    }
    else current_token -> SetKind(TK_OR);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                          CLASSIFY_XOR:                             */
/**********************************************************************/
void Scanner::ClassifyXor()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_XOR_EQUAL);
    }
    else current_token -> SetKind(TK_XOR);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                          CLASSIFY_NOT:                             */
/**********************************************************************/
void Scanner::ClassifyNot()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_NOT_EQUAL);
    }
    else current_token -> SetKind(TK_NOT);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                         CLASSIFY_EQUAL:                            */
/**********************************************************************/
void Scanner::ClassifyEqual()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_EQUAL_EQUAL);
    }
    else current_token -> SetKind(TK_EQUAL);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                          CLASSIFY_MOD:                             */
/**********************************************************************/
void Scanner::ClassifyMod()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());

    cursor++;

    if (*cursor == U_EQUAL)
    {
        cursor++;
        current_token -> SetKind(TK_REMAINDER_EQUAL);
    }
    else current_token -> SetKind(TK_REMAINDER);
    current_token -> SetSymbol(NULL);

    return;
}


/**********************************************************************/
/*                         CLASSIFY_PERIOD:                           */
/**********************************************************************/
void Scanner::ClassifyPeriod()
{
    if (Code::IsDigit(cursor[1])) // Is period immediately followed by digit?
        ClassifyNumericLiteral();
    else
    {
        LexStream::Token *current_token = &(lex -> token_stream.Next());
        current_token -> SetLocation(cursor - lex -> InputBuffer());
        current_token -> SetKind(TK_DOT);
        current_token -> SetSymbol(NULL);

        cursor++;
    }

    return;
}


/**********************************************************************/
/*                         CLASSIFY_SEMICOLON:                        */
/**********************************************************************/
void Scanner::ClassifySemicolon()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_SEMICOLON);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                           CLASSIFY_COMMA:                          */
/**********************************************************************/
void Scanner::ClassifyComma()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_COMMA);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                           CLASSIFY_LBRACE:                         */
/**********************************************************************/
void Scanner::ClassifyLbrace()
{
    //
    // Instead of setting the symbol for a left brace, we keep track of it.
    // When we encounter its matching right brace, we use the symbol field
    // to identify its counterpart.
    //
    brace_stack.Push(lex -> token_stream.Length());

    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_LBRACE);

    cursor++;

    return;
}


/**********************************************************************/
/*                           CLASSIFY_RBRACE:                         */
/**********************************************************************/
void Scanner::ClassifyRbrace()
{
    //
    // When a left brace in encountered, it is pushed into the brace_stack.
    // When its matching right brace in encountered, we pop the left brace
    // and make it point to its matching right brace.
    //
    LexStream::TokenIndex left_brace = brace_stack.Top();
    if (left_brace) // This right brace is matched by a left one
    {
        lex -> token_stream[left_brace].SetRightBrace(lex -> token_stream.Length());
        brace_stack.Pop();
    }

    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_RBRACE);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                           CLASSIFY_LPAREN:                         */
/**********************************************************************/
void Scanner::ClassifyLparen()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_LPAREN);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                           CLASSIFY_RPAREN:                         */
/**********************************************************************/
void Scanner::ClassifyRparen()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_RPAREN);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                          CLASSIFY_LBRACKET:                        */
/**********************************************************************/
void Scanner::ClassifyLbracket()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_LBRACKET);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                          CLASSIFY_RBRACKET:                        */
/**********************************************************************/
void Scanner::ClassifyRbracket()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_RBRACKET);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                         CLASSIFY_COMPLEMENT:                       */
/**********************************************************************/
void Scanner::ClassifyComplement()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_TWIDDLE);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                        CLASSIFY_BAD_TOKEN:                         */
/**********************************************************************/
void Scanner::ClassifyBadToken()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(0);
    current_token -> SetSymbol(control.FindOrInsertName(cursor, 1));

    if (++cursor < &lex -> InputBuffer()[lex -> InputBufferLength()]) // not the terminating character?
         lex -> bad_tokens.Next().Initialize(StreamError::BAD_TOKEN,
                                             current_token -> Location(),
                                             current_token -> Location());
    else current_token -> SetKind(TK_EOF);

    return;
}


/**********************************************************************/
/*                        CLASSIFY_QUESTION:                          */
/**********************************************************************/
/**********************************************************************/
void Scanner::ClassifyQuestion()
{
    LexStream::Token *current_token = &(lex -> token_stream.Next());
    current_token -> SetLocation(cursor - lex -> InputBuffer());
    current_token -> SetKind(TK_QUESTION);
    current_token -> SetSymbol(NULL);

    cursor++;

    return;
}


/**********************************************************************/
/*                     CLASSIFY_NONASCIIUNICODE:                      */
/**********************************************************************/
void Scanner::ClassifyNonAsciiUnicode()
{
    //
    // A character that does not qualify as a letter cannot start any
    // token and is handed to the bad-token routine; a letter starts
    // an identifier.
    //
    if (! Code::IsAlpha(*cursor))
    {
        ClassifyBadToken();
        return;
    }

    ClassifyId();
}