/*****************************************************************************/
/*                                                                           */
/*  THE NONPAREIL DOCUMENT FORMATTING SYSTEM                                 */
/*  COPYRIGHT (C) 2002, 2005 Jeffrey H. Kingston                             */
/*                                                                           */
/*  Jeffrey H. Kingston (jeff@it.usyd.edu.au)                                */
/*  School of Information Technologies                                       */
/*  The University of Sydney 2006                                            */
/*  AUSTRALIA                                                                */
/*                                                                           */
/*  This program is free software; you can redistribute it and/or modify     */
/*  it under the terms of the GNU General Public License as published by     */
/*  the Free Software Foundation; either Version 2, or (at your option)      */
/*  any later version.                                                       */
/*                                                                           */
/*  This program is distributed in the hope that it will be useful,          */
/*  but WITHOUT ANY WARRANTY; without even the implied warranty of           */
/*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            */
/*  GNU General Public License for more details.                             */
/*                                                                           */
/*  You should have received a copy of the GNU General Public License        */
/*  along with this program; if not, write to the Free Software              */
/*  Foundation, Inc., 59 Temple Place, Suite 330, Boston MA 02111-1307 USA   */
/*                                                                           */
/*  FILE:         lexer.c                                                    */
/*  DESCRIPTION:  Lexical analyser (implementation)                          */
/*                                                                           */
/*****************************************************************************/
#include "externs.h"
#include <string.h>
#define	DEBUG1	0
#define	DEBUG2	0
#define	DEBUG3	0


/*****************************************************************************/
/*                                                                           */
/*  FILE_POS - one position in a file.                                       */
/*                                                                           */
/*****************************************************************************/

struct file_pos_rec {
  USTRING	file_name;	/* name of file containing token             */
  short		col_num;	/* column number where token begins          */
  short		line_num;	/* line number where token begins            */
};


/*****************************************************************************/
/*                                                                           */
/*  TOKEN - one token.                                                       */
/*                                                                           */
/*****************************************************************************/

struct token_rec {
  TOKEN_TYPE	type;		/* token type (see externs.h)                */
  FILE_POS	file_pos;	/* file position                             */
  USTRING	value;		/* string value, miscellaneous types only    */
  struct token_rec *next;	/* the next token in the source file         */
};


/*****************************************************************************/
/*                                                                           */
/*  FILE_POS FilePosNew(USTRING file_name, short line_num, int col_num)      */
/*                                                                           */
/*  Make and return a new file position.                                     */
/*                                                                           */
/*****************************************************************************/

FILE_POS FilePosNew(USTRING file_name, short line_num, int col_num)
{
  FILE_POS res;
  GetMemory(res, FILE_POS);
  assert(file_name != NULL);
  res->file_name = file_name;
  res->col_num = col_num;
  res->line_num = line_num;
  return res;
}


/*****************************************************************************/
/*                                                                           */
/*  TOKEN LexTokenNew(TOKEN_TYPE type, FILE_POS file_pos, USTRING value)     */
/*                                                                           */
/*  Make and return a new TOKEN object with these attributes and a null      */
/*  next field.                                                              */
/*                                                                           */
/*****************************************************************************/

static TOKEN token_free_list = NULL;

TOKEN LexTokenNew(TOKEN_TYPE type, FILE_POS file_pos, USTRING value)
{
  TOKEN res;
  if( token_free_list != NULL )
    res = token_free_list, token_free_list = token_free_list->next;
  else
    GetMemory(res, TOKEN);
  res->type = type;
  res->file_pos = file_pos;
  res->value = value;
  res->next = NULL;
  return res;
}


/*****************************************************************************/
/*                                                                           */
/*  void LexTokenListFree(TOKEN token)                                       */
/*                                                                           */
/*  Free the token list beginning with token.                                */
/*                                                                           */
/*****************************************************************************/

void LexTokenListFree(TOKEN token)
{
  if( token_free_list != NULL )
  {
    TOKEN t;
    for( t = token;  t->next != NULL;  t = t->next );
    t->next = token_free_list;
  }
  token_free_list = token;
}


/*****************************************************************************/
/*                                                                           */
/*  Predefined strings                                                       */
/*                                                                           */
/*  These are all the predefined token values, one for each token type       */
/*  except identifiers and literals.                                         */
/*                                                                           */
/*****************************************************************************/

/* predefined punctuation symbols */
#define STR_COLON		AStringToUString(":")
#define STR_COLON_EQUALS	AStringToUString(":=")
#define STR_COMMA		AStringToUString(",")
#define STR_EXCLAM		AStringToUString("!")
#define STR_EXCLAM_EXCLAM	AStringToUString("!!")
#define STR_DOT			AStringToUString(".")
#define STR_DOT_DOT		AStringToUString("..")
#define STR_LEFT_BRACE		AStringToUString("{")
#define STR_RIGHT_BRACE		AStringToUString("}")
#define STR_LEFT_BRACKET	AStringToUString("[")
#define STR_RIGHT_BRACKET	AStringToUString("]")
#define STR_LEFT_BAR_BRACKET	AStringToUString("[|")
#define STR_RIGHT_BAR_BRACKET	AStringToUString("|]")
#define STR_LEFT_PAREN		AStringToUString("(")
#define STR_RIGHT_PAREN		AStringToUString(")")

/* predefined reserved words: keywords, self, false, and true */
#define STR_AS			AStringToUString("as")
#define STR_BUILTIN		AStringToUString("builtin")
#define STR_CASE		AStringToUString("case")
#define STR_CLASS		AStringToUString("class")
#define STR_COERCE		AStringToUString("coerce")
#define STR_CREATION		AStringToUString("creation")
#define STR_ELSE		AStringToUString("else")
#define STR_ELSIF		AStringToUString("elsif")
#define STR_END			AStringToUString("end")
#define STR_ENUM		AStringToUString("enum")
#define STR_EXTEND		AStringToUString("extend")
#define STR_EXTENSION		AStringToUString("extension")
#define STR_FALSE		AStringToUString("false")
#define STR_FILTER		AStringToUString("filter")
#define STR_FUN			AStringToUString("fun")
#define STR_GENESIS		AStringToUString("genesis")
#define STR_IF			AStringToUString("if")
#define STR_IN			AStringToUString("in")
#define STR_INFIX		AStringToUString("infix")
#define STR_INFIXR		AStringToUString("infixr")
#define STR_INHERIT		AStringToUString("inherit")
#define STR_INTRODUCE		AStringToUString("introduce")
#define STR_INVARIANT		AStringToUString("invariant")
#define STR_IS			AStringToUString("is")
#define STR_LET			AStringToUString("let")
#define STR_LOCAL		AStringToUString("local")
#define STR_MEET		AStringToUString("meet")
#define STR_MODULE		AStringToUString("module")
#define STR_NONCREATION		AStringToUString("noncreation")
#define STR_NORENAME		AStringToUString("norename")
#define STR_PREDEFINED		AStringToUString("predefined")
#define STR_PREFIX		AStringToUString("prefix")
#define STR_PRIVATE		AStringToUString("private")
#define STR_POSTFIX		AStringToUString("postfix")
#define STR_RENAME		AStringToUString("rename")
#define STR_REQUIRE		AStringToUString("require")
#define STR_SELF		AStringToUString("self")
#define STR_SYSTEM		AStringToUString("system")
#define STR_THEN		AStringToUString("then")
#define STR_TRUE		AStringToUString("true")
#define STR_USE			AStringToUString("use")
#define STR_WHEN		AStringToUString("when")
#define STR_YIELD		AStringToUString("yield")


/*****************************************************************************/
/*                                                                           */
/*  Shared copies of predefined strings.                                     */
/*                                                                           */
/*  To save memory, we don't make a fresh copy of each of the above strings  */
/*  when needed.  Instead, we share in two ways.  First, for each            */
/*  predefined punctuation sequence we have a static variable initialized    */
/*  by LexInit.  A few other standard strings also get this treatment.       */
/*                                                                           */
/*  For reserved words we have the predefs symbol table.  Its first value    */
/*  is a shared copy of the key string, its second is the token type for     */
/*  the keyword with this value.                                             */
/*                                                                           */
/*****************************************************************************/

/* shared copies of reserved punctuation sequences */
static USTRING pstr_colon;
static USTRING pstr_colon_equals;
static USTRING pstr_comma;
static USTRING pstr_exclam;
static USTRING pstr_exclam_exclam;
static USTRING pstr_dot;
static USTRING pstr_dot_dot;
static USTRING pstr_left_brace;
static USTRING pstr_right_brace;
static USTRING pstr_left_bracket;
static USTRING pstr_right_bracket;
static USTRING pstr_left_bar_bracket;
static USTRING pstr_right_bar_bracket;
static USTRING pstr_left_paren;
static USTRING pstr_right_paren;

/* typedef for the predefs symbol table */
typedef ARRAY(TOKEN_TYPE)		ARRAY_TOKEN_TYPE;
typedef SYMTAB(ARRAY_TOKEN_TYPE)	SYMTAB_TOKEN_TYPE;


/*****************************************************************************/
/*                                                                           */
/*  Static variables and LexInit(), which initializes them.                  */
/*                                                                           */
/*****************************************************************************/

static	BOOLEAN		init = FALSE;	/* TRUE when module is initialized   */
static	USTRING		file_name;	/* name of current file              */
static	unsigned	line_num;	/* number of current line	     */
static	unsigned	col_num;	/* number of current column	     */
static 	BOOLEAN		error_seen;	/* TRUE if an error has been seen    */
static  SYMTAB_TOKEN_TYPE predefs;	/* predefined symbols                */

static void InsertPredefined(USTRING str, TOKEN_TYPE ttype)
{
  TOKEN_TYPE tt;
  SymInsert(predefs, str, ttype, &tt);
}

static void LexInit()
{
  if( DEBUG1 )
    fprintf(stderr, "[ LexInit()\n");

  /* initialize predefs table */
  SymInit(&predefs);
  InsertPredefined(STR_AS,          TK_AS);
  InsertPredefined(STR_BUILTIN,     TK_BUILTIN);
  InsertPredefined(STR_CASE,        TK_CASE);
  InsertPredefined(STR_CLASS,       TK_CLASS);
  InsertPredefined(STR_CASE,        TK_CREATION);
  InsertPredefined(STR_COERCE,      TK_COERCE);
  InsertPredefined(STR_ELSE,        TK_ELSE);
  InsertPredefined(STR_ELSIF,       TK_ELSIF);
  InsertPredefined(STR_END,         TK_END);
  InsertPredefined(STR_ENUM,        TK_ENUM);
  InsertPredefined(STR_EXTEND,      TK_EXTEND);
  InsertPredefined(STR_EXTENSION,   TK_EXTENSION);
  InsertPredefined(STR_FALSE,       TK_FALSE);
  InsertPredefined(STR_FILTER,      TK_FILTER);
  InsertPredefined(STR_FUN,         TK_FUN);
  InsertPredefined(STR_GENESIS,     TK_GENESIS);
  InsertPredefined(STR_IF,          TK_IF);
  InsertPredefined(STR_IN,          TK_IN);
  InsertPredefined(STR_INFIX,       TK_INFIX);
  InsertPredefined(STR_INFIXR,      TK_INFIXR);
  InsertPredefined(STR_INHERIT,     TK_INHERIT);
  InsertPredefined(STR_INTRODUCE,   TK_INTRODUCE);
  InsertPredefined(STR_INVARIANT,   TK_INVARIANT);
  InsertPredefined(STR_IS,          TK_IS);
  InsertPredefined(STR_LET,         TK_LET);
  InsertPredefined(STR_LOCAL,       TK_LOCAL);
  InsertPredefined(STR_MEET,        TK_MEET);
  InsertPredefined(STR_MODULE,      TK_MODULE);
  InsertPredefined(STR_NONCREATION, TK_NONCREATION);
  InsertPredefined(STR_NORENAME,    TK_NORENAME);
  InsertPredefined(STR_PREDEFINED,  TK_PREDEFINED);
  InsertPredefined(STR_PREFIX,      TK_PREFIX);
  InsertPredefined(STR_PRIVATE,     TK_PRIVATE);
  InsertPredefined(STR_POSTFIX,     TK_POSTFIX);
  InsertPredefined(STR_RENAME,      TK_RENAME);
  InsertPredefined(STR_REQUIRE,     TK_REQUIRE);
  InsertPredefined(STR_SELF,        TK_SELF);
  InsertPredefined(STR_SYSTEM,      TK_SYSTEM);
  InsertPredefined(STR_THEN,        TK_THEN);
  InsertPredefined(STR_TRUE,        TK_TRUE);
  InsertPredefined(STR_USE,         TK_USE);
  InsertPredefined(STR_WHEN,        TK_WHEN);
  InsertPredefined(STR_YIELD,       TK_YIELD);
  /* SymDebugPrint(predefs); */

  /* initialize one heap copy of various often-used strings */
  pstr_colon		= STR_COLON;
  pstr_colon_equals	= STR_COLON_EQUALS;
  pstr_comma		= STR_COMMA;
  pstr_exclam		= STR_EXCLAM;
  pstr_exclam_exclam	= STR_EXCLAM_EXCLAM;
  pstr_dot		= STR_DOT;
  pstr_dot_dot		= STR_DOT_DOT;
  pstr_left_brace	= STR_LEFT_BRACE;
  pstr_right_brace	= STR_RIGHT_BRACE;
  pstr_left_bracket	= STR_LEFT_BRACKET;
  pstr_right_bracket	= STR_RIGHT_BRACKET;
  pstr_left_bar_bracket	= STR_LEFT_BAR_BRACKET;
  pstr_right_bar_bracket= STR_RIGHT_BAR_BRACKET;
  pstr_left_paren	= STR_LEFT_PAREN;
  pstr_right_paren	= STR_RIGHT_PAREN;

  /* record that initialization has been done */
  init = TRUE;

  if( DEBUG1 )
    fprintf(stderr, "] LexInit()\n");
}


/*****************************************************************************/
/*                                                                           */
/*  UCHAR get_unicode_char(FILE *fp)                                         */
/*                                                                           */
/*  Read the next Unicode character from UTF-8 file fp and return the        */
/*  32-bit Unicode scalar value.  Fail with an error message if the          */
/*  characters in the file violate the UTF-8 rules.                          */
/*                                                                           */
/*  Return UEOF (i.e. 0xFFFFFFFF, not a Unicode character) at end of file.   */
/*                                                                           */
/*****************************************************************************/

static UCHAR get_unicode_char(FILE *fp)
{
  UCHAR res;

  /* handle ordinary case, including skipping out-of-range characters */
  col_num++;
  while( UCharGet(fp, &res) )
  {
    if( DEBUG3 )
    {
      if( res == UEOF )
	fprintf(stderr, "get_unicode_char returning UEOF\n");
      else
	fprintf(stderr, "get_unicode_char returning 0x%x (%s)\n",
	  res, UCharLexClassShow(UCharLexClass(res)));
    }
    return res;
  }

  /* handle error case, where UTF-8 format rules are broken */
  switch( uchar_error_type )
  {
    case UE_UNEXPECTED_EOF:

      fprintf(stderr, "%s:%d:%d: end of file within multibyte character\n",
	(ASTRING) UStringToUTF8(file_name), line_num, col_num);
      break;


    case UE_ILLEGAL_CODE:

      fprintf(stderr, "%s:%d:%d: illegal character code 0x%x\n",
	(ASTRING) UStringToUTF8(file_name), line_num,col_num,uchar_error_byte);
      break;


    case UE_ILLEGAL_BYTE1:

      fprintf(stderr, "%s:%d:%d: illegal first-byte code 0x%x\n",
	(ASTRING) UStringToUTF8(file_name), line_num,col_num,uchar_error_byte);
      break;


    case UE_ILLEGAL_BYTE2:

      fprintf(stderr,
	"%s:%d:%d: illegal second byte code 0x%x in multibyte character\n",
	(ASTRING) UStringToUTF8(file_name), line_num,col_num,uchar_error_byte);
      break;


    case UE_ILLEGAL_BYTE3:

      fprintf(stderr,
	"%s:%d:%d: illegal third byte code 0x%x in multibyte character\n",
	(ASTRING) UStringToUTF8(file_name), line_num,col_num,uchar_error_byte);
      break;


    case UE_ILLEGAL_BYTE4:

      fprintf(stderr,
	"%s:%d:%d: illegal fourth byte code 0x%x in multibyte character\n",
	(ASTRING) UStringToUTF8(file_name), line_num,col_num,uchar_error_byte);
      break;

  }
  exit(1);
  return 0; /* keep compiler happy */
}


/*****************************************************************************/
/*                                                                           */
/*  static UCHAR LexNextLine(UCHAR ch, FILE *fp)                             */
/*                                                                           */
/*  Assuming that the current unconsumed character is ch, move through the   */
/*  file and return a new value of ch so that ch contains the first          */
/*  character of the next line, or UEOF if there is no next line.            */
/*                                                                           */
/*  Update line_num and col_num if we successfully started a new line.       */
/*                                                                           */
/*  This code assumes that any line can end with \r\n, \r, \n, \v, the       */
/*  Unicode line separator character 0x2028, or the Unicode paragraph        */
/*  separator 0x2029.  All these characters are in the UCHAR_LEX_ENDLINE     */
/*  lexical class, and only these.                                           */
/*                                                                           */
/*****************************************************************************/
#define within_line(x) ((x) != UEOF && UCharLexClass(x) != UCHAR_LEX_ENDLINE)

static UCHAR LexNextLine(UCHAR ch, FILE *fp)
{
  UCHAR ch2;

  /* move to first end of line character, return UEOF if end of file */
  while( within_line(ch) )
    ch = get_unicode_char(fp);
  if( ch == UEOF ) return UEOF;

  /* move to next character, ch2 */
  ch2 = get_unicode_char(fp);

  /* if ch2 counts as part of the end of line, move one character further */
  if( ch == '\r' && ch2 == '\n' )
    ch2 = get_unicode_char(fp);

  line_num++;
  col_num = 1;
  return ch2;
}


/*****************************************************************************/
/*                                                                           */
/*  void check_op_for_table(TOKEN lastop, SYMTAB_NAMED op_table)             */
/*                                                                           */
/*  The precondition of check_op_for_table is that lastop is a "prefix",     */
/*  "infix", or "postfix" token not previously seen by this function, and    */
/*  that it is followed by other tokens known to end in a name token,        */
/*  either an identifier or a punctuation sequence.  It follows from this    */
/*  that we can safely call NameDefParse on &lastop without any risk of      */
/*  falling off the end of the token stream (although lastop will be set     */
/*  to NULL by the call to NameDefParse).                                    */
/*                                                                           */
/*  If lastop does indeed initiate the definition of an operator, then an    */
/*  entry for it is inserted into op_table, or if an entry with that name    */
/*  is already there it is checked for compatibility.                        */
/*                                                                           */
/*  Starting from lastop, the syntax we are looking for is any of            */
/*                                                                           */
/*     prefix <name>                                                         */
/*     postfix <name>                                                        */
/*     infix [ <precedence> ] <name>                                         */
/*     infixr [ <precedence> ] <name>                                        */
/*                                                                           */
/*  where the token stream must end after this syntax.                       */
/*                                                                           */
/*  Set error_seen if any errors were printed.                               */
/*                                                                           */
/*****************************************************************************/

static void check_op_for_table(TOKEN lastop, SYMTAB_NAMED op_table)
{
  NAME nd;
  TOKEN t = lastop;
  if( NameDefParse(&t, op_table, &nd) )
  {
    /* add the name to the op_table and fail if it does */
    assert(t == NULL);  /* checks that we are at the end */
    if( !NameOperatorConsistent(nd, lastop->file_pos, op_table, "module") )
      error_seen = TRUE;
  }
  else
    error_seen = TRUE;
}


/*****************************************************************************/
/*                                                                           */
/*  add_buff and add_token                                                   */
/*                                                                           */
/*  Helper macros for accumulating strings and tokens.                       */
/*                                                                           */
/*****************************************************************************/
#define MAX_TOKEN_LENGTH 256	/* max token length, not comments or spaces  */

static UCHAR buff[MAX_TOKEN_LENGTH];
static int bi;

#define clear_buff() bi = 0
#define empty_buff() (bi == 0)

#define add_buff(ch)							\
{									\
  if( bi >= MAX_TOKEN_LENGTH )						\
  {									\
    fprintf(stderr, "%s:%d:%d: maximum token length (%d) exceeded\n",	\
      (ASTRING) UStringToUTF8(file_name), line_num, col_num,		\
      MAX_TOKEN_LENGTH);						\
    exit(1);								\
  }									\
  buff[bi++] = ch;							\
}

#define add_token(typ, file, line, col, val)				\
{									\
  TOKEN t;  FILE_POS pos;						\
  pos = FilePosNew(file, line, col);					\
  t = LexTokenNew(typ, pos, val);					\
  if( first_token == NULL )						\
    first_token = t;							\
  else									\
    last_token->next = t;						\
  last_token = t;							\
}


/*****************************************************************************/
/*                                                                           */
/*  UCHAR GetHex(UCHAR ch, FILE *fp, unsigned int *res)                      */
/*                                                                           */
/*  Read one hexadecimal value from fp, starting with ch, and return         */
/*  its value in *hex_value, and the first unread character as res.          */
/*  This is not used for ordinary hexadecimal numbers, only for escape       */
/*  sequences within string and character literals.                          */
/*                                                                           */
/*****************************************************************************/
#define NOT_HEX		16		/* a non-hexadecimal value           */

#define hex_value(ch)							\
(									\
  ( (ch) >= '0' && ch <= '9' ) ? ch - '0' :				\
  ( (ch) >= 'A' && ch <= 'F' ) ? ch - ('A' - 10) : NOT_HEX		\
)

static UCHAR GetHex(UCHAR ch, FILE *fp, unsigned int *res)
{
  unsigned int hexval, count, save_col_num;
  *res = 0;
  save_col_num = col_num;
  count = 0;
  while( (hexval = hex_value(ch)) != NOT_HEX )
  {
    if( ++count > 6 )
    {
      fprintf(stderr, "%s:%d:%d: too many hexadecimal digits in character\n",
	(ASTRING) UStringToUTF8(file_name), line_num, save_col_num);
      error_seen = TRUE;
      *res = 0;
      return ch;
    }
    *res = (*res << 4) | hexval;
    ch = get_unicode_char(fp);
  }
  if( count == 0 )
  {
    fprintf(stderr, "%s:%d:%d: expected hexadecimal digit here\n",
      (ASTRING) UStringToUTF8(file_name), line_num, col_num);
    error_seen = TRUE;
    *res = 0;
    return ch;
  }
  if( *res == 0 )
  {
    fprintf(stderr, "%s:%d:%d: invalid hexadecimal value 0\n",
      (ASTRING) UStringToUTF8(file_name), line_num, col_num);
    error_seen = TRUE;
    *res = 0;
    return ch;
  }
  return ch;
}


/*****************************************************************************/
/*                                                                           */
/*  UCHAR LexEscapeSequence(UCHAR ch, FILE *fp)                              */
/*                                                                           */
/*  Parameter ch is the backslash just found inside a quoted string or       */
/*  character literal.  Move fp on and add_buff all the characters of this   */
/*  literal.  Return the first unconsumed character.                         */
/*                                                                           */
/*  The complete set of legal escape codes is:                               */
/*                                                                           */
/*      \\            \                                                      */
/*      \'            '                                                      */
/*      \"            "                                                      */
/*      \[hhhh { hhhh }]                                                     */
/*                                                                           */
/*  According to the Unicode book, page 45, the range of legal high          */
/*  surrogates and low surrogates, and the formula for combining high and    */
/*  low surrogates into a scalar value, are as in the macros just below.     */
/*                                                                           */
/*****************************************************************************/
#define high_surrogate(ch)	( (ch) >= 0xD800 && (ch) <= 0xDBFF )
#define low_surrogate(ch)	( (ch) >= 0xDC00 && (ch) <= 0xDFFF )
#define scalar_code(h, l) ((((h) - 0xD800) << 10) + ((l) - 0xDC00) + 0x10000)

static UCHAR LexEscapeSequence(UCHAR ch, FILE *fp)
{
  int save_col_num;  unsigned hexval, high_surr, low_surr;
  assert(ch == '\\');

  /* move past the backslash and make sure we are still on the line */
  ch = get_unicode_char(fp);
  if( !within_line(ch) )
  {
    fprintf(stderr, "%s:%d:%d: unterminated string or character literal\n",
      (ASTRING) UStringToUTF8(file_name), line_num, col_num);
    error_seen = TRUE;
    return ch;
  }

  /* handle the character or character sequene after the backslash */
  switch( ch )
  {
    case '\\':
    case '\'':
    case '\"':
      
      add_buff(ch);
      ch = get_unicode_char(fp);
      break;


    case '[':

      do
      {
	/* skip initial '[' or separating ' ', known to be there */
	ch = get_unicode_char(fp);

	/* get a hexadecimal value */
	ch = GetHex(ch, fp, &hexval);
	if( hexval != 0 )
	{
	  if( high_surrogate(hexval) )
	  {
	    /* if found a high surrogate, get the following low surrogate */
	    high_surr = hexval;
	    if( UCharLexClass(ch) != UCHAR_LEX_SPACE )
	    {
	      fprintf(stderr,
		"%s:%d:%d: space before low surrogate code expected here\n",
		(ASTRING) UStringToUTF8(file_name), line_num, col_num); 
	      error_seen = TRUE;
	      return ch;
	    }
	    ch = get_unicode_char(fp);
	    if( hex_value(ch) == NOT_HEX )
	    {
	      fprintf(stderr, "%s:%d:%d: low surrogate code expected here\n",
		(ASTRING) UStringToUTF8(file_name), line_num, col_num); 
	      error_seen = TRUE;
	      return ch;
	    }
	    save_col_num = col_num;
	    ch = GetHex(ch, fp, &low_surr);
	    if( !low_surrogate(low_surr) )
	    {
	      fprintf(stderr, "%s:%d:%d: low surrogate code expected here\n",
		(ASTRING) UStringToUTF8(file_name), line_num, save_col_num); 
	      error_seen = TRUE;
	      return ch;
	    }
	    add_buff(scalar_code(high_surr, low_surr));
	  }
	  else
	    add_buff(hexval);
	}

      } while( within_line(ch) && ch == ' ' );
      if( ch != ']' )
      {
	fprintf(stderr, "%s:%d:%d: unexpected end of literal\n",
	  (ASTRING) UStringToUTF8(file_name), line_num, col_num); 
	error_seen = TRUE;
	return ch;
      }
      ch = get_unicode_char(fp);
      break;


    default:

      fprintf(stderr, "%s:%d:%d: unexpected character after \\\n",
	(ASTRING) UStringToUTF8(file_name), line_num, col_num);
      error_seen = TRUE;
      break;
  }
  return ch;
}


/*****************************************************************************/
/*                                                                           */
/*  UCHAR LexDigitSequence(UCHAR ch, FILE *fp)                               */
/*                                                                           */
/*  Add a non-empty sequence of digits to the current token buffer,          */
/*  starting with ch, which is known to be a digit.  Return the first        */
/*  non-digit.                                                               */
/*                                                                           */
/*****************************************************************************/
#define is_digit(ch) (UCharLexClass(ch) == UCHAR_LEX_DIGIT)

static UCHAR LexDigitSequence(UCHAR ch, FILE *fp)
{
  do
  {
    add_buff(ch);
    ch = get_unicode_char(fp);
  } while( ch != UEOF && is_digit(ch) );
  return ch;
}


/*****************************************************************************/
/*                                                                           */
/*  static void DoLex(FILE *fp, BOOLEAN end_at_newline, TOKEN *tokens,       */
/*    SYMTAB_NAMED op_table)                                                 */
/*                                                                           */
/*  Do the actual lexing of file *fp, ending at the end of line or at end    */
/*  of file depending on end_at_newline.                                     */
/*                                                                           */
/*  Static variables file_name, line_num, col_num, and error_seen will be    */
/*  initialized by the time DoLex is called, as will the module generally.   */
/*                                                                           */
/*  If op_table is not NULL, then it is a symbol table which has been        */
/*  initialized and may already contain some values, which are being built   */
/*  up into the set of all free symbols (i.e. not requiring a . in front of  */
/*  them to be recognizable) visible within the current module.  LexFile     */
/*  does its bit towards building this table by adding any prefix, infix,    */
/*  or postfix operators declared in fp, and reporting any name clashes.     */
/*  Prefix operators whose names are punctuation symbols will have an _      */
/*  prefixed to their names in op_table.                                     */
/*                                                                           */
/*****************************************************************************/
#define is_other_punct(ch) (UCharLexClass(ch) == UCHAR_LEX_OTHER_PUNCT)

#define is_identchar(ch)						\
( (lc = UCharLexClass(ch)) == UCHAR_LEX_ID_BEGIN ||			\
  lc == UCHAR_LEX_ID_EXTEND || lc == UCHAR_LEX_DIGIT			\
)


static void DoLex(FILE *fp, BOOLEAN end_at_newline, TOKEN *tokens,
  SYMTAB_NAMED op_table)
{
  TOKEN first_token = NULL, last_token = NULL, lastop = NULL;
  UCHAR ch;  int save_col_num;  USTRING str;  TOKEN_TYPE ttype;
  UCHAR_LEX_CLASS lc;

  if( DEBUG1 )
    fprintf(stderr, "[ DoLex(fp, op_table %s NULL, at %s:%d:%d)\n", 
      op_table == NULL ? "==" : "!=", (ASTRING) UStringToUTF8(file_name),
      line_num, col_num);

  /* read through the file; each iteration consumes one space or token;   */
  /* at the start of each iteration, ch is the first unconsumed character */
  ch = get_unicode_char(fp);
  while( ch != UEOF )
  {
    lc = UCharLexClass(ch);
    if( DEBUG2 )
      fprintf(stderr, "  char %c (%04X), lex class %s\n",
	ch >= ' ' && ch <= '~' ? ch : '*', ch, UCharLexClassShow(lc));
    switch( lc )
    {
      case UCHAR_LEX_HASH:
      case UCHAR_LEX_ENDLINE:

	/* comment start or end of line */
	if( end_at_newline )
	  ch = UEOF;
	else
	  ch = LexNextLine(ch, fp);
	break;


      case UCHAR_LEX_QUOTE_DOUBLE:

	/* read string, handling backslash escapes */
	save_col_num = col_num;
        ch = get_unicode_char(fp);
	clear_buff();
	while( ch != '"' && within_line(ch) )
	{
	  if( ch == '\\' )
	    ch = LexEscapeSequence(ch, fp);
	  else
	  {
	    add_buff(ch);
	    ch = get_unicode_char(fp);
	  }
	}
	add_buff(UEOF);

	/* position ch correctly for next token */
	if( ch == '"' )
	  ch = get_unicode_char(fp);
	else
	{
	  fprintf(stderr, "%s:%d:%d: unterminated string\n",
            (ASTRING) UStringToUTF8(file_name), line_num, save_col_num);
	  error_seen = TRUE;
	}

	/* construct string token and add it to token sequence */
	add_token(TK_LIT_STRING, file_name, line_num, save_col_num, 
	  UStringCopy(buff));
	break;


      case UCHAR_LEX_QUOTE_SINGLE:

	/* note: characters are lexed as strings then length checked after */
	/* read character, handling backslash escapes */
	save_col_num = col_num;
        ch = get_unicode_char(fp);
	clear_buff();
	while( ch != '\'' && within_line(ch) )
	{
	  if( ch == '\\' )
	    ch = LexEscapeSequence(ch, fp);
	  else
	  {
	    add_buff(ch);
	    ch = get_unicode_char(fp);
	  }
	}

	/* handle empty character case and close buffer */
	if( empty_buff() )
	{
	  fprintf(stderr, "%s:%d:%d: empty literal character\n",
            (ASTRING) UStringToUTF8(file_name), line_num, save_col_num);
	  add_buff('?');
	  error_seen = TRUE;
	}
	add_buff(UEOF);

	/* position ch correctly for next token */
	if( ch == '\'' )
	  ch = get_unicode_char(fp);
	else
	{
	  fprintf(stderr, "%s:%d:%d: unterminated literal character\n",
            (ASTRING) UStringToUTF8(file_name), line_num, save_col_num);
	  error_seen = TRUE;
	}

	/* truncate if length > 1 */
	if( UStringLength(buff) > 1 )
	{
	  fprintf(stderr, "%s:%d:%d: excess characters in literal character\n",
            (ASTRING) UStringToUTF8(file_name), line_num, save_col_num);
	  buff[1] = 0;
	  error_seen = TRUE;
	}

	/* construct character token and add it to token sequence */
	add_token(TK_LIT_CHARACTER, file_name, line_num, save_col_num, 
	  UStringCopy(buff));
	break;


      case UCHAR_LEX_LEFT_PAREN:

	add_token(TK_LEFT_PAREN, file_name, line_num, col_num,
          pstr_left_paren);
	ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_RIGHT_PAREN:

	add_token(TK_RIGHT_PAREN, file_name, line_num, col_num,
          pstr_right_paren);
	ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_COMMA:

	add_token(TK_COMMA, file_name, line_num, col_num, pstr_comma);
	ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_EXCLAM:

	ch = get_unicode_char(fp); 
	if( ch == '!' )
	{
	  add_token(TK_EXCLAM_EXCLAM, file_name, line_num, col_num - 1,
            pstr_exclam_exclam);
	  ch = get_unicode_char(fp); 
	}
	else
	  add_token(TK_EXCLAM, file_name, line_num, col_num - 1, pstr_exclam);
	break;


      case UCHAR_LEX_DOT:

	ch = get_unicode_char(fp); 
	if( ch == '.' )
	{
	  add_token(TK_DOT_DOT, file_name, line_num, col_num - 1,
            pstr_dot_dot);
	  ch = get_unicode_char(fp); 
	}
	else
	  add_token(TK_DOT, file_name, line_num, col_num - 1, pstr_dot);
	break;


      case UCHAR_LEX_COLON:

	ch = get_unicode_char(fp); 
	if( ch == '=' )
	{
	  add_token(TK_COLON_EQUALS, file_name, line_num, col_num - 1,
            pstr_colon_equals);
	  ch = get_unicode_char(fp); 
	}
	else
	  add_token(TK_COLON, file_name, line_num, col_num - 1, pstr_colon);
	break;


      case UCHAR_LEX_LEFT_BRACKET:

	ch = get_unicode_char(fp);
	if( ch == '|' )
	{
	  add_token(TK_LEFT_BAR_BRACKET, file_name, line_num, col_num - 1,
	    pstr_left_bar_bracket);
	  ch = get_unicode_char(fp); 
	}
	else
	  add_token(TK_LEFT_BRACKET, file_name, line_num, col_num,
	    pstr_left_bracket);
	break;


      case UCHAR_LEX_BACKSLASH:

	fprintf(stderr, "%s:%d:%d: unexpected \\ character\n",
          (ASTRING) UStringToUTF8(file_name), line_num, col_num);
	ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_RIGHT_BRACKET:

	add_token(TK_RIGHT_BRACKET, file_name, line_num, col_num,
          pstr_right_bracket);
	ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_LEFT_BRACE:

	add_token(TK_LEFT_BRACE, file_name, line_num, col_num,
          pstr_left_brace);
	ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_RIGHT_BRACE:

	add_token(TK_RIGHT_BRACE, file_name, line_num, col_num,
          pstr_right_brace);
	ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_ID_BEGIN:

	save_col_num = col_num;
	clear_buff();
	add_buff(ch);
	ch = get_unicode_char(fp); 
	while( ch != UEOF && is_identchar(ch) )
	{
	  add_buff(ch);
	  ch = get_unicode_char(fp); 
	}
	add_buff(UEOF);
	if( SymRetrieveWithString(predefs, buff, &str, &ttype) )
	{
	  /* identifier is a predefined word, save it as such */
	  add_token(ttype, file_name, line_num, save_col_num, str);

	  /* if operator keyword, remember it for operator table checking  */
	  if( ttype == TK_INFIX || ttype == TK_INFIXR ||
	      ttype == TK_PREFIX || ttype == TK_POSTFIX )
	    lastop = last_token;
	}
	else
	{
	  add_token(TK_IDENTIFIER, file_name, line_num, save_col_num,
	    UStringCopy(buff));

	  /* check whether this is an operator, and remember it if so */
	  if( op_table != NULL && lastop != NULL )
	    check_op_for_table(lastop, op_table);
	  lastop = NULL;
	}
	break;


      case UCHAR_LEX_DIGIT:

	/* initially integer, may change to real later */
	ttype = TK_LIT_INTEGER;
	save_col_num = col_num;
	clear_buff();

        /* read initial nonempty sequence of digits, or hex value */
	add_buff(ch);
	ch = get_unicode_char(fp);
	if( buff[0] == '0' && ch == 'x' )
	{
	  /* hexadecimal integer value */
	  add_buff(ch);
	  ch = get_unicode_char(fp);
	  if( hex_value(ch) == NOT_HEX )
	  {
	    /* missing hexadecimal digit after 0x */
	    fprintf(stderr, "%s:%d:%d: hexadecimal digit expected after 0x\n",
              (ASTRING) UStringToUTF8(file_name), line_num, col_num);
	    error_seen = TRUE;
	  }
	  else
	  {
	    /* keep reading while hexadecimal digits coming */
	    do
	    {
	      add_buff(ch);
	      ch = get_unicode_char(fp);
	    } while( ch != UEOF && hex_value(ch) != NOT_HEX );
	  }
	}
	else
	{
	  /* not hexadecimal; go on with initial sequence of digits */
	  while( ch != UEOF && is_digit(ch) )
	  {
	    add_buff(ch);
	    ch = get_unicode_char(fp);
	  }

	  /* read optional fractional part; digit must follow decimal point */
	  if( ch == '.' )
	  {
	    ttype = TK_LIT_REAL;
	    add_buff(ch);
	    ch = get_unicode_char(fp);
	    if( !is_digit(ch) )
	    {
	      /* missing digit after decimal point */
	      fprintf(stderr, "%s:%d:%d: digit required after decimal point\n",
		(ASTRING) UStringToUTF8(file_name), line_num, col_num);
	      add_buff('0');
	      error_seen = TRUE;
	    }
	    else
	      ch = LexDigitSequence(ch, fp);
	  }

	  /* read optional exponent part */
	  if( ch == 'e' || ch == 'E' )
	  {
	    /* save exponent character */
	    ttype = TK_LIT_REAL;
	    add_buff(ch);
	    ch = get_unicode_char(fp);

	    /* read optional sign */
	    if( ch == '-' )
	    {
	      add_buff(ch);
	      ch = get_unicode_char(fp);
	    }

	    /* read digits of exponent */
	    if( !is_digit(ch) )
	    {
	      /* missing first digit of exponent */
	      fprintf(stderr, "%s:%d:%d: digit required in exponent\n",
		(ASTRING) UStringToUTF8(file_name), line_num, col_num);
	      add_buff('0');
	      error_seen = TRUE;
	    }
	    else
	      ch = LexDigitSequence(ch, fp);
	  }
	}

	/* make token */
	add_buff(UEOF);
	add_token(ttype, file_name, line_num, save_col_num, UStringCopy(buff));
	break;


      case UCHAR_LEX_OTHER_PUNCT:

	/* maximal sequences of these are operator names, except for |] */
	/* get first character into buff */
	save_col_num = col_num;
	clear_buff();
	add_buff(ch);
	ch = get_unicode_char(fp);

	if( buff[0] == '|' && ch == ']' )
	{
	  /* token is |], so return TK_TIGHT_BAR_BRACKET */
	  add_token(TK_RIGHT_BAR_BRACKET, file_name, line_num, save_col_num,
	    pstr_right_bar_bracket);
	  ch = get_unicode_char(fp); 
	}
	else
	{
	  /* ordinary; get remaining characters into buff, build the token */
	  while( ch != UEOF && is_other_punct(ch) )
	  {
	    add_buff(ch);
	    ch = get_unicode_char(fp); 
	  }
	  add_buff(UEOF);
	  add_token(TK_PUNCTSEQ, file_name, line_num, save_col_num,
	    UStringCopy(buff));

	  /* check whether this is an operator, and remember it if so */
	  if( op_table != NULL && lastop != NULL )
	    check_op_for_table(lastop, op_table);
	  lastop = NULL;
	}
	break;


      case UCHAR_LEX_SPACE:
      case UCHAR_LEX_TAB:

        ch = get_unicode_char(fp);
	break;


      case UCHAR_LEX_ID_EXTEND:
      case UCHAR_LEX_OTHER:

	/* no token should begin with a character in these classes */
        fprintf(stderr, "%s:%d:%d: unexpected character 0x%x\n",
	  (ASTRING) UStringToUTF8(file_name), line_num, col_num, ch);
        ch = get_unicode_char(fp);
        error_seen = TRUE;
	break;


      default:

	/* class value does not make sense */
        fprintf(stderr,
	  "%s:%d:%d internal error: unknown lex class %d for character %04X\n",
	  (ASTRING) UStringToUTF8(file_name), line_num, col_num, lc, ch);
        ch = get_unicode_char(fp);
	break;

    }
  }
  add_token(TK_END_FILE, file_name, line_num, col_num,
    AStringToUString("<endfile>"));
  *tokens = first_token;
  if( DEBUG1 )
  {
    fprintf(stderr, "] DoLex returning at %s:%d:%d; tokens:)\n", 
      (ASTRING) UStringToUTF8(file_name), line_num, col_num);
    LexDebug(*tokens, stderr);
  }
}


/*****************************************************************************/
/*                                                                           */
/*  BOOLEAN LexFile(USTRING fdir, USTRING file, TOKEN *tokens,               */
/*    SYMTAB_NAMED op_table, FILE_POS err_pos)                               */
/*                                                                           */
/*  Open file fdir/file (or just file if fdir is NULL), perform a lexical    */
/*  analysis on it.  Return TRUE if there were no lexical errors, and set    */
/*  *tokens to the list of tokens found.  Otherwise return FALSE.            */
/*                                                                           */
/*  The value of *tokens is a pointer to the first token, or NULL if the     */
/*  list of tokens is empty (which can't happen, because there is always     */
/*  a concluding end of file token).                                         */
/*                                                                           */
/*  If op_table is not NULL, then it is a symbol table which has been        */
/*  initialized and may already contain some values, which are being built   */
/*  up into the set of all free symbols (i.e. not requiring a . in front of  */
/*  them to be recognizable) visible within the current module.  LexFile     */
/*  does its bit towards building this table by adding any prefix, infix,    */
/*  or postfix operators declared in fp, and reporting any name clashes.     */
/*                                                                           */
/*  Prefix operators whose names are punctuation symbols will have an _      */
/*  prefixed to their names in op_table.                                     */
/*                                                                           */
/*  If err_pos is non-NULL it gives a file position to use for reporting     */
/*  failure to open the desired file.                                        */
/*                                                                           */
/*****************************************************************************/

BOOLEAN LexFile(USTRING fdir, USTRING file, TOKEN *tokens,
  SYMTAB_NAMED op_table, FILE_POS err_pos)
{
  FILE *fp;

  if( DEBUG1 )
    fprintf(stderr, "[ LexFile(%s, %s, op_table %s NULL)\n", 
      fdir == NULL ? "NULL" : (ASTRING) UStringToUTF8(fdir),
      (ASTRING) UStringToUTF8(file), op_table == NULL ? "==" : "!=");

  /* make sure this module is initialized */
  if( !init )
    LexInit();

  /* initialize file position variables */
  file_name = (fdir == NULL ? file :
    UStringCat3(fdir, AStringToUString(NPC_DIR_SEP), file));
  assert(file_name != NULL);
  line_num = 1;
  col_num = 0;
  error_seen = FALSE;

  /* open the file and fail if can't */
  fp = fopen((ASTRING) UStringToUTF8(file_name), "r");
  if( fp == NULL )
  {
    if( err_pos != NULL )
      fprintf(stderr, "%s: cannot open file \"%s\"\n", FilePosShow(err_pos),
	(ASTRING) UStringToUTF8(file_name));
    else
      fprintf(stderr, "cannot open file \"%s\"\n",
	(ASTRING) UStringToUTF8(file_name));
    return FALSE;
  }

  /* read through the file; each iteration consumes one space or token;   */
  /* at the start of each iteration, ch is the first unconsumed character */
  DoLex(fp, FALSE, tokens, op_table);
  fclose(fp);
  return !error_seen;
}


/*****************************************************************************/
/*                                                                           */
/*  BOOLEAN LexLine(FILE *fp, TOKEN *tokens)                                 */
/*                                                                           */
/*  Lex one line of open file fp, returning the result in *tokens and        */
/*  result TRUE if successful.                                               */
/*                                                                           */
/*****************************************************************************/

BOOLEAN LexLine(FILE *fp, TOKEN *tokens)
{
  /* initialize file position variables */
  file_name = AStringToUString("np");
  line_num = 1;
  col_num = 0;
  error_seen = FALSE;

  /* do the lexing */
  DoLex(fp, TRUE, tokens, NULL);
  return !error_seen;
}


/*****************************************************************************/
/*                                                                           */
/*  BOOLEAN LexIsName(TOKEN token)                                           */
/*                                                                           */
/*  Returns TRUE if token is a name (either an identifier or punctseq).      */
/*                                                                           */
/*****************************************************************************/

BOOLEAN LexIsName(TOKEN token)
{
  return token->type == TK_IDENTIFIER || token->type == TK_PUNCTSEQ;
}


/*****************************************************************************/
/*                                                                           */
/*  void LexAddPos(FILE_POS pos, AFACTORY af)                                */
/*                                                                           */
/*  Add character string representation of pos to af.                        */
/*                                                                           */
/*****************************************************************************/

void LexAddPos(FILE_POS pos, AFACTORY af)
{
  char buff[100];
  sprintf(buff, "%s:%d:%d", (ASTRING) UStringToUTF8(pos->file_name),
    pos->line_num, pos->col_num);
  AStringAddAString(af, buff);
}


/*****************************************************************************/
/*                                                                           */
/*  FILE_POS LexFilePos(TOKEN t)                                             */
/*                                                                           */
/*  Return the file position of token t.                                     */
/*                                                                           */
/*****************************************************************************/

FILE_POS LexFilePos(TOKEN t)
{
  return t->file_pos;
}


/*****************************************************************************/
/*                                                                           */
/*  UTF8 FilePosShow(FILE_POS pos)                                           */
/*                                                                           */
/*  Return a character string representation of this file position.          */
/*                                                                           */
/*****************************************************************************/

UTF8 FilePosShow(FILE_POS pos)
{
  if( pos == NULL )
    return (UTF8) "NULL";
  else
  {
    AFACTORY af = AStringBegin();
    LexAddPos(pos, af);
    return (UTF8) AStringEnd(af);
  }
}


/*****************************************************************************/
/*                                                                           */
/*  UTF8 LexPos(TOKEN t)                                                     */
/*                                                                           */
/*  Return a character string representation of the position of token t.     */
/*                                                                           */
/*****************************************************************************/

UTF8 LexPos(TOKEN t)
{
  return (t == NULL ? (UTF8) "NULL" : FilePosShow(t->file_pos));
}


/*****************************************************************************/
/*                                                                           */
/*  TOKEN_TYPE LexType(TOKEN t)                                              */
/*                                                                           */
/*  Return the type of token t.                                              */
/*                                                                           */
/*****************************************************************************/

TOKEN_TYPE LexType(TOKEN t)
{
  return t->type;
}


/*****************************************************************************/
/*                                                                           */
/*  TOKEN LexNext(TOKEN t)                                                   */
/*                                                                           */
/*  Return the token following t.                                            */
/*                                                                           */
/*****************************************************************************/

TOKEN LexNext(TOKEN t)
{
  return t->next;
}


/*****************************************************************************/
/*                                                                           */
/*  USTRING LexValue(TOKEN t)                                                */
/*                                                                           */
/*  Return the string value of token t.                                      */
/*                                                                           */
/*****************************************************************************/

USTRING LexValue(TOKEN t)
{
  return t->value;
}


/*****************************************************************************/
/*                                                                           */
/*  UTF8 LexShow(TOKEN t)                                                    */
/*                                                                           */
/*  Return a character string representation of the content of token t.      */
/*  The returned value is not permanent; use AStringCopy if you want a       */
/*  premanent value.                                                         */
/*                                                                           */
/*****************************************************************************/

UTF8 LexShow(TOKEN t)
{
  return UStringToUTF8(t->value);
}


/*****************************************************************************/
/*                                                                           */
/*  void LexDebug(TOKEN tokens, FILE *fp)                                    */
/*                                                                           */
/*  Debug print of sequence of tokens to file fp.                            */
/*                                                                           */
/*****************************************************************************/

static char *token_type(TOKEN_TYPE ttype)
{
  switch( ttype )
  {
    case TK_COLON:		return "colon";
    case TK_COLON_EQUALS:	return "coloneq";
    case TK_COMMA:		return "comma";
    case TK_EXCLAM:		return "exclam";
    case TK_EXCLAM_EXCLAM:	return "exclam2";
    case TK_DOT:		return "dot";
    case TK_DOT_DOT:		return "dotdot";
    case TK_LEFT_BRACE:		return "lbrace";
    case TK_RIGHT_BRACE:	return "rbrace";
    case TK_LEFT_BRACKET:	return "lbracket";
    case TK_RIGHT_BRACKET:	return "rbracket";
    case TK_LEFT_PAREN:		return "lparen";
    case TK_RIGHT_PAREN:	return "rparen";

    case TK_AS:			return "kas";
    case TK_BUILTIN:		return "kbuiltin";
    case TK_CASE:		return "kcase";
    case TK_CLASS:		return "kclass";
    case TK_CREATION:		return "kcreation";
    case TK_COERCE:		return "kcoerce";
    case TK_ELSE:		return "kelse";
    case TK_ELSIF:		return "kelsif";
    case TK_END:		return "kend";
    case TK_ENUM:		return "kenum";
    case TK_EXTEND:		return "kextend";
    case TK_EXTENSION:		return "kextension";
    case TK_FALSE:		return "pfalse";
    case TK_FILTER:		return "kfilter";
    case TK_FUN:		return "kfun";
    case TK_GENESIS:		return "kgenesis";
    case TK_IF:			return "kif";
    case TK_IN:			return "kin";
    case TK_INFIX:		return "kinfix";
    case TK_INFIXR:		return "kinfixr";
    case TK_INHERIT:		return "kinherit";
    case TK_INTRODUCE:		return "kintroduce";
    case TK_INVARIANT:		return "kinvariant";
    case TK_IS:			return "kis";
    case TK_LET:		return "klet";
    case TK_LOCAL:		return "klocal";
    case TK_MEET:		return "kmeet";
    case TK_MODULE:		return "kmodule";
    case TK_NONCREATION:	return "knoncreation";
    case TK_NORENAME:		return "knorename";
    case TK_PREDEFINED:		return "kpredefined";
    case TK_PREFIX:		return "kprefix";
    case TK_PRIVATE:		return "kprivate";
    case TK_POSTFIX:		return "kpostfix";
    case TK_RENAME:		return "krename";
    case TK_REQUIRE:		return "krequire";
    case TK_SELF:		return "pself";
    case TK_SYSTEM:		return "ksystem";
    case TK_THEN:		return "kthen";
    case TK_TRUE:		return "ptrue";
    case TK_WHEN:		return "kwhen";
    case TK_USE:		return "kuse";
    case TK_YIELD:		return "kyield";
    case TK_IDENTIFIER:		return "identifier";
    case TK_PUNCTSEQ:		return "punctseq";
    case TK_LIT_INTEGER:	return "integer";
    case TK_LIT_REAL:		return "real";
    case TK_LIT_CHARACTER:	return "character";
    case TK_LIT_STRING:		return "string";
    case TK_END_FILE:		return "endfile";

    default:

      fprintf(stderr, "internal error: token_type found %d\n", ttype);
      exit(1);
      return "";
  }
}

void LexDebug(TOKEN tokens, FILE *fp)
{ TOKEN t;
  char buff[100];
  fprintf(fp, "[ LexDebug:\n");
  for( t = tokens;  t != NULL;  t = t->next )
  {
    sprintf(buff, "%s:", LexPos(t)); 
    fprintf(fp, "  %-16s %-12s %s\n", buff, token_type(t->type),
      UStringToUTF8(t->value));
  }
  fprintf(fp, "]\n");
}
