INTERFACE M3CLex;

(***************************************************************************)
(*                      Copyright (C) Olivetti 1989                        *)
(*                          All Rights reserved                            *)
(*                                                                         *)
(* Use and copy of this software and preparation of derivative works based *)
(* upon this software are permitted to any person, provided this same      *)
(* copyright notice and the following Olivetti warranty disclaimer are     *) 
(* included in any copy of the software or any modification thereof or     *)
(* derivative work therefrom made by any person.                           *)
(*                                                                         *)
(* This software is made available AS IS and Olivetti disclaims all        *)
(* warranties with respect to this software, whether expressed or implied  *)
(* under any law, including all implied warranties of merchantibility and  *)
(* fitness for any purpose. In no event shall Olivetti be liable for any   *)
(* damages whatsoever resulting from loss of use, data or profits or       *)
(* otherwise arising out of or in connection with the use or performance   *)
(* of this software.                                                       *)
(***************************************************************************)

IMPORT Text;
IMPORT IO;
IMPORT M3AST_LX;
IMPORT M3CHash, M3CToken, M3CReservedWord, M3CSrcPos;

(* Lexer for Modula 3 *)

(* WARNING: This lexer makes extensive use of <*INLINE*> procedures. The
parser which uses it must inline the lexer functions, and the lexer in turn
depends on being able to inline M3CHash functions. Without inlining the lexer
will be uncomfortably slow *)

TYPE
  T <: ROOT; (* Opaque type representing a lexer; only the fact that it is a
                subtype of ROOT is visible through this interface *)

  (* A 'CallBack' object is used by the lexer to notify the user of items
   the lexer does not itself turn into tokens: bad characters, comments and
   pragmas. The methods are declared without default implementations, so a
   usable call back must be a subtype which overrides them. *)
  CallBack = OBJECT
  METHODS
    badChar(ch: CHAR) RAISES {};         (* Bad character found; 'ch' is
                                            presumably the offending char *)
    comment(comment: Text.T) RAISES {};  (* Comment found; whether the text
                                            includes the comment brackets is
                                            not stated here *)
    pragma(pragma: Text.T) RAISES {};    (* Pragma found; whether the text
                                            includes the pragma brackets is
                                            not stated here *)
  END;

  Symbol_rep = M3AST_LX.Symbol_rep;      (* Represents an identifier *)
  Literal_rep = M3AST_LX.Literal_rep;    (* Represents a numeric, character *)
                                         (* or text literal *)

REVEAL
  (* Partial revelations: clients of this interface may rely on both
   representation types being subtypes of 'M3CHash.Id' *)
  Symbol_rep <: M3CHash.Id;              (* Identifiers and literals are *)
  Literal_rep <: M3CHash.Id;             (* both stored in hash tables *)


PROCEDURE New(
    s: IO.Stream;
    identifiers: M3CReservedWord.Table;
    literals: M3CHash.Table;
    callBack: CallBack;
    init: T := NIL)
    : T
    RAISES {};
(* Creates a new lexer, or initializes an existing one.
  's' is the stream the lexer will read from.
  'identifiers' is the hash table into which any identifiers found will be
put. Note that this table already contains all the reserved words; hence only
one hash lookup/entry operation is needed for a reserved word or identifier.
  'literals' is the hash table into which any literals found will be put.
  'callBack' is the object whose appropriate method will be called when a bad
character, comment or pragma is found.
  'init' selects between creation and initialization. If 'init' is NIL (the
usual case) 'New' creates and initializes a new lexer. If 'init' is non NIL
'New' just initializes the lexer 'init'. 'init' should only be non NIL when
initializing a newly created object which is a subtype of 'M3CLex.T'.
  The result is the lexer; presumably this is 'init' itself when 'init' is
non NIL, though that is not stated explicitly here. *)

(* It is a checked runtime error to use a NIL lexer as an argument to any of
the following procedures *)

<*INLINE*> PROCEDURE Current(t: T): M3CToken.T RAISES {};
(* Returns the current token of the lexer 't'. Use 'Next' to advance to the
following token *)

<*INLINE*> PROCEDURE Next(t: T): M3CToken.T RAISES {IO.Error};
(* Advances 't' to the next token and then returns the (new) current token.
If 't' has been disabled (see 'Disable') the result is 'M3CToken.T.Void' and
the position does not advance.
  May raise 'IO.Error'; presumably this comes from reading the stream the
lexer was given, but that is not stated here *)

<*INLINE*> PROCEDURE Position(t: T): M3CSrcPos.T RAISES {};
(* Returns the current position of the lexer 't'.
  NOTE(review): whether this is the position of the start of the current
token or of the next unread character is not stated here; confirm against the
implementation before relying on either *)

<*INLINE*> PROCEDURE Literal(t: T): Literal_rep RAISES {};
(* Returns the representation of the current literal of 't'. The result is
only defined if 'Current(t) IN M3CToken.T.Literals'; if
'NOT Current(t) IN M3CToken.T.Literals' the result of 'Literal' is undefined.
  When the lexer encounters a literal it creates a text which it puts into
the literal hash table. The texts can be distinguished as follows:
  Valid numeric literals: text starts with a digit and ends with a hex digit
  Valid text literals:    text starts and ends with double quote character
  Valid char literals:    text starts and ends with single quote character
  Sometimes the lexer finds an invalid literal. A numeric literal is invalid
if it is a based number and one of its digits is out of range e.g. 2_13. A
character or text literal could be missing its closing quote. In these cases
the lexer returns an invalid literal.
  Invalid literals always have at least one character and their first
character can still be used to distinguish the literal type (numerics start
with a digit, texts with double quote, chars with single quote). They can be
spotted because their last character is inappropriate for that type; e.g. not
a hex digit if the literal is numeric.
  NOTE(review): a one character text consisting of just a quote character
both starts and ends with that quote, so the start/end test alone would seem
to accept it; presumably the length must be checked as well. Confirm against
the implementation *)

<*INLINE*> PROCEDURE Identifier(t: T): Symbol_rep RAISES {};
(* Returns a handle for the hashed text of the current identifier of 't'. The
result is only defined if 'Current(t) = M3CToken.T.Identifier'; for any other
current token the result is undefined *)

PROCEDURE Disable(t: T) RAISES {};
(* Disables the lexer 't'. While 't' is disabled any call of 'Next' will
return 'M3CToken.T.Void' and the position will not advance. 'Reset' enables a
disabled lexer again *)

PROCEDURE Disabled(t: T): BOOLEAN RAISES {};
(* Returns TRUE iff 't' is disabled, i.e. it has been disabled by 'Disable'
and not since been enabled again by 'Reset' *)

PROCEDURE Reset(t: T; pos := M3CSrcPos.Null; s: IO.Stream := NIL) RAISES {};
(* Resets 't'; sets the current symbol to 'M3CToken.T.Void' and enables 't' if
it is disabled. The optional arguments adjust the position and the stream:
  'pos': if 'pos' is not 'M3CSrcPos.Null' sets the lexer position to be 'pos'.
  's': if 's' is not NIL sets the lexer stream to be 's' and sets the lexer
position to be 'pos' if 'pos # M3CSrcPos.Null' or line 1 offset 0 otherwise.
  When both arguments are left at their defaults no change to the position or
stream is described; presumably both are left as they were *)


PROCEDURE TokenToText(token: M3CToken.T): Text.T RAISES {};
(* Returns a text describing the given token, suitable for use in error
messages. Only the token class is available here, so the text cannot include
an identifier name or literal value; see 'CurrentTokenToText' for that *)

PROCEDURE CurrentTokenToText(t: T): Text.T RAISES {};
(* Returns a text describing the current token of the lexer 't'. This may
give more information than 'TokenToText(Current(t))' because it incorporates
identifier names and literal values if appropriate *)

END M3CLex.
