  @     	     Name 	 8          lex - Generates programs for lexical analysis. 8         Syntax  "          lex [-CTVN] [ file ] ... "         Description  F          lex generates programs to be used in simple lexical analysis F           of text.     F          The input files (standard input default) contain strings and F C          expressions to be searched for, and C text to be executed C !          when strings are found. !    B          A file lex.yy.c is generated which, when loaded with the B F          library, copies the input to the output except when a string F @          specified in the file is found; then the corresponding @ F          program text is executed.  The actual string matched is left F                             @  F          in yytext, an external character array.  Matching is done in F D          order of the strings in the file.  The strings may contain D F          square brackets to indicate character classes, as in [abx-z] F F          to indicate a, b, x, y, and z; and the operators *, +, and ? F D          mean respectively; any nonnegative number of, any positive D ?          number of, and either zero or one occurrences of, the ? D          previous character or character class.  The character . is D ;          the class of all ASCII characters except newline. ; C          Parentheses for grouping and vertical bar for alternation C F          are also supported.  The notation r{d,e} in a rule indicates F                                                   @  D          between d and e instances of regular expression r.  It has D ?          higher precedence than |, but lower than *, ?, +, and ? @          concatenation.  The character ^ at the beginning of an @ F          expression permits a successful match only immediately after F D          a newline, and the character $ at the end of an expression D <          requires a trailing newline. 
          The character / in an
          expression indicates trailing context; only the part of the
          expression up to the slash is returned in yytext, but the
          remainder of the expression must follow in the input stream.
          An operator character may be used as an ordinary symbol if
          it is within " symbols or preceded by \.  Thus, [a-zA-Z]+
          matches a string of letters.

          Three subroutines defined as macros are expected: input() to
          read a character; unput(c) to replace a character read; and
          output(c) to place an output character.  They are defined in
          terms of the standard streams, but you can override them.
          The program generated is named yylex(), and the library
          contains a main() which calls it.  The action REJECT on the
          right side of the rule causes this match to be rejected and
          the next suitable match executed; the function yymore()
          accumulates additional characters into the same yytext; and
          the function yyless(p) pushes back the portion of the string
          matched beginning at p, which should be between yytext and
          yytext+yyleng.  The macros input and output use files yyin
          and yyout to read from and write to, defaulted to stdin and
          stdout, respectively.

          Any line beginning with a blank is assumed to contain only C
          text and is copied; if it precedes %% it is copied into the
          external definition area of the lex.yy.c file.  All rules
          should follow a %%, as in YACC.
          Lines preceding %% which
          begin with a nonblank character define the string on the
          left to be the remainder of the line; it can be called out
          later by surrounding it with {}.  Note that curly brackets
          do not imply parentheses; only string substitution is done.

     Example

                  D       [0-9]
                  %%
                  if      printf("IF statement\n");
                  [a-z]+  printf("tag, value %s\n",yytext);
                  0{D}+   printf("octal number %s\n",yytext);
                  {D}+    printf("decimal number %s\n",yytext);
                  "++"    printf("unary op\n");
                  "+"     printf("binary op\n");
                  "/*"    {       loop:
                                  while (input() != '*');
                                  switch (input())
                                          {
                                          case '/': break;
                                          case '*': unput('*');
                                          default: goto loop;
                                          }
                                  }

          The external names generated by lex all begin with the
          prefix yy or YY.

          The options must appear before any files.  The option -C
          indicates C actions and is the default, -T causes the
          lex.yy.c program to be written instead to standard output,
          -V provides a one-line summary of statistics of the machine
          generated, -N will not print out the -V summary.  Multiple
          files are treated as a single file.
          If no files are
          specified, standard input is used.

          Certain table sizes for the resulting finite state machine
          can be set in the definitions section:

               %p n number of positions is n (default 2500)
               %n n number of states is n (500)
               %t n number of parse tree nodes is n (1000)
               %a n number of transitions is n (2000)

          The use of one or more of the above automatically implies
          the -V option, unless the -N option is used.

     See Also
          yacc.man

          (printed 9/27/90 - J B Systems)