Files
befpc/bepascal/source/tools/stubgen.so/cpp/lexer.l
2003-09-21 22:46:55 +00:00

523 lines
15 KiB
Plaintext

%{
/*
* FILE: lexer.l
* AUTH: Michael John Radwin <mjr@acm.org>
*
* DESC: stubgen lexer. Portions borrowed from Newcastle
* University's Arjuna project (http://arjuna.ncl.ac.uk/), and
* Jeff Lee's ANSI Grammar
* (ftp://ftp.uu.net/usenet/net.sources/ansi.c.grammar.Z)
*
* DATE: Thu Aug 15 13:10:06 EDT 1996
* $Id: lexer.l,v 1.1 2003-09-21 22:46:55 ocoursiere Exp $
*
* Copyright (c) 1996-1998 Michael John Radwin
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Modification history:
* $Log: not supported by cvs2svn $
* Revision 1.1 2001/11/07 10:06:07 ithamar
* Added stubgen to CVS
*
* Revision 1.33 1998/07/27 19:16:57 mradwin
* added some c++ keywords
* need to handle typename, using, and namespace
*
* Revision 1.32 1998/05/11 19:49:11 mradwin
* Version 2.03 (updated copyright information).
*
* Revision 1.31 1998/04/07 23:39:55 mradwin
* changed error-handling code significantly. functions
* like count() are now contributing to linebuf so we get correct
* parse error messages during lineno == 1 and other situations.
* also, instead of calling fatal() for collect*() functions,
* we return -1 and let the parser recover more gracefully.
*
* Revision 1.30 1998/01/12 19:39:11 mradwin
* modified rcsid
*
* Revision 1.29 1997/11/13 22:37:31 mradwin
* changed char[] to char * to make non-gcc compilers
* a little happier. We need to #define const to nothing
* for other compilers as well.
*
* Revision 1.28 1997/11/13 21:29:30 mradwin
* moved code from parser.y to main.c
*
* Revision 1.27 1997/11/13 21:10:17 mradwin
* renamed stubgen.[ly] to parser.y lexer.l
*
* Revision 1.26 1997/11/11 03:52:06 mradwin
* changed fatal()
*
* Revision 1.25 1997/11/05 03:02:02 mradwin
* Modified logging routines.
*
* Revision 1.24 1997/11/01 23:12:43 mradwin
* greatly improved error-recovery. errors no longer spill over
* into other files because the yyerror state is properly reset.
*
* Revision 1.23 1997/10/26 23:16:32 mradwin
* changed inform_user and fatal functions to use varargs
*
* Revision 1.22 1997/10/26 22:46:48 mradwin
* support macros within comments, etc.
*
* Revision 1.21 1997/10/16 19:42:48 mradwin
* added support for ellipses, static member/array initializers,
* and bitfields.
*
* Revision 1.20 1997/10/16 17:36:06 mradwin
* Fixed compiler warning on win32 from <ctype.h> and isspace()
*
* Revision 1.19 1997/10/16 17:12:59 mradwin
* handle extern "C" blocks better now, and support multi-line
* macros. still need error-checking.
*
* Revision 1.18 1997/10/15 22:09:06 mradwin
* changed tons of names. stubelem -> syntaxelem,
* stubin -> infile, stubout -> outfile, stublog -> logfile.
*
* Revision 1.17 1997/10/15 21:45:13 mradwin
* rearranged table.[ch] and util.[ch] so that util pkg
* knows nothing about syntaxelems.
*
* Revision 1.16 1997/10/15 17:42:37 mradwin
* added support for 'extern "C" { ... }' blocks.
*
* Revision 1.15 1997/09/05 19:17:06 mradwin
* works for scanning old versions, except for parameter
* names that differ between .H and .C files.
*
* Revision 1.14 1997/09/05 16:37:41 mradwin
* rcsid
*
* Revision 1.13 1997/09/05 16:34:36 mradwin
* GPL-ized code.
*
* Revision 1.12 1997/09/05 16:13:18 mradwin
* changed email address to acm.org
*
* Revision 1.11 1996/09/12 14:44:49 mjr
* Added throw decl recognition (great, another 4 bytes in syntaxelem)
* and cleaned up the grammar so that const_opt appears in far fewer
* places. const_opt is by default 0 as well, so we don't need to
* pass it as an arg to new_elem().
*
* I also added a fix to a potential bug with the MINIT and INLIN
* exclusive start states. I think they could have been confused
* by braces within comments, so now I'm grabbing comments in those
* states as well.
*
* Revision 1.10 1996/09/12 03:46:10 mjr
* No concrete changes in code. Just added some sanity by
* factoring out code into util.[ch] and putting some prototypes
* that were in table.h into stubgen.y where they belong.
*
* Revision 1.9 1996/09/01 20:59:48 mjr
* Added collectMemberInitList() function, which is similar
* to collectInlineDef() and also the exclusive state MINIT
*
* Revision 1.8 1996/08/23 05:09:19 mjr
* fixed up some more portability things
*
* Revision 1.7 1996/08/22 02:43:47 mjr
* added parse error message (using O'Reilly p. 274)
*
* Revision 1.6 1996/08/21 18:33:50 mjr
* removed the buffer for inlines. we don't care anyway.
* now we can't overflow on inlines!
*
* Revision 1.5 1996/08/21 17:40:56 mjr
* added some cpp directives for porting to WIN32
*
* Revision 1.4 1996/08/19 17:01:33 mjr
* no echo now
*
* Revision 1.3 1996/08/15 21:24:58 mjr
* *** empty log message ***
*/
%}
/*
 * Named pattern fragments used by the rules below:
 *   D  - one decimal digit
 *   L  - one letter or underscore (identifier character)
 *   H  - one hexadecimal digit
 *   E  - exponent part of a floating constant (e or E, optional sign, digits)
 *   FS - floating-constant suffix (one of f, F, l, L)
 *   IS - integer-constant suffix (any run of u, U, l, L, possibly empty)
 */
D [0-9]
L [a-zA-Z_]
H [a-fA-F0-9]
E [Ee][+-]?{D}+
FS (f|F|l|L)
IS (u|U|l|L)*
%{
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "table.h"
#include "util.h"
#ifdef WIN32
/* definitions of exit, malloc, realloc, and free */
#include <stdlib.h>
#endif
/* token codes used by the rules below (presumably the yacc-generated header);
   the WIN32-specific file name is currently disabled by the #if 0 */
#if 0 /* #ifdef WIN32 */
#include "y_tab.h"
#else
#include "y.tab.h"
#endif
/* STUB_INPUT() reads one character of scanner input; the function is
   called yyinput() when compiled as C++ and input() when compiled as C */
#ifdef __cplusplus
#define STUB_INPUT() yyinput()
#else
#define STUB_INPUT() input()
#endif
/* RETURN_STR(x): count the token, hand the parser a strdup()ed copy of
   yytext in yylval.string, and return token code x. The copy is always
   heap-allocated so that it can be released uniformly with free().
   NOTE: this expands to several statements and ends in a return, so it
   must be the last thing inside a braced action. */
#define RETURN_STR(x) tokens_seen++; yylval.string = strdup(yytext); return(x)
/* RETURN_VAL(x): count the token and return token code x. yylval.flag is
   set to 37, a value chosen so that misinterpreting the union as a
   pointer will fault (bus error) rather than go unnoticed. Same
   multi-statement caveat as RETURN_STR. */
#define RETURN_VAL(x) tokens_seen++; yylval.flag = 37; return(x)
static const char rcsid[] = "$Id: lexer.l,v 1.1 2003-09-21 22:46:55 ocoursiere Exp $";
/* helpers defined in the user-code section at the end of this file */
static void count();
static void comment();
static void macro();
/* text of the line being scanned, kept for error messages */
char linebuf[1024]; /* null-terminated at beginning of each file */
int lineno; /* set to 1 at beginning of each file */
/* count() advances column with tab expansion; comment() and macro()
   also use it as an index into linebuf */
int column; /* set to 0 at beginning of each file */
int tokens_seen; /* set to 0 at beginning of each file */
%}
%x INLIN MINIT
%%
\n.* { /*
	 * For debugging purposes we grab an entire line and buffer
	 * it in linebuf, then give everything except the newline
	 * back to the scanner so the tokens are matched normally.
	 * This lets us have more informative error messages.
	 * See yyerror() in parser.y
	 *
	 * The copy is limited to sizeof(linebuf) - 1 characters and
	 * the last byte is forced to NUL: strncpy() does not
	 * terminate the destination when the source fills it, so an
	 * over-long line used to leave linebuf unterminated.
	 */
	strncpy(linebuf, yytext + 1, sizeof(linebuf) - 1);
	linebuf[sizeof(linebuf) - 1] = '\0';
	lineno++;
	column = 0;
	yyless(1); /* give back everything but \n */
	}
"/*" { comment(); }
"//".* { count(); }
"#" { macro(); /* was #.* { count(); } */ }
"static" { count(); tokens_seen++; /* counted, but no token is returned to the parser */ }
"volatile" { count(); tokens_seen++; /* swallowed like static */ }
"auto" { count(); tokens_seen++; }
"extern" { count(); RETURN_VAL(EXTERN); }
"register" { count(); tokens_seen++; }
"typedef" { count(); tokens_seen++; }
"struct" { count(); RETURN_VAL(STRUCT); }
"union" { count(); RETURN_VAL(UNION); }
"enum" { count(); RETURN_VAL(ENUM); }
"const" { count(); RETURN_VAL(CONST); }
"template" { count(); RETURN_VAL(TEMPLATE); }
"typename" { count(); tokens_seen++; /* FIXME: currently swallowed, not parsed */ }
"using" { count(); tokens_seen++; /* FIXME: currently swallowed, not parsed */ }
"namespace" { count(); RETURN_VAL(CLASS); /* FIXME: reported to the parser as CLASS */ }
"dllexport" { count(); tokens_seen++; }
"dllimport" { count(); tokens_seen++; }
"explicit" { count(); tokens_seen++; }
"mutable" { count(); tokens_seen++; }
"inline" { count(); tokens_seen++; }
"virtual" { count(); tokens_seen++; }
"class" { count(); RETURN_VAL(CLASS); }
"delete" { count(); RETURN_VAL(DELETE); }
"new" { count(); RETURN_VAL(NEW); }
"friend" { count(); RETURN_VAL(FRIEND); }
"operator" { count(); RETURN_VAL(OPERATOR); }
"protected" { count(); RETURN_VAL(PROTECTED); }
"private" { count(); RETURN_VAL(PRIVATE); }
"public" { count(); RETURN_VAL(PUBLIC); }
"throw" { count(); RETURN_VAL(THROW); }
"char" { count(); RETURN_STR(CHAR); /* built-in type names carry their text in yylval.string */ }
"short" { count(); RETURN_STR(SHORT); }
"int" { count(); RETURN_STR(INT); }
"long" { count(); RETURN_STR(LONG); }
"signed" { count(); RETURN_STR(SIGNED); }
"unsigned" { count(); RETURN_STR(UNSIGNED); }
"float" { count(); RETURN_STR(FLOAT); }
"double" { count(); RETURN_STR(DOUBLE); }
"void" { count(); RETURN_STR(VOID); }
{L}({L}|{D})* { count(); RETURN_STR(IDENTIFIER); /* any word not claimed by a keyword rule above */ }
0[xX]{H}+{IS}? { count(); RETURN_STR(CONSTANT); /* hexadecimal integer */ }
0{D}+{IS}? { count(); RETURN_STR(CONSTANT); /* integer with a leading zero */ }
{D}+{IS}? { count(); RETURN_STR(CONSTANT); /* plain digit sequence */ }
'(\\.|[^\\'])+' { count(); RETURN_STR(CONSTANT); /* 'fontlck */ }
{D}+{E}{FS}? { count(); RETURN_STR(CONSTANT); /* digits with exponent */ }
{D}*"."{D}+({E})?{FS}? { count(); RETURN_STR(CONSTANT); /* fraction digits required */ }
{D}+"."{D}*({E})?{FS}? { count(); RETURN_STR(CONSTANT); /* integer digits required */ }
\"(\\.|[^\\"])*\" { count(); RETURN_STR(STRING_LITERAL); /* "fontlck */ }
">>=" { count(); RETURN_VAL(RIGHT_ASSIGN); }
"<<=" { count(); RETURN_VAL(LEFT_ASSIGN); }
"+=" { count(); RETURN_VAL(ADD_ASSIGN); }
"-=" { count(); RETURN_VAL(SUB_ASSIGN); }
"*=" { count(); RETURN_VAL(MUL_ASSIGN); }
"/=" { count(); RETURN_VAL(DIV_ASSIGN); }
"%=" { count(); RETURN_VAL(MOD_ASSIGN); }
"&=" { count(); RETURN_VAL(AND_ASSIGN); }
"^=" { count(); RETURN_VAL(XOR_ASSIGN); }
"|=" { count(); RETURN_VAL(OR_ASSIGN); }
">>" { count(); RETURN_VAL(RIGHT_OP); }
"<<" { count(); RETURN_VAL(LEFT_OP); }
"++" { count(); RETURN_VAL(INC_OP); }
"--" { count(); RETURN_VAL(DEC_OP); }
"->" { count(); RETURN_VAL(PTR_OP); }
"->*" { count(); RETURN_VAL(MEM_PTR_OP); /* longer than the plain arrow, so the scanner prefers it when both match */ }
"&&" { count(); RETURN_VAL(AND_OP); }
"||" { count(); RETURN_VAL(OR_OP); }
"<=" { count(); RETURN_VAL(LE_OP); }
">=" { count(); RETURN_VAL(GE_OP); }
"==" { count(); RETURN_VAL(EQ_OP); }
"!=" { count(); RETURN_VAL(NE_OP); }
";" { count(); RETURN_VAL(';'); /* single-character tokens use their own character code */ }
"{" { count(); RETURN_VAL('{'); }
"}" { count(); RETURN_VAL('}'); }
"," { count(); RETURN_VAL(','); }
":" { count(); RETURN_VAL(':'); }
"=" { count(); RETURN_VAL('='); }
"(" { count(); RETURN_VAL('('); }
")" { count(); RETURN_VAL(')'); }
"[" { count(); RETURN_VAL('['); }
"]" { count(); RETURN_VAL(']'); }
"." { count(); RETURN_VAL('.'); }
"&" { count(); RETURN_VAL('&'); }
"!" { count(); RETURN_VAL('!'); }
"~" { count(); RETURN_VAL('~'); }
"-" { count(); RETURN_VAL('-'); }
"+" { count(); RETURN_VAL('+'); }
"*" { count(); RETURN_VAL('*'); }
"/" { count(); RETURN_VAL('/'); }
"%" { count(); RETURN_VAL('%'); }
"<" { count(); RETURN_VAL('<'); }
">" { count(); RETURN_VAL('>'); }
"^" { count(); RETURN_VAL('^'); }
"|" { count(); RETURN_VAL('|'); }
"?" { count(); RETURN_VAL('?'); }
"::" { count(); RETURN_VAL(CLCL); /* scope resolution */ }
"..." { count(); RETURN_VAL(ELIPSIS); /* token name is spelled this way in the grammar */ }
<INLIN>"/*" { comment(); }
<INLIN>"//".* { count(); }
<INLIN>"#" { macro(); /* was #.* { count(); } */ }
<INLIN>. |
<INLIN>\n { /*
	 * INLIN is the exclusive state used by collectInlineDef():
	 * comments and preprocessor lines are skipped above, and
	 * every other character is handed back one at a time as its
	 * own token code.  The character is converted through
	 * unsigned char so that bytes >= 0x80 yield a positive code
	 * even where plain char is signed; a negative code would be
	 * mistaken for end-of-file by the caller.
	 */
	RETURN_VAL((unsigned char) yytext[0]); }
<MINIT>"/*" { comment(); }
<MINIT>"//".* { count(); }
<MINIT>"#" { macro(); /* was #.* { count(); } */ }
<MINIT>. |
<MINIT>\n { /*
	 * MINIT is the exclusive state used by
	 * collectMemberInitList(); characters are returned exactly
	 * as in INLIN, again through unsigned char.
	 */
	RETURN_VAL((unsigned char) yytext[0]); }
[ \t\v\f] { count(); }
. { count(); /* ignore bad characters */ }
%%
/*
 * yywrap() -- hook the scanner calls when it runs out of input.
 *
 * There is never a follow-on input to switch to from here, so the
 * answer is always 1: the scanner then reports end-of-file by
 * returning the zero token.
 */
int yywrap()
{
	enum { NO_MORE_INPUT = 1 };

	return NO_MORE_INPUT;
}
/*
 * comment() -- skip the body of a C comment.
 *
 * Called after the opening delimiter has been matched.  Characters are
 * read with STUB_INPUT() until the closing star-slash pair or the end
 * of input.  While skipping, the characters are mirrored into linebuf
 * at index `column`, and lineno/column/linebuf are reset at every
 * newline so that later error messages refer to the right line.
 *
 * Fixes relative to the previous version:
 *  - writes into linebuf are bounds-checked (a long comment line used
 *    to run past the end of the buffer);
 *  - the end-of-input sentinel is no longer stored in linebuf;
 *  - when a write lands on the current terminator, a new terminator is
 *    placed after it, so a line rebuilt after a newline stays a valid
 *    string instead of running into stale text;
 *  - a return value of 0 from the input function is treated as end of
 *    input as well as EOF, because some flex releases report end of
 *    file that way; otherwise an unterminated comment never returns.
 */
static void comment()
{
	int prev = 0;	/* 0: the star of the opening delimiter must not count */
	int c;

	for (;;) {
		c = STUB_INPUT();
		if (c == EOF || c == 0)
			break;

		if (c == '\n') {
			/* start a fresh, empty line */
			linebuf[0] = '\0';
			column = 0;
			lineno++;
		} else {
			if (column >= 0 && (size_t) column < sizeof(linebuf) - 1) {
				int at_end = (linebuf[column] == '\0');

				linebuf[column] = (char) c;
				if (at_end)
					linebuf[column + 1] = '\0';
			}
			column++;
			if (prev == '*' && c == '/')
				break;
		}
		prev = c;
	}
}
/*
 * macro() -- skip a preprocessor directive, including continuation lines.
 *
 * Called after the '#' has been matched.  Characters are read with
 * STUB_INPUT() and echoed to the log until the newline that ends the
 * directive or the end of input.  A newline is a continuation when the
 * last non-whitespace character before it on that line is a backslash.
 * lineno, column and linebuf are reset at every newline consumed.
 *
 * Fixes relative to the previous version:
 *  - writes into linebuf are bounds-checked and keep the buffer
 *    NUL-terminated when they extend the current line;
 *  - the end-of-input sentinel is neither stored nor logged, and a
 *    return value of 0 from the input function is also treated as end
 *    of input (some flex releases report end of file that way);
 *  - the remembered backslash is forgotten once its continuation
 *    newline has been consumed, so a blank line following a continued
 *    line ends the directive instead of swallowing the next line too;
 *  - isspace() is given an unsigned char value.
 */
static void macro()
{
	int prev = 0;	/* character read just before c */
	int nonws = 0;	/* last non-whitespace character seen before c */
	int c;

	log_printf("MACRO reading begining...\n#");
	for (;;) {
		c = STUB_INPUT();
		if (c == EOF || c == 0)
			break;
		log_printf("%c", c);

		if (!isspace((unsigned char) prev))
			nonws = prev;

		if (c == '\n') {
			int continued = (nonws == '\\');

			linebuf[0] = '\0';
			column = 0;
			lineno++;
			if (!continued)
				break;
			nonws = 0;	/* the backslash applies to this newline only */
		} else {
			if (column >= 0 && (size_t) column < sizeof(linebuf) - 1) {
				int at_end = (linebuf[column] == '\0');

				linebuf[column] = (char) c;
				if (at_end)
					linebuf[column + 1] = '\0';
			}
			column++;
		}
		prev = c;
	}
	log_printf("MACRO reading done.\n");
}
/*
 * count() -- account for the token currently held in yytext.
 *
 * On the first line of a file the token text is appended to linebuf;
 * for later lines linebuf is filled by the newline rule instead.  The
 * column is then advanced over the token: a newline resets it to 0, a
 * tab moves to the next multiple of 8, anything else counts as one.
 *
 * The append is bounded by the space left in linebuf; the previous
 * unbounded strcat() overflowed the buffer on a long first line.
 */
static void count()
{
	int i;

	if (lineno == 1) {
		size_t used = strlen(linebuf);

		if (used < sizeof(linebuf) - 1)
			strncat(linebuf, yytext, sizeof(linebuf) - 1 - used);
	}

	for (i = 0; yytext[i] != '\0'; i++) {
		if (yytext[i] == '\n')
			column = 0;
		else if (yytext[i] == '\t')
			column += 8 - (column % 8);
		else
			column++;
	}
}
/*
 * collectInlineDef() -- skip the body of an inline function.
 * (thanks to the arjuna stubgen project for this one)
 *
 * The scanner is switched to the exclusive INLIN state, in which
 * yylex() returns the body one character at a time (comments and
 * preprocessor lines are skipped by the INLIN rules).  Braces are
 * counted starting from depth 1; the '}' that brings the depth back to
 * 0 is pushed back with unput() so that it is scanned again in the
 * normal state.
 *
 * Limitation: braces inside string or character literals are counted
 * too, because the INLIN rules do not recognise literals.
 *
 * Returns 0 on success, or -1 if the input ends before the closing
 * brace.  The start state is restored on both paths; previously the
 * error path returned with the scanner still in INLIN.
 */
int collectInlineDef()
{
	int bracelevel = 1;
	int status = 0;
	int token;

	/* the magic of exclusive start states makes it all possible */
	BEGIN INLIN;

	while (bracelevel > 0) {
		token = yylex();
		column++;
		if (token <= 0) {
			/* unexpected EOF reading inline function */
			status = -1;
			break;
		}

		/* every token in this state is a single character */
		switch (token) {
		case '{':
			bracelevel++;
			break;
		case '}':
			bracelevel--;
			if (bracelevel == 0) {
				/* it will be counted again when it is rescanned */
				column--;
				unput(token);
			}
			break;
		case '\n':
			column = 0;
			lineno++;
			break;
		default:
			break;
		}
	}

	/* we now return you to your regularly scheduled start state */
	BEGIN 0;
	return status;
}
/*
 * collectMemberInitList() -- skip a constructor's member initializer list.
 *
 * Works like collectInlineDef(): the scanner is switched to the
 * exclusive MINIT state and characters are consumed one at a time until
 * the first '{', which is pushed back with unput() so that it is
 * scanned again in the normal state.
 *
 * Limitation: a '{' inside a string or character literal ends the list
 * early, because the MINIT rules do not recognise literals.
 *
 * Returns 0 on success, or -1 if the input ends before a '{' is seen.
 *
 * Changes relative to the previous version:
 *  - the start state is restored on the error path as well; it used to
 *    return with the scanner still in MINIT;
 *  - column is stepped back before the '{' is pushed back, as
 *    collectInlineDef() does for '}', so the brace is not counted twice.
 */
int collectMemberInitList()
{
	int insideList = 1;
	int status = 0;
	int token;

	/* the magic of exclusive start states makes it all possible */
	BEGIN MINIT;

	while (insideList) {
		token = yylex();
		column++;
		if (token <= 0) {
			/* unexpected EOF reading member initialization */
			status = -1;
			break;
		}

		/* every token in this state is a single character */
		switch (token) {
		case '{':
			insideList = 0;
			column--;
			unput(token);
			break;
		case '\n':
			column = 0;
			lineno++;
			break;
		default:
			break;
		}
	}

	/* we now return you to your regularly scheduled start state */
	BEGIN 0;
	return status;
}