526 lines
15 KiB
Plaintext
526 lines
15 KiB
Plaintext
%{
|
|
/*
|
|
* FILE: lexer.l
|
|
* AUTH: Michael John Radwin <mjr@acm.org>
|
|
*
|
|
* DESC: stubgen lexer. Portions borrowed from Newcastle
|
|
* University's Arjuna project (http://arjuna.ncl.ac.uk/), and
|
|
* Jeff Lee's ANSI Grammar
|
|
* (ftp://ftp.uu.net/usenet/net.sources/ansi.c.grammar.Z)
|
|
*
|
|
* DATE: Thu Aug 15 13:10:06 EDT 1996
|
|
* $Id: lexer.l,v 1.2 2003-10-20 22:46:40 jetsoni Exp $
|
|
*
|
|
* Copyright (c) 1996-1998 Michael John Radwin
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*
|
|
* Modification history:
|
|
* $Log: not supported by cvs2svn $
|
|
* Revision 1.1 2003/09/21 22:46:55 ocoursiere
|
|
* stubgen.so is now include in the build process
|
|
*
|
|
* Revision 1.1 2001/11/07 10:06:07 ithamar
|
|
* Added stubgen to CVS
|
|
*
|
|
* Revision 1.33 1998/07/27 19:16:57 mradwin
|
|
* added some c++ keywords
|
|
* need to handle typename, using, and namespace
|
|
*
|
|
* Revision 1.32 1998/05/11 19:49:11 mradwin
|
|
* Version 2.03 (updated copyright information).
|
|
*
|
|
* Revision 1.31 1998/04/07 23:39:55 mradwin
|
|
* changed error-handling code significantly. functions
|
|
* like count() are now contributing to linebuf so we get correct
|
|
* parse error messages during lineno == 1 and other situations.
|
|
* also, instead of calling fatal() for collect*() functions,
|
|
* we return -1 and let the parser recover more gracefully.
|
|
*
|
|
* Revision 1.30 1998/01/12 19:39:11 mradwin
|
|
* modified rcsid
|
|
*
|
|
* Revision 1.29 1997/11/13 22:37:31 mradwin
|
|
* changed char[] to char * to make non-gcc compilers
|
|
* a little happier. We need to #define const to nothing
|
|
* for other compilers as well.
|
|
*
|
|
* Revision 1.28 1997/11/13 21:29:30 mradwin
|
|
* moved code from parser.y to main.c
|
|
*
|
|
* Revision 1.27 1997/11/13 21:10:17 mradwin
|
|
* renamed stubgen.[ly] to parser.y lexer.l
|
|
*
|
|
* Revision 1.26 1997/11/11 03:52:06 mradwin
|
|
* changed fatal()
|
|
*
|
|
* Revision 1.25 1997/11/05 03:02:02 mradwin
|
|
* Modified logging routines.
|
|
*
|
|
* Revision 1.24 1997/11/01 23:12:43 mradwin
|
|
* greatly improved error-recovery. errors no longer spill over
|
|
* into other files because the yyerror state is properly reset.
|
|
*
|
|
* Revision 1.23 1997/10/26 23:16:32 mradwin
|
|
* changed inform_user and fatal functions to use varargs
|
|
*
|
|
* Revision 1.22 1997/10/26 22:46:48 mradwin
|
|
* support macros within comments, etc.
|
|
*
|
|
* Revision 1.21 1997/10/16 19:42:48 mradwin
|
|
* added support for ellipses, static member/array initializers,
|
|
* and bitfields.
|
|
*
|
|
* Revision 1.20 1997/10/16 17:36:06 mradwin
|
|
* Fixed compiler warning on win32 from <ctype.h> and isspace()
|
|
*
|
|
* Revision 1.19 1997/10/16 17:12:59 mradwin
|
|
* handle extern "C" blocks better now, and support multi-line
|
|
* macros. still need error-checking.
|
|
*
|
|
* Revision 1.18 1997/10/15 22:09:06 mradwin
|
|
* changed tons of names. stubelem -> syntaxelem,
|
|
* stubin -> infile, stubout -> outfile, stublog -> logfile.
|
|
*
|
|
* Revision 1.17 1997/10/15 21:45:13 mradwin
|
|
* rearranged table.[ch] and util.[ch] so that util pkg
|
|
* knows nothing about syntaxelems.
|
|
*
|
|
* Revision 1.16 1997/10/15 17:42:37 mradwin
|
|
* added support for 'extern "C" { ... }' blocks.
|
|
*
|
|
* Revision 1.15 1997/09/05 19:17:06 mradwin
|
|
* works for scanning old versions, except for parameter
|
|
* names that differ between .H and .C files.
|
|
*
|
|
* Revision 1.14 1997/09/05 16:37:41 mradwin
|
|
* rcsid
|
|
*
|
|
* Revision 1.13 1997/09/05 16:34:36 mradwin
|
|
* GPL-ized code.
|
|
*
|
|
* Revision 1.12 1997/09/05 16:13:18 mradwin
|
|
* changed email address to acm.org
|
|
*
|
|
* Revision 1.11 1996/09/12 14:44:49 mjr
|
|
* Added throw decl recognition (great, another 4 bytes in syntaxelem)
|
|
* and cleaned up the grammar so that const_opt appears in far fewer
|
|
* places. const_opt is by default 0 as well, so we don't need to
|
|
* pass it as an arg to new_elem().
|
|
*
|
|
* I also added a fix to a potential bug with the MINIT and INLIN
|
|
* exclusive start states. I think they could have been confused
|
|
* by braces within comments, so now I'm grabbing comments in those
|
|
* states as well.
|
|
*
|
|
* Revision 1.10 1996/09/12 03:46:10 mjr
|
|
* No concrete changes in code. Just added some sanity by
|
|
* factoring out code into util.[ch] and putting some prototypes
|
|
* that were in table.h into stubgen.y where they belong.
|
|
*
|
|
* Revision 1.9 1996/09/01 20:59:48 mjr
|
|
* Added collectMemberInitList() function, which is similar
|
|
* to collectInlineDef() and also the exclusive state MINIT
|
|
*
|
|
* Revision 1.8 1996/08/23 05:09:19 mjr
|
|
* fixed up some more portability things
|
|
*
|
|
* Revision 1.7 1996/08/22 02:43:47 mjr
|
|
* added parse error message (using O'Reilly p. 274)
|
|
*
|
|
* Revision 1.6 1996/08/21 18:33:50 mjr
|
|
* removed the buffer for inlines. we don't care anyway.
|
|
* now we can't overflow on inlines!
|
|
*
|
|
* Revision 1.5 1996/08/21 17:40:56 mjr
|
|
* added some cpp directives for porting to WIN32
|
|
*
|
|
* Revision 1.4 1996/08/19 17:01:33 mjr
|
|
* no echo now
|
|
*
|
|
* Revision 1.3 1996/08/15 21:24:58 mjr
|
|
* *** empty log message ***
|
|
*/
|
|
%}
|
|
|
|
D [0-9]
|
|
L [a-zA-Z_]
|
|
H [a-fA-F0-9]
|
|
E [Ee][+-]?{D}+
|
|
FS (f|F|l|L)
|
|
IS (u|U|l|L)*
|
|
|
|
%{
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#include "table.h"
|
|
#include "util.h"
|
|
|
|
#ifdef WIN32
|
|
/* definitions of exit, malloc, realloc, and free */
|
|
#include <stdlib.h>
|
|
#endif
|
|
|
|
#if 0 /* #ifdef WIN32 */
|
|
#include "y_tab.h"
|
|
#else
|
|
#include "parser.h"
|
|
#endif
|
|
|
|
#ifdef __cplusplus
|
|
#define STUB_INPUT() yyinput()
|
|
#else
|
|
#define STUB_INPUT() input()
|
|
#endif
|
|
|
|
/* when we return a string, duplicate it so we can free it later.
|
|
we always allocate memory so we can uniformly free() it. */
|
|
#define RETURN_STR(x) tokens_seen++; yylval.string = strdup(yytext); return(x)
|
|
|
|
/* make that nasty union a value that will bus error if we misinterpret
|
|
the value as a pointer */
|
|
#define RETURN_VAL(x) tokens_seen++; yylval.flag = 37; return(x)
|
|
|
|
static const char rcsid[] = "$Id: lexer.l,v 1.2 2003-10-20 22:46:40 jetsoni Exp $";
|
|
|
|
static void count();
|
|
static void comment();
|
|
static void macro();
|
|
|
|
char linebuf[1024]; /* null-terminated at beginning of each file */
|
|
int lineno; /* set to 1 at beginning of each file */
|
|
int column; /* set to 0 at beginning of each file */
|
|
int tokens_seen; /* set to 0 at beginning of each file */
|
|
|
|
%}
|
|
|
|
%x INLIN MINIT
|
|
%%
|
|
\n.*			{
			  /*
			   * For debugging purposes, grab an entire line and
			   * buffer it in linebuf, then hand everything except
			   * the leading newline back to the scanner so the
			   * tokens on the line are matched normally.  This
			   * allows more informative error messages; see
			   * yyerror() in parser.y.
			   *
			   * strncpy() does not write a terminator when the
			   * source fills the whole destination, so copy at
			   * most sizeof(linebuf) - 1 bytes and terminate
			   * explicitly; over-long lines are truncated.
			   */
			  strncpy(linebuf, yytext + 1, sizeof(linebuf) - 1);
			  linebuf[sizeof(linebuf) - 1] = '\0';
			  lineno++;
			  column = 0;
			  yyless(1);	/* give back everything but \n */
			}
|
|
"/*" { comment(); }
|
|
|
|
"//".* { count(); }
|
|
"#" { macro(); /* was #.* { count(); } */ }
|
|
|
|
"static" { count(); tokens_seen++; }
|
|
"volatile" { count(); tokens_seen++; }
|
|
"auto" { count(); tokens_seen++; }
|
|
"extern" { count(); RETURN_VAL(EXTERN); }
|
|
"register" { count(); tokens_seen++; }
|
|
"typedef" { count(); tokens_seen++; }
|
|
"struct" { count(); RETURN_VAL(STRUCT); }
|
|
"union" { count(); RETURN_VAL(UNION); }
|
|
"enum" { count(); RETURN_VAL(ENUM); }
|
|
"const" { count(); RETURN_VAL(CONST); }
|
|
"template" { count(); RETURN_VAL(TEMPLATE); }
|
|
|
|
"typename" { count(); tokens_seen++; /* FIXME */ }
|
|
"using" { count(); tokens_seen++; /* FIXME */ }
|
|
"namespace" { count(); RETURN_VAL(CLASS); /* FIXME */ }
|
|
|
|
"dllexport" { count(); tokens_seen++; }
|
|
"dllimport" { count(); tokens_seen++; }
|
|
|
|
"explicit" { count(); tokens_seen++; }
|
|
"mutable" { count(); tokens_seen++; }
|
|
"inline" { count(); tokens_seen++; }
|
|
"virtual" { count(); tokens_seen++; }
|
|
"class" { count(); RETURN_VAL(CLASS); }
|
|
"delete" { count(); RETURN_VAL(DELETE); }
|
|
"new" { count(); RETURN_VAL(NEW); }
|
|
"friend" { count(); RETURN_VAL(FRIEND); }
|
|
"operator" { count(); RETURN_VAL(OPERATOR); }
|
|
"protected" { count(); RETURN_VAL(PROTECTED); }
|
|
"private" { count(); RETURN_VAL(PRIVATE); }
|
|
"public" { count(); RETURN_VAL(PUBLIC); }
|
|
"throw" { count(); RETURN_VAL(THROW); }
|
|
|
|
"char" { count(); RETURN_STR(CHAR); }
|
|
"short" { count(); RETURN_STR(SHORT); }
|
|
"int" { count(); RETURN_STR(INT); }
|
|
"long" { count(); RETURN_STR(LONG); }
|
|
"signed" { count(); RETURN_STR(SIGNED); }
|
|
"unsigned" { count(); RETURN_STR(UNSIGNED); }
|
|
"float" { count(); RETURN_STR(FLOAT); }
|
|
"double" { count(); RETURN_STR(DOUBLE); }
|
|
"void" { count(); RETURN_STR(VOID); }
|
|
|
|
{L}({L}|{D})* { count(); RETURN_STR(IDENTIFIER); }
|
|
|
|
0[xX]{H}+{IS}? { count(); RETURN_STR(CONSTANT); }
|
|
0{D}+{IS}? { count(); RETURN_STR(CONSTANT); }
|
|
{D}+{IS}? { count(); RETURN_STR(CONSTANT); }
|
|
'(\\.|[^\\'])+' { count(); RETURN_STR(CONSTANT); /* 'fontlck */ }
|
|
|
|
{D}+{E}{FS}? { count(); RETURN_STR(CONSTANT); }
|
|
{D}*"."{D}+({E})?{FS}? { count(); RETURN_STR(CONSTANT); }
|
|
{D}+"."{D}*({E})?{FS}? { count(); RETURN_STR(CONSTANT); }
|
|
|
|
\"(\\.|[^\\"])*\" { count(); RETURN_STR(STRING_LITERAL); /* "fontlck */ }
|
|
|
|
">>=" { count(); RETURN_VAL(RIGHT_ASSIGN); }
|
|
"<<=" { count(); RETURN_VAL(LEFT_ASSIGN); }
|
|
"+=" { count(); RETURN_VAL(ADD_ASSIGN); }
|
|
"-=" { count(); RETURN_VAL(SUB_ASSIGN); }
|
|
"*=" { count(); RETURN_VAL(MUL_ASSIGN); }
|
|
"/=" { count(); RETURN_VAL(DIV_ASSIGN); }
|
|
"%=" { count(); RETURN_VAL(MOD_ASSIGN); }
|
|
"&=" { count(); RETURN_VAL(AND_ASSIGN); }
|
|
"^=" { count(); RETURN_VAL(XOR_ASSIGN); }
|
|
"|=" { count(); RETURN_VAL(OR_ASSIGN); }
|
|
">>" { count(); RETURN_VAL(RIGHT_OP); }
|
|
"<<" { count(); RETURN_VAL(LEFT_OP); }
|
|
"++" { count(); RETURN_VAL(INC_OP); }
|
|
"--" { count(); RETURN_VAL(DEC_OP); }
|
|
"->" { count(); RETURN_VAL(PTR_OP); }
|
|
"->*" { count(); RETURN_VAL(MEM_PTR_OP); }
|
|
"&&" { count(); RETURN_VAL(AND_OP); }
|
|
"||" { count(); RETURN_VAL(OR_OP); }
|
|
"<=" { count(); RETURN_VAL(LE_OP); }
|
|
">=" { count(); RETURN_VAL(GE_OP); }
|
|
"==" { count(); RETURN_VAL(EQ_OP); }
|
|
"!=" { count(); RETURN_VAL(NE_OP); }
|
|
";" { count(); RETURN_VAL(';'); }
|
|
"{" { count(); RETURN_VAL('{'); }
|
|
"}" { count(); RETURN_VAL('}'); }
|
|
"," { count(); RETURN_VAL(','); }
|
|
":" { count(); RETURN_VAL(':'); }
|
|
"=" { count(); RETURN_VAL('='); }
|
|
"(" { count(); RETURN_VAL('('); }
|
|
")" { count(); RETURN_VAL(')'); }
|
|
"[" { count(); RETURN_VAL('['); }
|
|
"]" { count(); RETURN_VAL(']'); }
|
|
"." { count(); RETURN_VAL('.'); }
|
|
"&" { count(); RETURN_VAL('&'); }
|
|
"!" { count(); RETURN_VAL('!'); }
|
|
"~" { count(); RETURN_VAL('~'); }
|
|
"-" { count(); RETURN_VAL('-'); }
|
|
"+" { count(); RETURN_VAL('+'); }
|
|
"*" { count(); RETURN_VAL('*'); }
|
|
"/" { count(); RETURN_VAL('/'); }
|
|
"%" { count(); RETURN_VAL('%'); }
|
|
"<" { count(); RETURN_VAL('<'); }
|
|
">" { count(); RETURN_VAL('>'); }
|
|
"^" { count(); RETURN_VAL('^'); }
|
|
"|" { count(); RETURN_VAL('|'); }
|
|
"?" { count(); RETURN_VAL('?'); }
|
|
"::" { count(); RETURN_VAL(CLCL); }
|
|
"..." { count(); RETURN_VAL(ELIPSIS); }
|
|
|
|
<INLIN>"/*" { comment(); }
|
|
<INLIN>"//".* { count(); }
|
|
<INLIN>"#" { macro(); /* was #.* { count(); } */ }
|
|
<INLIN>. |
|
|
<INLIN>\n { RETURN_VAL((int) yytext[0]); }
|
|
|
|
<MINIT>"/*" { comment(); }
|
|
<MINIT>"//".* { count(); }
|
|
<MINIT>"#" { macro(); /* was #.* { count(); } */ }
|
|
<MINIT>. |
|
|
<MINIT>\n { RETURN_VAL((int) yytext[0]); }
|
|
|
|
[ \t\v\f] { count(); }
|
|
. { count(); /* ignore bad characters */ }
|
|
|
|
%%
|
|
|
|
/*
 * End-of-input hook for the generated scanner.  Always answers 1, which
 * tells the scanner there is no further input to switch to, so it reports
 * end-of-file by returning the zero token.
 */
int yywrap()
{
    return 1;
}
|
|
|
|
static void comment()
|
|
{
|
|
int c1 = 0, c2 = STUB_INPUT();
|
|
|
|
linebuf[column] = c2;
|
|
column++;
|
|
for(;;) {
|
|
if (c2 == EOF)
|
|
break;
|
|
if (c1 == '*' && c2 == '/')
|
|
break;
|
|
if (c2 == '\n') {
|
|
linebuf[0] = '\0';
|
|
column = 0;
|
|
lineno++;
|
|
}
|
|
|
|
c1 = c2;
|
|
c2 = STUB_INPUT();
|
|
linebuf[column] = c2;
|
|
column++;
|
|
}
|
|
}
|
|
|
|
|
|
static void macro()
|
|
{
|
|
int c1 = 0, c2 = STUB_INPUT(), nonws = 0;
|
|
|
|
log_printf("MACRO reading begining...\n#");
|
|
log_printf("%c", c2);
|
|
|
|
linebuf[column] = c2;
|
|
column++;
|
|
for(;;) {
|
|
if (c2 == EOF)
|
|
break;
|
|
if (!isspace(c1))
|
|
nonws = c1;
|
|
if (nonws == '\\' && c2 == '\n') {
|
|
linebuf[0] = '\0';
|
|
column = 0;
|
|
lineno++;
|
|
} else if (c2 == '\n') {
|
|
linebuf[0] = '\0';
|
|
column = 0;
|
|
lineno++;
|
|
break;
|
|
}
|
|
|
|
c1 = c2;
|
|
c2 = STUB_INPUT();
|
|
linebuf[column] = c2;
|
|
log_printf("%c", c2);
|
|
column++;
|
|
}
|
|
log_printf("MACRO reading done.\n");
|
|
}
|
|
|
|
|
|
static void count()
|
|
{
|
|
int i;
|
|
|
|
if (lineno == 1)
|
|
strcat(linebuf, yytext);
|
|
|
|
for (i = 0; yytext[i] != '\0'; i++)
|
|
if (yytext[i] == '\n')
|
|
column = 0;
|
|
else if (yytext[i] == '\t')
|
|
column += 8 - (column % 8);
|
|
else
|
|
column++;
|
|
|
|
/* equiv to fprintf(yyout, "%s", yytext); */
|
|
/* ECHO; */
|
|
}
|
|
|
|
/*
|
|
* Collect the contents of inline functions, reading them char by char.
|
|
* thanks to the arjuna stubgen project for this one
|
|
*/
|
|
int collectInlineDef()
|
|
{
|
|
int bracelevel = 1;
|
|
int token;
|
|
|
|
/* the magic of exclusive start states makes it all possible */
|
|
BEGIN INLIN;
|
|
|
|
while (bracelevel > 0) {
|
|
token = yylex();
|
|
column++;
|
|
/* fprintf(stderr, "INLIN: read token %c\n", token); */
|
|
if (token > 0) {
|
|
/* Assume single char */
|
|
switch (token) {
|
|
case '{':
|
|
bracelevel++;
|
|
break;
|
|
case '}':
|
|
bracelevel--;
|
|
if (bracelevel == 0)
|
|
{
|
|
column--;
|
|
unput(token);
|
|
break;
|
|
}
|
|
break;
|
|
case '\n':
|
|
column = 0;
|
|
lineno++;
|
|
break;
|
|
}
|
|
} else {
|
|
/* fatal error: Unexpected EOF reading inline function */
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* we now return you to your regularly scheduled start state */
|
|
BEGIN 0;
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* hmmm... looks familiar. more control-y programming.
|
|
*/
|
|
int collectMemberInitList()
|
|
{
|
|
int token;
|
|
int insideList = 1;
|
|
|
|
/* the magic of exclusive start states makes it all possible */
|
|
BEGIN MINIT;
|
|
|
|
while(insideList) {
|
|
token = yylex();
|
|
column++;
|
|
/* fprintf(stderr, "MINIT: read token %c\n", token); */
|
|
if (token > 0) {
|
|
/* Assume single char */
|
|
switch (token)
|
|
{
|
|
case '{':
|
|
insideList = 0;
|
|
unput(token);
|
|
break;
|
|
case '\n':
|
|
column = 0;
|
|
lineno++;
|
|
break;
|
|
}
|
|
} else {
|
|
/* fatal error: Unexpected EOF reading member initialization */
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* we now return you to your regularly scheduled start state */
|
|
BEGIN 0;
|
|
|
|
return 0;
|
|
}
|