woaidongmao

文章均收录自他人博客,但不喜标题前加-[转贴],因其丑陋,见谅!~
随笔 - 1469, 文章 - 0, 评论 - 661, 引用 - 0
数据加载中……

ANSI C grammar 最新的 Lex 和 Yacc 描述

http://www.quut.com/c/ANSI-C-grammar-l-1998.html
http://www.quut.com/c/ANSI-C-grammar-y-1998.html


可以说这是比较权威和准确的词法及语法描述了,是比较可贵的材料。
原文引述如下
-------------------------------
ANSI C Yacc grammar

 

ANSI C Yacc grammar
In 1985, Jeff Lee published his Yacc grammar (which is accompanied by a matching Lex specification) for the April 30, 1985 draft version of the ANSI C standard.  Tom Stockfisch reposted it to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.
 
The version you see here has been updated based on a 1998 draft of the standards document. It allows for restricted pointers, variable-length arrays, "inline", and designated initializers. The previous version's lex and yacc files (ANSI C as of ca 1995) are still around as archived copies.
 
I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.
(If you feel like it, read the FAQ first.)
 
Jutta Degener, April 2005
 
/* Identifiers, literals, the sizeof keyword, multi-character operators and
 * the typedef-name token. */
%token IDENTIFIER CONSTANT STRING_LITERAL SIZEOF
%token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
%token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
%token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
%token XOR_ASSIGN OR_ASSIGN TYPE_NAME
 
/* Storage-class, type and qualifier keywords. */
%token TYPEDEF EXTERN STATIC AUTO REGISTER
%token CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE CONST VOLATILE VOID
%token STRUCT UNION ENUM ELLIPSIS
 
/* Statement keywords. */
%token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
 
/* Tokens used by type_specifier, type_qualifier and function_specifier and
 * returned by the scanner for "_Bool", "_Complex", "_Imaginary", "inline"
 * and "restrict".  They were missing from the declarations; they are
 * appended here so the numbering of the tokens above does not change. */
%token BOOL COMPLEX IMAGINARY INLINE RESTRICT
 
%start translation_unit
%%
 
/* Primary expressions: an identifier, a constant, a string literal, or an
 * expression wrapped in parentheses. */
primary_expression
        : IDENTIFIER
        | CONSTANT
        | STRING_LITERAL
        | '(' expression ')'
        ;
 
/* Postfix expressions, built left-recursively on a primary expression:
 * subscripting, calls with and without arguments, member access through
 * '.' and PTR_OP, and postfix INC_OP / DEC_OP.  The last two alternatives
 * accept a parenthesized type_name followed by a braced initializer_list
 * (with or without a trailing comma), i.e. a compound literal. */
postfix_expression
        : primary_expression
        | postfix_expression '[' expression ']'
        | postfix_expression '(' ')'
        | postfix_expression '(' argument_expression_list ')'
        | postfix_expression '.' IDENTIFIER
        | postfix_expression PTR_OP IDENTIFIER
        | postfix_expression INC_OP
        | postfix_expression DEC_OP
        | '(' type_name ')' '{' initializer_list '}'
        | '(' type_name ')' '{' initializer_list ',' '}'
        ;
 
/* Call arguments: one or more assignment expressions separated by commas. */
argument_expression_list
        : assignment_expression
        | argument_expression_list ',' assignment_expression
        ;
 
/* Unary expressions: prefix INC_OP / DEC_OP, a unary_operator applied to a
 * cast_expression, and SIZEOF applied either to a unary expression or to a
 * parenthesized type_name. */
unary_expression
        : postfix_expression
        | INC_OP unary_expression
        | DEC_OP unary_expression
        | unary_operator cast_expression
        | SIZEOF unary_expression
        | SIZEOF '(' type_name ')'
        ;
 
/* The single-character prefix operators. */
unary_operator
        : '&'
        | '*'
        | '+'
        | '-'
        | '~'
        | '!'
        ;
 
/* Cast expressions: zero or more parenthesized type names in front of a
 * unary expression (right-recursive). */
cast_expression
        : unary_expression
        | '(' type_name ')' cast_expression
        ;
 
/* Binary operators.  There is one nonterminal per precedence level, each
 * defined in terms of the next tighter level, running from multiplicative
 * (tightest) down to logical OR (loosest).  Every rule is left-recursive,
 * so operators on the same level group from left to right. */

/* '*', '/' and '%' over cast expressions. */
multiplicative_expression
        : cast_expression
        | multiplicative_expression '*' cast_expression
        | multiplicative_expression '/' cast_expression
        | multiplicative_expression '%' cast_expression
        ;
 
/* Binary '+' and '-'. */
additive_expression
        : multiplicative_expression
        | additive_expression '+' multiplicative_expression
        | additive_expression '-' multiplicative_expression
        ;
 
/* Shifts: LEFT_OP and RIGHT_OP. */
shift_expression
        : additive_expression
        | shift_expression LEFT_OP additive_expression
        | shift_expression RIGHT_OP additive_expression
        ;
 
/* Ordering comparisons: '<', '>', LE_OP and GE_OP. */
relational_expression
        : shift_expression
        | relational_expression '<' shift_expression
        | relational_expression '>' shift_expression
        | relational_expression LE_OP shift_expression
        | relational_expression GE_OP shift_expression
        ;
 
/* Equality comparisons: EQ_OP and NE_OP. */
equality_expression
        : relational_expression
        | equality_expression EQ_OP relational_expression
        | equality_expression NE_OP relational_expression
        ;
 
/* Binary '&'. */
and_expression
        : equality_expression
        | and_expression '&' equality_expression
        ;
 
/* Binary '^'. */
exclusive_or_expression
        : and_expression
        | exclusive_or_expression '^' and_expression
        ;
 
/* Binary '|'. */
inclusive_or_expression
        : exclusive_or_expression
        | inclusive_or_expression '|' exclusive_or_expression
        ;
 
/* AND_OP. */
logical_and_expression
        : inclusive_or_expression
        | logical_and_expression AND_OP inclusive_or_expression
        ;
 
/* OR_OP. */
logical_or_expression
        : logical_and_expression
        | logical_or_expression OR_OP logical_and_expression
        ;
 
/* The '?' ':' operator.  The middle operand is a full expression; the
 * last operand is again a conditional_expression (right-recursive). */
conditional_expression
        : logical_or_expression
        | logical_or_expression '?' expression ':' conditional_expression
        ;
 
/* Assignment: the left side must be a unary_expression, the right side is
 * another assignment_expression, so chained assignments nest to the right. */
assignment_expression
        : conditional_expression
        | unary_expression assignment_operator assignment_expression
        ;
 
/* Plain '=' and the ten compound-assignment tokens. */
assignment_operator
        : '='
        | MUL_ASSIGN
        | DIV_ASSIGN
        | MOD_ASSIGN
        | ADD_ASSIGN
        | SUB_ASSIGN
        | LEFT_ASSIGN
        | RIGHT_ASSIGN
        | AND_ASSIGN
        | XOR_ASSIGN
        | OR_ASSIGN
        ;
 
/* Full expressions: assignment expressions separated by the comma
 * operator (left-recursive). */
expression
        : assignment_expression
        | expression ',' assignment_expression
        ;
 
/* Used where the grammar asks for a constant (bit-field widths, enumerator
 * values, case labels, array designators).  Syntactically it is just a
 * conditional_expression; nothing here checks that it is constant. */
constant_expression
        : conditional_expression
        ;
 
/* A declaration: specifiers, an optional list of init-declarators, and a
 * terminating ';'. */
declaration
        : declaration_specifiers ';'
        | declaration_specifiers init_declarator_list ';'
        ;
 
/* One or more storage-class specifiers, type specifiers, type qualifiers
 * and function specifiers, in any order (right-recursive). */
declaration_specifiers
        : storage_class_specifier
        | storage_class_specifier declaration_specifiers
        | type_specifier
        | type_specifier declaration_specifiers
        | type_qualifier
        | type_qualifier declaration_specifiers
        | function_specifier
        | function_specifier declaration_specifiers
        ;
 
/* Comma-separated init-declarators. */
init_declarator_list
        : init_declarator
        | init_declarator_list ',' init_declarator
        ;
 
/* A declarator, optionally followed by '=' and an initializer. */
init_declarator
        : declarator
        | declarator '=' initializer
        ;
 
/* The storage-class keywords; TYPEDEF is treated as one of them. */
storage_class_specifier
        : TYPEDEF
        | EXTERN
        | STATIC
        | AUTO
        | REGISTER
        ;
 
/* Type specifiers: the built-in type keywords, struct/union and enum
 * specifiers, and TYPE_NAME for a typedef name.  TYPE_NAME has to be
 * supplied by the scanner. */
type_specifier
        : VOID
        | CHAR
        | SHORT
        | INT
        | LONG
        | FLOAT
        | DOUBLE
        | SIGNED
        | UNSIGNED
        | BOOL
        | COMPLEX
        | IMAGINARY
        | struct_or_union_specifier
        | enum_specifier
        | TYPE_NAME
        ;
 
/* struct/union with a tag and a body, with a body only, or with a tag only. */
struct_or_union_specifier
        : struct_or_union IDENTIFIER '{' struct_declaration_list '}'
        | struct_or_union '{' struct_declaration_list '}'
        | struct_or_union IDENTIFIER
        ;
 
/* The keyword that introduces the specifier. */
struct_or_union
        : STRUCT
        | UNION
        ;
 
/* One or more member declarations; an empty body is not accepted. */
struct_declaration_list
        : struct_declaration
        | struct_declaration_list struct_declaration
        ;
 
/* A member declaration: specifiers/qualifiers, at least one declarator,
 * and a terminating ';'. */
struct_declaration
        : specifier_qualifier_list struct_declarator_list ';'
        ;
 
/* One or more type specifiers and type qualifiers in any order; storage
 * classes and function specifiers are not part of this list. */
specifier_qualifier_list
        : type_specifier specifier_qualifier_list
        | type_specifier
        | type_qualifier specifier_qualifier_list
        | type_qualifier
        ;
 
/* Comma-separated member declarators. */
struct_declarator_list
        : struct_declarator
        | struct_declarator_list ',' struct_declarator
        ;
 
/* A member declarator: a plain declarator, an unnamed bit-field
 * (':' width), or a named bit-field (declarator ':' width). */
struct_declarator
        : declarator
        | ':' constant_expression
        | declarator ':' constant_expression
        ;
 
/* enum with or without a tag, with a body that may end in a trailing
 * comma, or a tag alone. */
enum_specifier
        : ENUM '{' enumerator_list '}'
        | ENUM IDENTIFIER '{' enumerator_list '}'
        | ENUM '{' enumerator_list ',' '}'
        | ENUM IDENTIFIER '{' enumerator_list ',' '}'
        | ENUM IDENTIFIER
        ;
 
/* Comma-separated enumerators. */
enumerator_list
        : enumerator
        | enumerator_list ',' enumerator
        ;
 
/* An enumerator name, optionally with '=' and a value. */
enumerator
        : IDENTIFIER
        | IDENTIFIER '=' constant_expression
        ;
 
/* The type-qualifier keywords. */
type_qualifier
        : CONST
        | RESTRICT
        | VOLATILE
        ;
 
/* The only function specifier is INLINE. */
function_specifier
        : INLINE
        ;
 
/* A declarator is a direct_declarator, optionally preceded by a pointer
 * prefix. */
declarator
        : pointer direct_declarator
        | direct_declarator
        ;
 
 
/* Direct declarators: a name or a parenthesized declarator, followed by
 * any number of array or function suffixes (left-recursive).
 *
 * Array suffixes cover an optional qualifier list with an optional size,
 * STATIC placed before or after the qualifier list, and '*' for a
 * variable-length array of unspecified size.  The alternative
 * '[' STATIC assignment_expression ']' (STATIC with no qualifier list) was
 * missing, so a parameter such as "int a[static 10]" did not parse; it is
 * added below.
 *
 * Function suffixes take a prototype parameter list, an old-style
 * identifier list, or nothing. */
direct_declarator
        : IDENTIFIER
        | '(' declarator ')'
        | direct_declarator '[' type_qualifier_list assignment_expression ']'
        | direct_declarator '[' type_qualifier_list ']'
        | direct_declarator '[' assignment_expression ']'
        | direct_declarator '[' STATIC type_qualifier_list assignment_expression ']'
        | direct_declarator '[' STATIC assignment_expression ']'
        | direct_declarator '[' type_qualifier_list STATIC assignment_expression ']'
        | direct_declarator '[' type_qualifier_list '*' ']'
        | direct_declarator '[' '*' ']'
        | direct_declarator '[' ']'
        | direct_declarator '(' parameter_type_list ')'
        | direct_declarator '(' identifier_list ')'
        | direct_declarator '(' ')'
        ;
 
/* A pointer prefix: one or more '*', each optionally followed by a list
 * of type qualifiers. */
pointer
        : '*'
        | '*' type_qualifier_list
        | '*' pointer
        | '*' type_qualifier_list pointer
        ;
 
/* One or more type qualifiers. */
type_qualifier_list
        : type_qualifier
        | type_qualifier_list type_qualifier
        ;
 
 
/* A prototype parameter list, optionally ending in ',' ELLIPSIS. */
parameter_type_list
        : parameter_list
        | parameter_list ',' ELLIPSIS
        ;
 
/* Comma-separated parameter declarations. */
parameter_list
        : parameter_declaration
        | parameter_list ',' parameter_declaration
        ;
 
/* A parameter: specifiers followed by a named declarator, by an abstract
 * (unnamed) declarator, or by nothing. */
parameter_declaration
        : declaration_specifiers declarator
        | declaration_specifiers abstract_declarator
        | declaration_specifiers
        ;
 
/* Comma-separated identifiers, as used in an old-style function
 * declarator. */
identifier_list
        : IDENTIFIER
        | identifier_list ',' IDENTIFIER
        ;
 
/* A type name (used by casts, SIZEOF and compound literals): specifiers
 * and qualifiers with an optional abstract declarator. */
type_name
        : specifier_qualifier_list
        | specifier_qualifier_list abstract_declarator
        ;
 
/* A declarator without a name: a pointer prefix, a direct abstract
 * declarator, or both. */
abstract_declarator
        : pointer
        | direct_abstract_declarator
        | pointer direct_abstract_declarator
        ;
 
/* Array and function suffixes without a declared name.  Each suffix form
 * appears twice: once standing alone and once appended to an existing
 * direct_abstract_declarator. */
direct_abstract_declarator
        : '(' abstract_declarator ')'
        | '[' ']'
        | '[' assignment_expression ']'
        | direct_abstract_declarator '[' ']'
        | direct_abstract_declarator '[' assignment_expression ']'
        | '[' '*' ']'
        | direct_abstract_declarator '[' '*' ']'
        | '(' ')'
        | '(' parameter_type_list ')'
        | direct_abstract_declarator '(' ')'
        | direct_abstract_declarator '(' parameter_type_list ')'
        ;
 
/* An initializer: a single assignment expression, or a braced list that
 * may end in a trailing comma. */
initializer
        : assignment_expression
        | '{' initializer_list '}'
        | '{' initializer_list ',' '}'
        ;
 
/* Comma-separated initializers, each optionally preceded by a
 * designation. */
initializer_list
        : initializer
        | designation initializer
        | initializer_list ',' initializer
        | initializer_list ',' designation initializer
        ;
 
/* A designation: one or more designators followed by '='. */
designation
        : designator_list '='
        ;
 
/* One or more designators in sequence. */
designator_list
        : designator
        | designator_list designator
        ;
 
/* A single designator: an array index in brackets or '.' followed by a
 * member name. */
designator
        : '[' constant_expression ']'
        | '.' IDENTIFIER
        ;
 
/* A statement is exactly one of the six statement kinds below. */
statement
        : labeled_statement
        | compound_statement
        | expression_statement
        | selection_statement
        | iteration_statement
        | jump_statement
        ;
 
/* A statement prefixed by a named label, a CASE label or DEFAULT. */
labeled_statement
        : IDENTIFIER ':' statement
        | CASE constant_expression ':' statement
        | DEFAULT ':' statement
        ;
 
/* A braced block, either empty or holding a list of block items. */
compound_statement
        : '{' '}'
        | '{' block_item_list '}'
        ;
 
/* One or more block items. */
block_item_list
        : block_item
        | block_item_list block_item
        ;
 
/* A block item is a declaration or a statement, so the two may be mixed
 * freely inside a block. */
block_item
        : declaration
        | statement
        ;
 
/* An optional expression followed by ';'. */
expression_statement
        : ';'
        | expression ';'
        ;
 
/* IF with and without ELSE, and SWITCH.  The two IF alternatives share a
 * prefix, which gives the usual dangling-else shift/reduce conflict; yacc
 * resolves it by shifting, attaching an ELSE to the nearest IF. */
selection_statement
        : IF '(' expression ')' statement
        | IF '(' expression ')' statement ELSE statement
        | SWITCH '(' expression ')' statement
        ;
 
/* WHILE, DO ... WHILE and FOR.  The first two FOR clauses are written as
 * expression_statement, which supplies both the ';' and the optional
 * expression; the third clause is an optional expression.  The last two
 * alternatives accept a declaration as the first clause. */
iteration_statement
        : WHILE '(' expression ')' statement
        | DO statement WHILE '(' expression ')' ';'
        | FOR '(' expression_statement expression_statement ')' statement
        | FOR '(' expression_statement expression_statement expression ')' statement
        | FOR '(' declaration expression_statement ')' statement
        | FOR '(' declaration expression_statement expression ')' statement
        ;
 
/* GOTO, CONTINUE, BREAK, and RETURN with or without a value. */
jump_statement
        : GOTO IDENTIFIER ';'
        | CONTINUE ';'
        | BREAK ';'
        | RETURN ';'
        | RETURN expression ';'
        ;
 
/* Start symbol: one or more external declarations.  Empty input is not
 * accepted. */
translation_unit
        : external_declaration
        | translation_unit external_declaration
        ;
 
/* A top-level item is a function definition or a declaration. */
external_declaration
        : function_definition
        | declaration
        ;
 
/* A function definition: specifiers, a declarator and a body.  The first
 * alternative also allows a list of declarations between the declarator
 * and the body, as in old-style definitions. */
function_definition
        : declaration_specifiers declarator declaration_list compound_statement
        | declaration_specifiers declarator compound_statement
        ;
 
/* One or more declarations. */
declaration_list
        : declaration
        | declaration_list declaration
        ;
 
 
%%
#include <stdio.h>
 
extern char yytext[];
extern int column;
 
/*
 * Report a parse error.
 *
 * Flushes stdout, then prints a blank line, a '^' right-aligned in a field
 * `column` characters wide, and the message `s` right-aligned in a field of
 * the same width.  Output goes to stdout, not stderr.
 *
 * The definition now has a prototype and an explicit `int` return type
 * (implicit int and old-style parameter lists are not valid in C99 and
 * later), and it returns a defined value instead of falling off the end.
 *
 * s: message text to print.
 * Returns 0.
 */
int yyerror(const char *s)
{
        fflush(stdout);
        printf("\n%*s\n%*s\n", column, "^", column, s);
        return 0;
}

 

ANSI C grammar, Lex specification

代码:

ANSI C grammar, Lex specification
In 1985, Jeff Lee published this Lex specification together with a Yacc grammar for the April 30, 1985 ANSI C draft.  Tom Stockfisch reposted both to net.sources in 1987; that original, as mentioned in the answer to question 17.25 of the comp.lang.c FAQ, can be ftp'ed from ftp.uu.net, file usenet/net.sources/ansi.c.grammar.Z.
 
The version you see here has been updated based on a 1998 draft of the standards document. It allows for restricted pointers, variable-length arrays, "inline", and designated initializers. The previous version's lex and yacc files (ANSI C as of ca 1995) are still around as archived copies.
 
I intend to keep this version as close to the current C Standard grammar as possible; please let me know if you discover discrepancies.
 
Jutta Degener, 2004
 
D                      [0-9]
        /* Named sub-patterns, referenced as {NAME} in the rules section.
         * These comment lines are indented so lex copies them to the
         * generated file instead of reading them as definitions.
         *   D   (above) one decimal digit
         *   L   a letter or underscore
         *   H   one hexadecimal digit
         *   E   an exponent: e or E, an optional sign, one or more digits
         *   FS  a floating suffix: f, F, l or L
         *   IS  any run (possibly empty) of the letters u, U, l, L */
L                      [a-zA-Z_]
H                      [a-fA-F0-9]
E                      [Ee][+-]?{D}+
FS                     (f|F|l|L)
IS                     (u|U|l|L)*
 
%{
#include <stdio.h>
#include "y.tab.h"
 
/*
 * Helpers defined in the user-code section at the end of this file.  The
 * rule actions call comment() and check_type() before those definitions
 * are seen, and calling an undeclared function is not valid in C99 and
 * later, so all three are declared here with prototypes.
 */
void count(void);
int comment(void);
int check_type(void);
%}
 
%%
"/*"                   { comment(); }
        /* The rule above hands the rest of a block comment to comment().
         * The rule below consumes a line comment up to, but not including,
         * the newline; it passes through count() so the text is echoed and
         * the column stays in step, and no token is returned.  Without it
         * the two slashes were returned as two '/' tokens.  (These comment
         * lines are indented so lex does not read them as patterns.) */
"//"[^\n]*             { count(); }
 
"auto"                 { count(); return(AUTO); }
        /* Keyword rules, from "auto" above to "while" below.  Each one
         * calls count() and returns the token for that keyword.  They are
         * listed ahead of the general name rule so that a keyword and a
         * name of the same length resolve to the keyword.  (Comment lines
         * in this section are indented so lex does not read them as
         * patterns.) */
"_Bool"                { count(); return(BOOL); }
"break"                { count(); return(BREAK); }
"case"                 { count(); return(CASE); }
"char"                 { count(); return(CHAR); }
"_Complex"             { count(); return(COMPLEX); }
"const"                { count(); return(CONST); }
"continue"             { count(); return(CONTINUE); }
"default"              { count(); return(DEFAULT); }
"do"                   { count(); return(DO); }
"double"               { count(); return(DOUBLE); }
"else"                 { count(); return(ELSE); }
"enum"                 { count(); return(ENUM); }
"extern"               { count(); return(EXTERN); }
"float"                { count(); return(FLOAT); }
"for"                  { count(); return(FOR); }
"goto"                 { count(); return(GOTO); }
"if"                   { count(); return(IF); }
"_Imaginary"           { count(); return(IMAGINARY); }
"inline"               { count(); return(INLINE); }
"int"                  { count(); return(INT); }
"long"                 { count(); return(LONG); }
"register"             { count(); return(REGISTER); }
"restrict"             { count(); return(RESTRICT); }
"return"               { count(); return(RETURN); }
"short"                { count(); return(SHORT); }
"signed"               { count(); return(SIGNED); }
"sizeof"               { count(); return(SIZEOF); }
"static"               { count(); return(STATIC); }
"struct"               { count(); return(STRUCT); }
"switch"               { count(); return(SWITCH); }
"typedef"              { count(); return(TYPEDEF); }
"union"                { count(); return(UNION); }
"unsigned"             { count(); return(UNSIGNED); }
"void"                 { count(); return(VOID); }
"volatile"             { count(); return(VOLATILE); }
"while"                { count(); return(WHILE); }
 
{L}({L}|{D})*          { count(); return(check_type()); }
        /* The rule above matches a name: a letter or underscore followed
         * by letters, underscores and digits.  check_type() chooses the
         * token to return. */
 
        /* Integer constants (hexadecimal, leading-zero, decimal), each with
         * an optional IS suffix, and character constants with an optional
         * L prefix.  All are returned as CONSTANT. */
0[xX]{H}+{IS}?         { count(); return(CONSTANT); }
0{D}+{IS}?             { count(); return(CONSTANT); }
{D}+{IS}?              { count(); return(CONSTANT); }
L?'(\\.|[^\\'])+'      { count(); return(CONSTANT); }
 
        /* Floating constants: digits with an exponent, or digits around a
         * '.' with an optional exponent; the FS suffix is optional.  Also
         * returned as CONSTANT. */
{D}+{E}{FS}?           { count(); return(CONSTANT); }
{D}*"."{D}+({E})?{FS}? { count(); return(CONSTANT); }
{D}+"."{D}*({E})?{FS}? { count(); return(CONSTANT); }
 
        /* String literals with an optional L prefix; a backslash escapes
         * the character that follows it. */
L?\"(\\.|[^\\"])*\"    { count(); return(STRING_LITERAL); }
 
"..."                  { count(); return(ELLIPSIS); }
        /* Punctuators, from "..." above to "?" below.  Multi-character
         * operators return a named token; single-character ones return the
         * character itself.  The two-character spellings <% %> <: :> return
         * the same tokens as { } [ ]. */
">>="                  { count(); return(RIGHT_ASSIGN); }
"<<="                  { count(); return(LEFT_ASSIGN); }
"+="                   { count(); return(ADD_ASSIGN); }
"-="                   { count(); return(SUB_ASSIGN); }
"*="                   { count(); return(MUL_ASSIGN); }
"/="                   { count(); return(DIV_ASSIGN); }
"%="                   { count(); return(MOD_ASSIGN); }
"&="                   { count(); return(AND_ASSIGN); }
"^="                   { count(); return(XOR_ASSIGN); }
"|="                   { count(); return(OR_ASSIGN); }
">>"                   { count(); return(RIGHT_OP); }
"<<"                   { count(); return(LEFT_OP); }
"++"                   { count(); return(INC_OP); }
"--"                   { count(); return(DEC_OP); }
"->"                   { count(); return(PTR_OP); }
"&&"                   { count(); return(AND_OP); }
"||"                   { count(); return(OR_OP); }
"<="                   { count(); return(LE_OP); }
">="                   { count(); return(GE_OP); }
"=="                   { count(); return(EQ_OP); }
"!="                   { count(); return(NE_OP); }
";"                    { count(); return(';'); }
("{"|"<%")             { count(); return('{'); }
("}"|"%>")             { count(); return('}'); }
","                    { count(); return(','); }
":"                    { count(); return(':'); }
"="                    { count(); return('='); }
"("                    { count(); return('('); }
")"                    { count(); return(')'); }
("["|"<:")             { count(); return('['); }
("]"|":>")             { count(); return(']'); }
"."                    { count(); return('.'); }
"&"                    { count(); return('&'); }
"!"                    { count(); return('!'); }
"~"                    { count(); return('~'); }
"-"                    { count(); return('-'); }
"+"                    { count(); return('+'); }
"*"                    { count(); return('*'); }
"/"                    { count(); return('/'); }
"%"                    { count(); return('%'); }
"<"                    { count(); return('<'); }
">"                    { count(); return('>'); }
"^"                    { count(); return('^'); }
"|"                    { count(); return('|'); }
"?"                    { count(); return('?'); }
 
[ \t\v\n\f]            { count(); }
        /* Whitespace (rule above) goes through count(), so it is echoed and
         * moves the column, but no token is returned.  Any other character
         * (rule below) is dropped: it is not counted, not echoed, and
         * returns no token. */
.                      { /* ignore bad characters */ }
 
%%
 
/*
 * End-of-input hook for the generated scanner.  Always returns 1, which by
 * lex convention means there is no further input to continue with.
 *
 * The definition now has an explicit `int` return type and a `(void)`
 * parameter list; the implicit-int form is not valid in C99 and later.
 */
int yywrap(void)
{
        return 1;
}
 
 
/*
 * Consume the remainder of a block comment; called after the opening
 * delimiter has been matched.
 *
 * Characters are read with input() until a '*' immediately followed by
 * '/' is seen.  Every character other than '*' is echoed with putchar(),
 * and so is the closing '/'.  The '*' characters themselves are not
 * echoed, and `column` is not updated; both are kept as in the original
 * implementation.
 *
 * Changes from the original:
 *   - the result of input() is held in an int and compared against both 0
 *     and EOF, since scanner generators differ in which one input()
 *     returns at end of input; stored in a char and tested only against 0,
 *     an EOF result never ended the loop on an unterminated comment;
 *   - reading stops as soon as end of input is seen, instead of calling
 *     input() once more;
 *   - the function has an explicit int return type and returns a value.
 *
 * Returns 0.
 */
int comment(void)
{
        int c;

        for (;;) {
                c = input();
                if (c == 0 || c == EOF)
                        return 0;       /* unterminated comment */

                if (c != '*') {
                        putchar(c);
                        continue;
                }

                /* A '*' was read: the comment ends only if '/' follows. */
                c = input();
                if (c == '/') {
                        putchar(c);
                        return 0;
                }
                if (c == 0 || c == EOF)
                        return 0;       /* unterminated comment */

                /* Not the end; push the character back so a run such as
                 * "**" followed by '/' is still recognised. */
                unput(c);
        }
}
 
 
int column = 0;
 
void count()
{
        int i;
 
        for (i = 0; yytext[i] != '\0'; i++)
               if (yytext[i] == '\n')
                       column = 0;
               else if (yytext[i] == '\t')
                       column += 8 - (column % 8);
               else
                       column++;
 
        ECHO;
}
 
 
int check_type()
{
        /*
         * Intended behaviour (not implemented): return TYPE_NAME when the
         * matched text in yytext names a type, and IDENTIFIER otherwise.
         *
         * No such lookup is done here, so every name is reported as
         * IDENTIFIER.
         */
        int token = IDENTIFIER;

        return token;
}

 

posted on 2008-11-22 00:33 肥仔 阅读(2546) 评论(1)  编辑 收藏 引用 所属分类: LEX & YACC

评论

# re: ANSI C grammar 最新的 Lex 和 Yacc 描述  回复  更多评论   

想问一下y.tab.h在哪里?
2009-04-06 18:09 | wangping

只有注册用户登录后才能发表评论。
网站导航: 博客园   IT新闻   BlogJava   知识库   博问   管理