首页 > 解决方案 > Lex 和 Yacc 符号表的生成和操作

问题描述

我正在尝试构建一个简单的 C 编译器,使用 lex 进行词法分析,使用 yacc 进行语法分析。我正在 lex 中构建符号表,并用我在词法分析中遇到的所有标识符(目前只记录标识符的名称、行号和作用域)填充它。符号表本身是一个链表,由一个指针指向链表的头部。现在我希望能够在 yacc 中访问这个符号表,以便更新每个标识符的值和数据类型。如何在 yacc 中访问这个符号表?

我已将 lex 中的头指针定义为 extern,但它没有帮助。这是供参考的完整代码 -

Lex (ngrammar.l) -

%{
    #include<stdio.h>
    #include<string.h>
    #include<stdlib.h>
    /* Size of every fixed character buffer in a symbol-table node. */
    #define MAX 1000
    /* Semantic value variable; only declared here (extern), never assigned in this scanner. */
    extern int yylval;
    /* Scope counter handed to InsertSymbol for each identifier. */
    int scope = 0;
    /* Current input line; incremented by the newline rule. */
    int lineno = 1;
    /* Parenthesis counter handed to InsertSymbol; a positive value makes it bump the scope by one. */
    int paran = 0;
    /* One symbol-table entry; entries are chained into a singly linked list. */
    typedef struct node{
        /* 1-based position of the entry in the list. */
        int index;
        /* Scope value the symbol was recorded with. */
        int scope;
        /* Identifier text as matched by the scanner. */
        char symbol[MAX];
        /* Line numbers where the symbol was seen, stored as text ("3, 7, 12"). */
        char lineno[MAX];
        /* Data type of the symbol; not filled in by the code shown here. */
        char type[MAX];
        /* Value of the symbol; not filled in by the code shown here. */
        char value[MAX];
        /* Next entry, or NULL at the end of the list. */
        struct node* next; 
    }node;
    /* Head of the symbol-table list.
       NOTE(review): the second line below is an assignment statement at file scope,
       which C does not allow; the answer further down this page explains the
       compiler diagnostics it causes. A single definition `node* head = NULL;`
       would avoid them. */
    extern node* head;
    head = NULL;
    /* Forward declarations of the symbol-table helpers defined below. */
    node* InsertSymbol(node*,int,char*,int,int);
    void display(node* head);
    /* Allocate a zero-filled entry and fill in its index, scope, name and first
       line number. Text fields are written with snprintf so they are always
       NUL-terminated and never overflow. Returns NULL if allocation fails. */
    static node* NewSymbolNode(int index, int scope, const char* symbol, int lineno){
        node* fresh = calloc(1, sizeof *fresh);
        if(fresh == NULL){
            return NULL;
        }
        fresh->index = index;
        fresh->scope = scope;
        snprintf(fresh->symbol, sizeof fresh->symbol, "%s", symbol);
        snprintf(fresh->lineno, sizeof fresh->lineno, "%d", lineno);
        return fresh;
    }

    /* Record one occurrence of `symbol` in the symbol table.
     *
     * head   - first entry of the list, or NULL for an empty table
     * scope  - scope value of the occurrence; bumped by one when paran > 0
     * symbol - identifier text (copied, truncated to fit the entry)
     * lineno - line on which the identifier was seen
     * paran  - parenthesis counter; a positive value raises the scope by one
     *
     * If an entry with the same scope and name exists, ", <lineno>" is appended
     * to its line list; otherwise a new entry is appended at the tail with the
     * next index. Returns the (possibly new) head of the list. On allocation
     * failure the list is returned unchanged and a message goes to stderr.
     */
    node* InsertSymbol(node* head, int scope, char* symbol, int lineno, int paran){
        if(paran>0){
            scope++;
        }

        node* last = NULL;
        int present = 0;
        for(node* cur = head; cur != NULL; cur = cur->next){
            if((cur->scope == scope) && (strcmp(cur->symbol,symbol)==0)){
                /* Append in place, bounded by the space still free in the field. */
                size_t used = strlen(cur->lineno);
                snprintf(cur->lineno + used, sizeof cur->lineno - used, ", %d", lineno);
                present = 1;
            }
            last = cur;
        }
        if(present){
            return head;
        }

        node* fresh = NewSymbolNode(last != NULL ? last->index + 1 : 1, scope, symbol, lineno);
        if(fresh == NULL){
            fprintf(stderr, "InsertSymbol: out of memory\n");
            return head;
        }
        if(last == NULL){
            /* Empty table: the new entry becomes the head. */
            return fresh;
        }
        last->next = fresh;
        return head;
    }
    /* Print the symbol table to stdout: a title, a column header, then one row
       per entry (index, symbol, scope, line numbers). An empty table prints
       "nothing" after the header. */
    void display(node* head){
        printf("\t\t\t SYMBOL TABLE \t\t\t\n\n");
        printf("\t Index \t\t Symbol \t Scope \t\t Line Number \n");
        if(head == NULL){
            printf("nothing");
            return;
        }
        for(node* entry = head; entry != NULL; entry = entry->next){
            printf("\t %d \t\t %s \t\t %d \t\t %s \n",entry->index,entry->symbol,entry->scope,entry->lineno);
        }
    }
%}

/* Named patterns. alpha, digit and und build identifiers and numbers;
   acc is any character other than '"', '*' or '/' (used inside block comments);
   str is any character other than '"' (used inside string literals).
   space, tab and line are not referenced by the rules shown in this file. */
alpha [A-Za-z]
digit [0-9]
und [_]
space [ ]
tab [   ]
line [\n]
acc [^"*/"]
str [^\"]

%%
\/\/(.*)[\n]* {/* line comment: discard it, but keep lineno in step with the newlines it swallowed */
               for(char* c = yytext; *c != '\0'; c++){ if(*c == '\n'){ lineno++; } }}
\/\*({acc}*\n)*{acc}*\*\/[\n]* {/* block comment: discard it, counting every newline inside and after it */
               for(char* c = yytext; *c != '\0'; c++){ if(*c == '\n'){ lineno++; } }}
for {return FOR;}
if {return IF;}
char {return CHAR;}
float {return FLOAT;}
int {return INT;}
continue {return CONTINUE;}
return {return RETURN;}
bool {return BOOL;}
main {return MAIN;}
else {return ELSE;}
printf {return PRINT;}
break {return BREAK;}
TRUE {return BOOLTRUE;}
FALSE {return BOOLFALSE;}
\|\| {return OR;}
\%d|\%c|\%f {return FORMATSPEC;}
\( {/* update the counter first: nothing after `return` is ever executed */
    paran++; return OPENBRACES;}
\) {paran--; return CLOSEBRACES;}
\{ {scope++; return OPENCURLYBRACES;}
\} {scope--; return CLOSECURLYBRACES;}
[.] {return DOT;}
= {return ASSIGNOP;}
\+\+|\-\- {return UNARYOP;}
\+|\-|\*|\/ {return ARITHOP;}
>|<|>=|<=|!=|== {return RELOP;}
&& {return AND;}
[,] {return COMMA;}
[;] {return SEMICOLON;}
['] {return SINGLEQUOTES;}
["] {return DOUBLEQUOTES;}
["]{str}*["] {return STRING;}
{alpha}({alpha}|{digit}|{und})* {/* record the identifier before handing the token to the parser */
                                head = InsertSymbol(head,scope,yytext,lineno,paran);
                                return IDENTIFIER;}
{digit}+ {return INTCONST;}
({digit}+)\.({digit}+) {return FLOATCONST;}
[\n] {lineno++;}
[\t] {;}
[ ] {;}
. {return yytext[0];}
%%

/* End-of-input hook called by the scanner: always returns 1. */
int yywrap()
{
    const int no_more_input = 1;
    return no_more_input;
}

Yacc (ngrammar.y) -

%{
    #include<stdio.h>
    #include<string.h>
    /* Parse status flag: starts at 1, cleared to 0 by yyerror, checked by main. */
    int valid=1;
%}

%token CHAR INT FLOAT BOOL MAIN FOR IF ELSE PRINT BREAK CONTINUE BOOLTRUE BOOLFALSE FORMATSPEC OPENBRACES CLOSEBRACES OPENCURLYBRACES CLOSECURLYBRACES UNARYOP ARITHOP RELOP ASSIGNOP COMMA SEMICOLON SINGLEQUOTES DOUBLEQUOTES UNDERSCORE RETURN AND OR DOT STRING IDENTIFIER INTCONST FLOATCONST

%%
/* Program shape: `int main ( ) { statement return_statement }`. */
start: INT MAIN OPENBRACES CLOSEBRACES OPENCURLYBRACES statement return_statement CLOSECURLYBRACES;
statement: compound_statement | expression_statement | jump_statement | print_statement | for_loop | if_else;
for_loop: FOR OPENBRACES declaration condition_statement SEMICOLON for_expression_statement CLOSEBRACES statement;
/* An `if` always requires an `else` branch in this grammar. */
if_else: IF OPENBRACES condition_statement CLOSEBRACES statement ELSE statement;
return_statement: RETURN identifier SEMICOLON | RETURN intconstant SEMICOLON;

/* Declarations; the type specifier is optional. */
declaration: declaration_specifiers init_declarator_list SEMICOLON | init_declarator_list SEMICOLON;
declaration_specifiers: CHAR | INT | FLOAT | BOOL;
init_declarator_list: init_declarator | init_declarator COMMA init_declarator_list;
init_declarator: identifier | identifier ASSIGNOP initializer;
initializer: numconstant | identifier;

/* NOTE(review): compound_statement can be a bare block_item_list, block_item can be
   empty, and block_item_list holds at most two items (it is not recursive). This
   looks like a source of parser conflicts - confirm against the generator's report. */
compound_statement: OPENCURLYBRACES CLOSECURLYBRACES | OPENCURLYBRACES block_item_list CLOSECURLYBRACES | block_item_list;
block_item_list: block_item block_item | block_item;
block_item: declaration | statement |;
/* Expressions are limited to a single binary arithmetic operation or a unary operator. */
expression_statement: identifier ASSIGNOP expression SEMICOLON | identifier UNARYOP SEMICOLON | UNARYOP identifier SEMICOLON;
expression: identifier ARITHOP identifier | identifier ARITHOP numconstant | numconstant ARITHOP numconstant;
for_expression_statement: identifier ASSIGNOP expression  | identifier UNARYOP | UNARYOP identifier;

/* NOTE(review): AND/OR are given no precedence or associativity here, so the two
   recursive alternatives are probably ambiguous - confirm. */
condition_statement: identifier RELOP identifier | identifier RELOP numconstant | numconstant RELOP identifier | condition_statement AND condition_statement | condition_statement OR condition_statement | BOOLTRUE | BOOLFALSE;
jump_statement: BREAK SEMICOLON | CONTINUE SEMICOLON;

/* printf with either a plain string, or a quoted format specifier plus one identifier. */
print_statement: PRINT OPENBRACES stringconstant CLOSEBRACES SEMICOLON | PRINT OPENBRACES DOUBLEQUOTES FORMATSPEC DOUBLEQUOTES COMMA identifier CLOSEBRACES SEMICOLON;

/* Thin wrappers around terminal tokens. */
identifier: IDENTIFIER;
intconstant: INTCONST;
numconstant: INTCONST | FLOATCONST;
stringconstant: STRING;
%%
#include "lex.yy.c"
/* Error hook: marks the input as invalid and prints a fixed message to stdout.
   Declared with an empty parameter list, so any argument a caller passes is ignored.
   Always returns 0. */
int yyerror()
{
    valid = 0;
    fputs("\nDoes not satisfy the grammar\n", stdout);
    return 0;
}
/* Parse the file "in.c", report whether it satisfies the grammar, then print
   the symbol table. Returns 1 if the input file cannot be opened, 0 otherwise. */
int main()
{
    yyin = fopen("in.c","r");
    if(yyin == NULL)
    {
        /* Fail loudly instead of letting the scanner run with no input file. */
        perror("in.c");
        return 1;
    }
    yyparse();
    fclose(yyin);
    if(valid)
    {
        printf("\nSatisfies the grammar \n");
    }
    display(head);
    return 0;
}

下面的屏幕截图显示了执行时的警告和输出 - 在此处输入图像描述 在此处输入图像描述

(我还没有编写更新值和数据类型的代码。这只是为了在 yacc 中显示符号表。)

标签: c compiler-errors

解决方案


C 程序的顶层只能包含声明和定义。它不能包含语句。所以以下在 C 程序的顶层是不合法的:

extern node* head;
head = NULL;

第二行被当作声明来解析,因为 GCC 仍然允许标准 C 之前那种使用隐式类型(类型默认为 int)的声明。它确实会给出警告,但这个警告帮助不大。

结果是上述内容被解释为

extern node* head;
int head = NULL;

这显然是非法的,因为同一个全局名称不能被声明为两种不同的类型(先是 node*,后是 int)。然后 GCC 继续使用第二个定义,其余的错误随之级联出现。

这与 flex 或 bison 无关,也与你的符号表实现无关(我没有看那部分)。但这很好地说明了为什么在编写解析器时应该考虑生成好的错误消息。


推荐阅读