首页 > 解决方案 > 如何将标识符插入到 C 中的哈希表中?

问题描述

我想我可能拥有它。从代码中,它是否正确获取了标识符?还是我应该以不同的方式处理它?我不知道是否应该将 IDENT 案例添加到 main 而不是 check_identifier,或者保留 check_identifier 以绑定到哈希表中。

//main.c
#include "symtab.c

FILE *file;

char buffer[1000 + 1];  // + 1 for the null terminator.
char tokens[1000 + 1];
char ctype[20];

int index = 0;
int line_num = 0;
int line_row = 0;
int line_column = 0;
int max_token = 0;

typedef enum
{
    WORD,       // Consists of a string of letters.
    SEPARATOR,  // Consists of a single separator.
    SPACE,      // Consists of a single space character.
    NELI,
    NUMBER,
    OPERATOR,
    EQUALIZER,
    UNKNOWN,    // Consists of a single unknown character.
    IDENT,
    ENFI,
} token_type;

token_type token;

char predirect[][12] = { "use", "system",  "label",  "translate"};

void library(char t[])
{
    int i;

    for (i = 0; i < 5; i++)
    {
        if (strcmp(t, predirect[i]) == 0)
        {
            strcpy(ctype, predirect[i]);    //If Keyword, set 'ctype' as that token.
            printf(" %03d:   %s\t              Predirect\n", line_num, t);
            return;
        }
    }
    check_identifier(buffer);
}

int check_identifier(char buf[])
{
    line_num++;

    strcpy(tokens, buf);
    printf(" %03d:   %s                     Identifier\n", line_num, tokens);

    insert(tokens, strlen(tokens), UNDEF, line_row);

    return IDENT;
}

get_char(FILE *file, char *const buf, const int max_token)
{

    int length = 0;
    int ch;

    if ((ch = fgetc(file)) == EOF) 
    {
        return ENFI;
    }

    buffer[length++] = ch;
    buffer[length] = 0;                        /* In case 'ch' is separator, space, or unknown. */

    if (seperator(ch))
    {
        return SEPARATOR;
    }

    if (space(ch))
    {
        return SPACE;
    }

    if (neli(ch))
    {
        return NELI;
    }
    if (number(ch))
    {
        return NUMBER;
    }
    if (operators(ch))
    {
        return OPERATOR;
    }

    if (equalizer(ch))
    {
        return EQUALIZER;
    }

    if (!letter(ch))
    {
        return UNKNOWN;
    }

    while ((ch = fgetc(file)) != EOF && length < max_token) 
    {
        // If we see a non-letter, put it back on the input stream.
        if (!letter(ch)) 
        {
            ungetc(ch, file);
            break;
        }
        buffer[length++] = ch;
    }
    buffer[length] = 0;

    return WORD;
}

get_token()
{
    strcpy(ctype, "NULL");  //Initialising as "NULL"

    while ((token = get_char(file, buffer, 1000 + 1)) != ENFI)
    {
        line_num++;

        switch (token)
        {
        case SEPARATOR:
            printf(" %03d:   %s\t              Separator\n", line_num, buffer);
            break;
        case SPACE:
            printf("", buffer);
            break;
        case NELI:
            printf("%s", buffer);
            break;
        case NUMBER:
            printf(" %03d:   %s\t              Number\n", line_num, buffer);
            break;
        case OPERATOR:
            printf(" %03d:   %s\t              Operator\n", line_num, buffer);
            break;
        case EQUALIZER:
            printf(" %03d:   %s\t              Equalizer\n", line_num, buffer);
            break;
        case WORD:
            library(buffer);
            break;
        case UNKNOWN:
            printf(" %03d: \tASCII value %d       \tUnknown\n", line_num, buffer[0]);
            break;
        default:
            break;
        }
    }
}

int main()
{
    init_hash_table();

    file = fopen("source.txt", "r");
    get_token();

    listing = fopen("output.txt", "w");
    symtab_dump(listing);

    fclose(file);
    return 0;
}

我一直在使用这个哈希表,并试图让它适应。

//Symtab.c
#include "type.h"

/* maximum size of hash table */
#define size 211

/* maximum size of tokens-identifiers */
#define token_length 40

/* token types */
#define UNDEF 0
#define NUM_TYPE 1
#define DECI_TYPE 2
#define STRAND_TYPE 3
#define LOGIC_TYPE 4
#define LIST_TYPE 5
#define FUNCTION_TYPE 6

/* how parameter is passed */
#define BY_VALUE 1
#define BY_REFER 2
#define max_children 3

/* current scope */
int cur_scope = 0;

/* parameter struct */
typedef struct Trade 
{
    int trade_type;
    char trade_name[token_length];
    // to store value
    int num_value; double decii_value; char *symbol_strand_value;
    int passing; // value or reference
}Trade;

/* a linked list of references (lineno's) for each variable */
typedef struct reference_list
{
    int line_row;
    struct  reference_list *next;
    int type;
} reference_list;

// struct that represents a list node
typedef struct list_node
{
    char symbol_table_name[token_length];
    int symbol_table_size;
    int scope;
    reference_list *lines;

    // to store value and sometimes more information
    int symbol_table_num_value; double symbol_table_decii_value; char *symbol_strand_value;
    // type
    int symbol_table_type;

    int info_function_type; // for arrays (info type) and functions (return type)
    // array stuff
    int *num_values; double *decii_values; char **strand_values;
    int array_size;
    // function parameters
    Trade *trades;
    int num_of_pars;
    // pointer to next item in the list
    struct list_node *next;
}list_node;

/* the hash table */
static list_node **hash_table;


void init_hash_table() 
{
    int i;

    hash_table = malloc(size * sizeof(list_node*));

    for (i = 0; i < size; i++)
    {
        hash_table[i] = NULL;
    }
}

unsigned int hash(char *key)
{
    unsigned int hashval = 0;

    for (; *key != '\0'; key++)
    {
        hashval += *key;
    }

    hashval += key[0] % 11 + (key[0] << 3) - key[0];
    return hashval % size;
}

void insert(char *name, int len, int type, int lineno)
{
    line_row++;

    unsigned int hashval = hash(name);

    list_node *l = hash_table[hashval];

    while ((l != NULL) && (strcmp(name, l->symbol_table_name) != 0)) l =>next;

    /* variable not yet in table */
    if (l == NULL)
    {
        l = (list_node*)malloc(sizeof(list_node));

        strncpy(l->symbol_table_name, name, len);
        /* add to hashtable */
        l->symbol_table_type = type;
        l->scope = cur_scope;
        l->lines = (reference_list*)malloc(sizeof(reference_list));
        l->lines->line_row = lineno;
        l->lines->next = NULL;
        l->next = hash_table[hashval];
        hash_table[hashval] = l;
        //printf(" Inserted %s for the first time with linenumber %d!\n", name, line_row); // error checking
    }
    /* found in table, so just add line number */
    else 
    {
        l->scope = cur_scope;
        reference_list *t = l->lines;
        while (t->next != NULL) t = t->next;
        /* add linenumber to reference list */
        t->next = (reference_list*)malloc(sizeof(reference_list));
        t->next->line_row = lineno;
        t->next->next = NULL;
        //printf(" Found %s again at line %d!\n", name, line_row);
    }
}

list_node *lookup(char *name) 
{ 
    /* return symbol if found or NULL if not found */
    unsigned int hashval = hash(name);
    list_node *l = hash_table[hashval];
    while ((l != NULL) && (strcmp(name, l->symbol_table_name) != 0)) l = l->next;
    return l; // NULL is not found
}

list_node *lookup_scope(char *name, int scope) 
{ 
    /* return symbol if found or NULL if not found */
    unsigned int hashval = hash(name);
    list_node *l = hash_table[hashval];
    while ((l != NULL) && (strcmp(name, l->symbol_table_name) != 0) && (scope != l->scope)) l = l->next;
    return l; // NULL is not found
}

void hide_scope()
{
    /* hide the current scope */
    if (cur_scope > 0) cur_scope--;
}

void incr_scope() 
{ 
    /* go to next scope */
    cur_scope++;
}

/* print to stdout by default */
void symtab_dump(FILE * of)
{
    int i;
    fprintf(of, "_______________________________________________________________________\n");
    fprintf(of, "Name                                         Type          Line Numbers\n");
    fprintf(of, "-----------------------------------------------------------------------\n");

    for (i = 0; i < size; ++i) 
    {
        if (hash_table[i] != NULL) 
        {
            list_node *l = hash_table[i];

            while (l != NULL) 
            {
                reference_list *t = l->lines;

                fprintf(of, "%s", l->symbol_table_name);

                if (l->symbol_table_name == NUM_TYPE)
                {
                    fprintf(of, "%-7s", "num");
                }
                else if (l->symbol_table_name == DECI_TYPE)
                {
                    fprintf(of, "%-7s", "deci");
                }
                else if (l->symbol_table_name == STRAND_TYPE)
                {
                    fprintf(of, "%-7s", "strand");
                }
                else if (l->symbol_table_name == LIST_TYPE)
                {
                    fprintf(of, "list of ");

                    if (l->info_function_type == NUM_TYPE)
                    {
                        fprintf(of, "%-7s", "num");
                    }
                    else if (l->info_function_type == DECI_TYPE)
                    {
                        fprintf(of, "%-7s", "deci");
                    }
                    else if (l->info_function_type == STRAND_TYPE)
                    {
                        fprintf(of, "%-7s", "strand");
                    }
                    else fprintf(of, "%-7s", "undef");
                    {

                    }
                }
                else if (l->symbol_table_name == FUNCTION_TYPE)
                {
                    fprintf(of, "%-7s %s", "function returns ");

                    if (l->info_function_type == NUM_TYPE)
                    {
                        fprintf(of, "%-7s", "num");
                    }
                    else if (l->info_function_type == DECI_TYPE)
                    {
                        fprintf(of, "%-7s", "deci");
                    }
                    else if (l->info_function_type == STRAND_TYPE)
                    {
                        fprintf(of, "%-7s", "strand");
                    }
                    else fprintf(of, " %s", "undef");
                    {

                    }
                }
                else fprintf(of, " %s", "undef"); // if UNDEF or 0

                while (t != NULL)
                {
                    fprintf(of, " %10d ", t->line_row);
                    t = t->next;
                }
                fprintf(of, "\n");
                l = l->next;
            }
        }
    }
}

// Function Declarations
void init_hash_table(); // initialize hash table
unsigned int hash(char *key); // hash function 
void insert(char *name, int len, int type, int lineno); // insert entry
list_node *lookup(char *name); // search for entry
list_node *lookup_scope(char *name, int scope); // search for entry in scope
void hide_scope(); // hide the current scope
void incr_scope(); // go to next scope
void symtab_dump(FILE *of); // dump file

我添加了类型以便可以测试代码。

//Type.h
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <string.h>

int letter(int ch)
{
    return (((ch >= 'a') && (ch <= 'z')) || (ch >= 'A') && (ch <= 'Z') || (ch == '_'));
}

int number(int ch)
{
    return ((ch >= '0') && (ch <= '9'));
}

int separator(int ch)
{
    return ((ch == '#') || (ch == '(') || (ch == ')') || (ch == '{') || (ch == '}') || (ch == '[') || (ch == ']')
        || (ch == '<') || (ch == '>') || (ch == '.') || (ch == ',') || (ch == ':') || (ch == ';') || (ch == '\'') || (ch == '\"'));
}

int operators(int ch)
{
    return ((ch == '+') || (ch == '-') || (ch == '*') || (ch == '/') || (ch == '%'));
}

int equalizer(int ch)
{
    return ((ch == '=') || (ch == '!'));
}

int space(int ch)
{
    return ((ch == ' ') || (ch == '\t'));
}

int neli(int ch)
{
    return ((ch == '\n'));
}

标签: csyntaxcompiler-construction

解决方案


为了实现符号表,绝对不需要在哈希表中实现字符串存储。

在编译器编写的参考书目中,大多数书籍在词汇级别存储标识符,通过实习(制作每个标识符的单个副本以供以后使用)所有标识符,字符串(因为这也可以用字符串文字来完成,在表达式中用作常量的整数文字,通常,每个标记都有多种源代码形式)这允许您保存大量冗余信息,并促进例如字符串比较例程(因为只有一个这样的文字在实习生表)。这样,不同的哈希表可以用通用代码实现,只需向一个通用模块提供适当的函数(一个 hash() 计算器函数和一个 equals() 验证函数),负责对值进行实际数据访问.

因此,首先,哈希表代码通过将哈希函数应用于存储的元素来检查将使用哪个表槽。然后它使用比较函数来确定哪个冲突条目——在有多个条目的情况下——是请求的条目(这是 O(1) 的非常数部分,如果尺寸不太好)

正如您在对问题的评论中被告知的那样,您的代码中没有实现 ha 哈希表,因此很难看出在代码中您可能会犯错误或误解的地方,因此无法提供更多帮助。


推荐阅读