首页 > 解决方案 > CS50 Problem set 5 speller trie 数据结构问题 - 使用 fscanf 将字符串从文件复制到 trie?

问题描述

我成功地为这个问题实现了一个哈希表,但是我想尝试实现一个 trie。不幸的是,当我运行 check50 时,我收到以下错误消息:

:) dictionary.c, dictionary.h, and Makefile exist
:) speller compiles
:( handles most basic words properly
    expected "MISSPELLED WOR...", not "MISSPELLED WOR..."
:( handles min length (1-char) words
    expected "MISSPELLED WOR...", not "MISSPELLED WOR..."
:( handles max length (45-char) words
    expected "MISSPELLED WOR...", not "MISSPELLED WOR..."
:( handles words with apostrophes properly
    expected "MISSPELLED WOR...", not "MISSPELLED WOR..."
:( spell-checking is case-insensitive
    expected "MISSPELLED WOR...", not "MISSPELLED WOR..."
:( handles substrings properly
    expected "MISSPELLED WOR...", not "MISSPELLED WOR..."
:| program is free of memory errors
    can't check until a frown turns upside down

第一条错误消息的日志是:

:( handles most basic words properly
Cause
expected "MISSPELLED WOR...", not "MISSPELLED WOR..."
Log
running ./speller basic/dict basic/text...
checking for output "MISSPELLED WORDS\n\n\nWORDS MISSPELLED: 0\nWORDS IN DICTIONARY: 8\nWORDS IN TEXT: 9\n"...

Expected Output:
MISSPELLED WORDS


WORDS MISSPELLED:     0
WORDS IN DICTIONARY:  8
WORDS IN TEXT:        9

Actual Output:
MISSPELLED WORDS

The
quick
brown
fox
jumps
over
the
lazy
dog

从这些错误消息中,我认为问题出在我的加载功能或检查功能上。

与哈希表实现一样,我使用 fscanf 将要打开的文件中的每个单词复制到临时缓冲区中,然后将其复制到 trie 中。然而,我见过的其他方法似乎使用 fgetc ,这对我来说似乎效率更低。

是我用来从文件中转录字符串的方法的问题还是单独的逻辑问题?

这是我的完整代码:

dictionary.c(我写的)

// Implements a dictionary's functionality

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <ctype.h>

#include "dictionary.h"

// Decalre trie node struct
typedef struct trie
{
    bool isLeaf;
    struct trie* children[Alphabet_size];
}
trie;

//Declare root node globally
trie* root_node;

// Declare word globally
char string[LENGTH + 1];

// Keep track of number of words in dictionary
int dict_size = 0;

// Function that gets new trie node
struct trie* getNewTrieNode(void)
{
    // Malloc memory for new trie node and check pointer != NULL
    struct trie* node = malloc(sizeof(trie));
    if (node != NULL)
    {
        // Set isLeaf Boolean to false
        node->isLeaf = false;
        // Initialise all 26 child pointers to NULL
        for (int i = 0; i < Alphabet_size; i++)
        {
            node->children[i] = NULL;
        }
    }
    return node;
}

// Loads dictionary into memory, returning true if successful else false
bool load(const char *dictionary)
{
    //Open dictionary.h file
    FILE *dict_ptr = fopen(dictionary, "r");
    // Check to see if dictionary.h file pointer is NULL
    if (dict_ptr == NULL)
    {
        fprintf(stderr, "Error: Could not open %s", dictionary);
        return false;
    }
    // Create array of chars into which we can temporarily store each word
    char buffer[LENGTH + 1];
    // Dynamically allocate memory for universal root node
    root_node = malloc(sizeof(trie));
    // Loop over whole file to copy each string until we reach EOF
    while (fscanf(dict_ptr, "%s", buffer) != EOF)
    {
        // Insert word into Trie data structure
        // Create pointer to root_node in order to traverse data structure
        struct trie* current_node = root_node;
        int j = strlen(buffer);
        for (int i = 0; i < j; i++)
        {
            // Create new node if child pointer does not point to path
            if (current_node->children[buffer[i] - 'a'] == NULL)
            {
                current_node->children[buffer[i] - 'a'] = getNewTrieNode();
            }
            //Go to next node if child pointer points to one
            current_node = current_node->children[buffer[i] - 'a'];
            // Move to next character in string
            buffer[i]++;
        }
        dict_size++;
    }
    fclose(dict_ptr);
    return true;
}

// Returns true if word is in dictionary else false
bool check(const char* word)
{
    // Make lowercase copy of word (so it will match path of corresponding word from dictionary)
    int n = strlen(word);
    char word_copy[n + 1];
    for (int i = 0; i < n; i++)
    {
        word_copy[i] = tolower(word[i]);
    }
    // Add NULL terminating character to lowercase copy of word
    word_copy[n] = '\0';
    // Search Trie data structure for lowercase copy of word
    // Return false if Trie is empty
    if (root_node == NULL)
    {
        return false;
    }
    // Declare pointer to root_node to traverse Trie data structure
    struct trie* current_node = root_node;
    // Declare integer the length of the lowercase copy of word
    int k = strlen(word_copy);
    for (int i = 0; i < k; i++)
    {
        // Go to next node
        current_node = current_node->children[word_copy[i] - 'a'];
        // If current_node pointer is NULL we have reached the end of the path and the string has not been found
        if (current_node == NULL)
        {
            return false;
        }
        // Move to next character in string
        word_copy[i]++;
    }
    // If we have gotten through whole string, current node must be Leaf Node corresponding to that string
    return (current_node->isLeaf);
}

// Returns number of words in dictionary if loaded else 0 if not yet loaded
unsigned int size(void)
{
    return dict_size;
}

// Prototype for destroy trie data structure function
void destroy(struct trie* root_node);

// Unloads dictionary from memory, returning true if successful else false
bool unload(void)
{
    destroy(root_node);
    return true;
}

// Destroys entire trie data structure recursively and frees dynamically allocated memory
void destroy(struct trie* root)
{
    // Initialise current node to point to root node to traverse trie data structure
    struct trie* current_node = root_node;
    // Recursively delete entire trie data structure
    for (int i = 0; i < Alphabet_size; i++)
    {
        if (current_node->children[i] != NULL)
        {
            destroy(current_node->children[i]);
        }
    }
    free(current_node);
}

接下来的两个文件是我不更改的分发代码:

字典.h

// Declares a dictionary's functionality

#ifndef DICTIONARY_H
#define DICTIONARY_H

#include <stdbool.h>

// Maximum length for a word
// (e.g., pneumonoultramicroscopicsilicovolcanoconiosis)
#define LENGTH 45
#define Alphabet_size (26)

// Prototypes
struct trie* getNewTrieNode(void);
bool check(const char *word);
unsigned int hash(const char *word);
bool load(const char *dictionary);
unsigned int size(void);
bool unload(void);

#endif // DICTIONARY_H
// Implements a spell-checker

#include <ctype.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>

#include "dictionary.h"

// Undefine any definitions
#undef calculate
#undef getrusage

// Default dictionary
#define DICTIONARY "dictionaries/large"

// Prototype
double calculate(const struct rusage *b, const struct rusage *a);

int main(int argc, char *argv[])
{
    // Check for correct number of args
    if (argc != 2 && argc != 3)
    {
        printf("Usage: ./speller [DICTIONARY] text\n");
        return 1;
    }

    // Structures for timing data
    struct rusage before, after;

    // Benchmarks
    double time_load = 0.0, time_check = 0.0, time_size = 0.0, time_unload = 0.0;

    // Determine dictionary to use
    char *dictionary = (argc == 3) ? argv[1] : DICTIONARY;

    // Load dictionary
    getrusage(RUSAGE_SELF, &before);
    bool loaded = load(dictionary);
    getrusage(RUSAGE_SELF, &after);

    // Exit if dictionary not loaded
    if (!loaded)
    {
        printf("Could not load %s.\n", dictionary);
        return 1;
    }

    // Calculate time to load dictionary
    time_load = calculate(&before, &after);

    // Try to open text
    char *text = (argc == 3) ? argv[2] : argv[1];
    FILE *file = fopen(text, "r");
    if (file == NULL)
    {
        printf("Could not open %s.\n", text);
        unload();
        return 1;
    }

    // Prepare to report misspellings
    printf("\nMISSPELLED WORDS\n\n");

    // Prepare to spell-check
    int index = 0, misspellings = 0, words = 0;
    char word[LENGTH + 1];

    // Spell-check each word in text
    for (int c = fgetc(file); c != EOF; c = fgetc(file))
    {
        // Allow only alphabetical characters and apostrophes
        if (isalpha(c) || (c == '\'' && index > 0))
        {
            // Append character to word
            word[index] = c;
            index++;

            // Ignore alphabetical strings too long to be words
            if (index > LENGTH)
            {
                // Consume remainder of alphabetical string
                while ((c = fgetc(file)) != EOF && isalpha(c));

                // Prepare for new word
                index = 0;
            }
        }

        // Ignore words with numbers (like MS Word can)
        else if (isdigit(c))
        {
            // Consume remainder of alphanumeric string
            while ((c = fgetc(file)) != EOF && isalnum(c));

            // Prepare for new word
            index = 0;
        }

        // We must have found a whole word
        else if (index > 0)
        {
            // Terminate current word
            word[index] = '\0';

            // Update counter
            words++;

            // Check word's spelling
            getrusage(RUSAGE_SELF, &before);
            bool misspelled = !check(word);
            getrusage(RUSAGE_SELF, &after);

            // Update benchmark
            time_check += calculate(&before, &after);

            // Print word if misspelled
            if (misspelled)
            {
                printf("%s\n", word);
                misspellings++;
            }

            // Prepare for next word
            index = 0;
        }
    }

    // Check whether there was an error
    if (ferror(file))
    {
        fclose(file);
        printf("Error reading %s.\n", text);
        unload();
        return 1;
    }

    // Close text
    fclose(file);

    // Determine dictionary's size
    getrusage(RUSAGE_SELF, &before);
    unsigned int n = size();
    getrusage(RUSAGE_SELF, &after);

    // Calculate time to determine dictionary's size
    time_size = calculate(&before, &after);

    // Unload dictionary
    getrusage(RUSAGE_SELF, &before);
    bool unloaded = unload();
    getrusage(RUSAGE_SELF, &after);

    // Abort if dictionary not unloaded
    if (!unloaded)
    {
        printf("Could not unload %s.\n", dictionary);
        return 1;
    }

    // Calculate time to unload dictionary
    time_unload = calculate(&before, &after);

    // Report benchmarks
    printf("\nWORDS MISSPELLED:     %d\n", misspellings);
    printf("WORDS IN DICTIONARY:  %d\n", n);
    printf("WORDS IN TEXT:        %d\n", words);
    printf("TIME IN load:         %.2f\n", time_load);
    printf("TIME IN check:        %.2f\n", time_check);
    printf("TIME IN size:         %.2f\n", time_size);
    printf("TIME IN unload:       %.2f\n", time_unload);
    printf("TIME IN TOTAL:        %.2f\n\n",
           time_load + time_check + time_size + time_unload);

    // Success
    return 0;
}

// Returns number of seconds between b and a
double calculate(const struct rusage *b, const struct rusage *a)
{
    if (b == NULL || a == NULL)
    {
        return 0.0;
    }
    else
    {
        return ((((a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec) -
                  (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)) +
                 ((a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec) -
                  (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)))
                / 1000000.0);
    }
}

标签: ccs50trie

解决方案


推荐阅读