我正在做一项作业,要求我打印给定文本文件中出现频率最高的 10 个单词。我的代码正在打印文件中的单词,但它没有根据它们的频率对它们进行排序。

下面是我的代码。我用哈希表存储每个不重复的单词及其出现次数。目前我用自己编写的 wordcmp 函数比较单词,并在 main 中把它传给标准库的 qsort 函数进行排序。



前 10 个单词(共 10 个)是:

1 分钟

1 是

1 再次

3 开心

2 你好

1 如何

1 让

1 你

1 次尝试

1 这个


/* One node of a bucket chain: a unique word and its occurrence count. */
typedef struct word {
  char *s;           /* the word */
  int count;         /* number of times word occurs */
  struct word *next; /* next node in the same bucket, or NULL at chain end */
} word;

/* Hash table whose buckets are singly linked chains of word nodes. */
struct hashtable {
  word **table;      /* array of `tablesize` bucket heads */
  int tablesize;     /* number of buckets in `table` */
  int currentsize;   /* number of word nodes currently stored */
};
typedef struct hashtable hashtable;
int main(int argc, char *argv[])

    int top_words = 10;
    word *word = NULL;
    hashtable *hash = ht_create(5000);
    char *file_name;
    char *file_word;
    FILE *fp;
    struct word *present = NULL;

    fp = fopen (file_name, "r");
    if (fp == NULL)
        fprintf (stderr,"%s: No such file or directory\n", file_name);
        fprintf(stderr,"The top %d words (out of 0) are:\n", top_words); 

    while ((file_word = getWord(fp)))
        word = add(hash, file_word, 1);

    qsort((void*)hash->table, hash->currentsize, sizeof(word),(int (*)(const void *, const void *)) wordcmp);

    if(top_words > total_unique_words)
          top_words = total_unique_words;

    printf("the top %d words (out of %d) are:\n", top_words, total_unique_words);

    int iterations =0;
    for(i =0; i <= hash->tablesize && iterations< top_words; i++)
          present = hash->table[i];
          if(present != NULL)
              printf("     %4d %s\n", present->count, present->s);
              present = present->next;

 return 0;

/*
 * Compare two word nodes for a descending-by-count ordering.
 *
 * Returns a negative value when `a` occurs more often than `b`, a positive
 * value when it occurs less often, and 0 when the counts are equal.
 * A NULL node orders after every real node (two NULLs compare equal) so
 * the relation stays consistent for sorting.
 *
 * The parameters are the nodes themselves.  qsort() passes pointers to the
 * array elements, so when sorting an array of `word *` the callback given
 * to qsort must dereference its arguments once before calling this.
 */
int wordcmp (word *a, word *b)
{
    if (a == NULL || b == NULL)
        return (a == NULL) - (b == NULL);

    if (a->count < b->count)
        return +1;
    if (a->count > b->count)
        return -1;
    return 0;
}

/* Create a new hashtable. */
struct hashtable *ht_create( int size ) 
  int i;

  if( size < 1 ) 
    return NULL;

  hashtable *table = (hashtable *) malloc(sizeof(hashtable));
  table->table = (word **) malloc(sizeof(word *) * size);

  if(table != NULL)
      table->currentsize = 0;
      table->tablesize = size;

  for( i = 0; i < size; i++ ) 
    table->table[i] = NULL;

  return table; 

/*
 * Record `freq` occurrences of `key` in the hash table.
 *
 * If a node for `key` already exists in its bucket chain, `freq` is added
 * to that node's count and the node is returned.  Otherwise a new node
 * holding a private copy of `key` is pushed onto the front of the chain
 * (keeping the nodes already there) and the table's node count grows by 1.
 *
 * Returns the node for `key`, or NULL if memory for a new node could not
 * be allocated (the table is left unchanged in that case).
 */
word * add(hashtable *h, char *key, int freq)
{
    int index = hashcode(key) % h->tablesize;
    word *current;
    word *newnode;

    /* NOTE(review): hashcode()'s return type is not visible here; if it is
     * a signed type the remainder can be negative, so fold it back into range. */
    if (index < 0)
        index += h->tablesize;

    /* Search this bucket's chain for an existing node with the same word */
    for (current = h->table[index]; current != NULL; current = current->next) {
        if (strcmp(current->s, key) == 0) {
            current->count += freq;
            return current;
        }
    }

    /* Create new node if no duplicate is found */
    newnode = malloc(sizeof *newnode);
    if (newnode == NULL)
        return NULL;

    newnode->s = strdup(key);
    if (newnode->s == NULL) {
        free(newnode);
        return NULL;
    }
    newnode->count = freq;
    newnode->next = h->table[index];   /* keep the nodes that collided earlier */
    h->table[index] = newnode;
    h->currentsize = h->currentsize + 1;
    return newnode;
}

table[ 0] = NULL
table[ 1] = foo
table[ 2] = NULL
table[ 3] = |some|->|words|->|that|->|collided|  /* chained bucket */
table[ 4] = other
table[ 5] = words
table[ 6] = NULL
table[ 7] = NULL


上面的示意图说明了问题所在:哈希表的 table 数组中既有空桶(NULL),又有链式桶,直接对它调用 qsort 无法得到按频率排序的结果。为了省事,不妨干脆放弃哈希表,改用动态分配的 word**。你可以用类似的 add 函数来累加重复单词的出现次数,同时避开链式存储桶带来的所有问题。(如果每个单词都使用自动存储,最后只需对指针调用一次 free() 就完成了清理。)

下面的示例接受 2 个参数:第一个是要从中读取单词的文件名,第二个(可选)是一个整数,用来把排序后的输出限制为出现次数最多的前若干个单词。words_t 结构体为 word 成员使用自动存储,长度上限为 32 个字符(未删节词典中最长的单词为 28 个字符)。你可以根据需要修改单词的读取和解析方式,以忽略标点符号和复数形式。下面的代码以所有标点符号(连字符除外)作为单词分隔符,并丢弃单词的所有格后缀(例如遇到 "Mike's" 时只存储 "Mike",丢弃 "'s")。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>

#define MAXC   32   /* max word length is 28-char, 29-char is sufficient */
#define MAXW  128   /* initial maximum number of words to allocate */

/* One unique word together with the number of times it occurs. */
typedef struct {
    char word[MAXC];    /* the word itself, stored inline in the struct */
    size_t ninst;       /* the number of times the word occurs */
} words_t;

/*  function prototypes */
void *addword (words_t *words, const char *word, size_t *wc, size_t *maxw);
void *xrealloc (void *ptr, size_t psz, size_t *nelem);

/* qsort compare function for words_t (alphabetical) */
int cmpwrds (const void *a, const void *b)
    return strcmp (((words_t *)a)->word, ((words_t *)b)->word);

/* qsort compare function for words_t (by occurrence - descending)
 * and alphabetical (ascending) if occurrences are equal)
int cmpinst (const void *a, const void *b)
    int ndiff =  (((words_t *)a)->ninst < ((words_t *)b)->ninst) - 
                (((words_t *)a)->ninst > ((words_t *)b)->ninst);

    if (ndiff)
        return ndiff;

    return strcmp (((words_t *)a)->word, ((words_t *)b)->word);

/** count the words in the file named by argv[1] (or stdin when no file
 *  name is given) and print them by descending number of occurrences.
 *  an optional argv[2] limits the output to that many top words.
 *
 *  words are separated by whitespace and punctuation except '-'. a lone
 *  's' token (as left behind by a possessive "'s") is discarded.
 *  returns 0 on success, 1 on failure.
 */
int main (int argc, char **argv) {

    int c = 0, nc = 0, prev = ' ', status = 1;
    size_t maxw = MAXW, wc = 0, top = 0, total = 0;
    char buf[MAXC] = "";
    words_t *words = NULL;
    /* read from the named file, or from stdin when no name was given */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    if (argc > 2) { /* if 2 args, convert argv[2] to number of top words */
        char *p = argv[2];
        size_t tmp;

        errno = 0;  /* strtoul only sets errno on error; clear stale value */
        tmp = strtoul (argv[2], &p, 0);
        if (p != argv[2] && !errno)
            top = tmp;
    }

    /* allocate/validate initial words */
    if (!(words = calloc (maxw, sizeof *words))) {
        perror ("calloc-words");
        goto done;
    }

    while ((c = fgetc(fp)) != EOF) {        /* read each character in file */
        if (c != '-' && (isspace (c) || ispunct (c))) { /* word-end found */
            if (!isspace (prev) && !ispunct (prev)) {   /* multiple ws/punct */
                if (!(prev == 's' && nc == 1)) {        /* exclude "'s" */
                    buf[nc] = 0;                        /* nul-terminate */
                    words = addword (words, buf, &wc, &maxw);   /* add word */
                }
                nc = 0;     /* reset char count, also after a dropped "'s" */
            }
        }
        else if (nc < MAXC - 1) {   /* add char to buf */
            buf[nc++] = c;
        }
        else {  /* chars exceed MAXC - 1; storage capability of struct */
            fprintf (stderr, "error: characters exceed %d.\n", MAXC);
            goto done;
        }
        prev = c;   /* save previous char */
    }
    /* handle non-POSIX end (no trailing newline): flush the pending word */
    if (nc > 0 && !isspace (prev) && !ispunct (prev) &&
        !(prev == 's' && nc == 1)) {
        buf[nc] = 0;    /* buf may still hold the tail of a longer word */
        words = addword (words, buf, &wc, &maxw);
    }

    qsort (words, wc, sizeof *words, cmpinst);  /* sort words by frequency */

    printf ("'%s' contained '%zu' words.\n\n",  /* output total No. words */
            fp == stdin ? "stdin" : argv[1], wc);

    /* output top words (or all words in descending order if top not given);
     * never read past the wc words actually stored */
    if (top == 0 || top > wc)
        top = wc;
    for (size_t i = 0; i < top; i++) {
        printf ("  %-28s    %5zu\n", words[i].word, words[i].ninst);
        total += words[i].ninst;
    }
    printf ("%33s------\n%34s%5zu\n", " ", "Total: ", total);

    status = 0;

done:
    free (words);
    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return status;
}

/** add word to words, updating pointer to word-count 'wc' and
 *  the maximum words allocated 'maxw' as needed. returns pointer
 *  to words (which must be assigned back in the caller).
void *addword (words_t *words, const char *word, size_t *wc, size_t *maxw)
    size_t i;

    for (i = 0; i < *wc; i++)
        if (strcmp (words[i].word, word) == 0) {
            return words;

    if (*wc == *maxw)
        words = xrealloc (words, sizeof *words, maxw);

    strcpy (words[*wc].word, word);

    return words;

/** realloc 'ptr' of 'nelem' of 'psz' to 'nelem * 2' of 'psz'.
 *  returns pointer to reallocated block of memory with new
 *  memory initialized to 0/NULL. return must be assigned to
 *  original pointer in caller.
 *
 *  a block of 0 elements grows to 1 element. '*nelem' is updated to
 *  the new element count. on allocation failure, or if the new size
 *  cannot be represented in a size_t, an error is printed and the
 *  program exits with EXIT_FAILURE.
 */
void *xrealloc (void *ptr, size_t psz, size_t *nelem)
{
    size_t oldn = *nelem;
    size_t newn = oldn ? oldn * 2 : 1;
    void *memptr = NULL;

    /* only call realloc when newn * psz is non-zero and cannot overflow */
    if (psz != 0 && oldn <= (size_t)-1 / 2 && newn <= (size_t)-1 / psz)
        memptr = realloc (ptr, newn * psz);
    else
        errno = ENOMEM;

    if (!memptr) {
        perror ("realloc(): virtual memory exhausted.");
        exit (EXIT_FAILURE);
    }   /* zero new memory (optional) */
    memset ((char *)memptr + oldn * psz, 0, (newn - oldn) * psz);
    *nelem = newn;
    return memptr;
}



$ ./bin/getchar_wordcnt_top dat/damages.txt 10
'dat/damages.txt' contained '109' words.

  the                                12
  a                                  10
  in                                  7
  of                                  7
  and                                 5
  anguish                             4
  injury                              4
  jury                                4
  mental                              4
  that                                4
                           Total:    61

