c - C - 从文件中逐字读取并将其插入哈希表
问题描述
我正在尝试使用链接哈希表来计算 .txt 文件中所有单词的重复次数。所以这就是我所做的,这是我的头文件:
#ifndef _header5_
#define _header5_
typedef struct cellule {
char cle[15]; // Cle=Word
int valeur; // Number of occurrences of the word
struct cellule *suivant;
} Cellule;
typedef Cellule * Liste; // type Liste
typedef struct table_hachage {
int taille; // table length
Liste *linkcase; // table of listes
} Table_hachage;
// type TableHacage
typedef Table_hachage *TableHachage;
/***************Methods*****/
int max(int,int);
int count_words (FILE*);
void read_words(FILE*);
TableHachage cree_table_hachage(int);
int hachage(TableHachage, char*);
int insere(TableHachage,char*);
int recherche(TableHachage, char*);
int Get_Value(TableHachage, char*);
void fill(TableHachage,FILE*);
#endif
这是我对头文件的实现:
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "fnv.h"
#include "header5.h"
int max(int a,int b){
if(a>b)
return a;
else if(a<b)
return b;
else
return a;
}
int count_words (FILE *f) {
char word[1024];
int count=0;
/* assumes no word exceeds length of 1023 */
while (fscanf(f, " %1023s", word) == 1) {
count++;
}
return count;
}
void read_words(FILE *f) {
char word[1024];
/* assumes no word exceeds length of 1023 */
while (fscanf(f, " %1023s", word) == 1) {
puts(word);
}
}
//Create Empty chaining hashtable
TableHachage cree_table_hachage(int taille) {
int i;
TableHachage table = (TableHachage)malloc(sizeof(Table_hachage));
table->taille = taille;
table->linkcase = (Liste*)malloc(table->taille * sizeof(Liste));
for (i = 0; i < table->taille; i++)
table->linkcase[i] = NULL;
printf("HashTable is created\n");
return table;
}
//Getting the hash code using FNV1 Algorithm
//it works just fine by the way.
int hachage(TableHachage table, char *cle) {
//FNV HashCode Algorithm version 32-bits
int codeh = fnv_32_str(cle, FNV1_32_INIT);
return (abs(codeh) % table->taille);
}
//Insert the word in the hashtable
int insere(TableHachage table, char* cle) {
int codeh;
Liste liste = NULL;
codeh = hachage(table, cle); //getting the HashCode
liste = table->linkcase[codeh]; //getting the LinkedList at index==HashCode
while (liste) {
//In case the key is already existed we increment its value
//which indicates the number of repetition of that word
if (strcmp(liste->cle, cle) == 0){
liste->valeur++;
return 0;
}
liste = liste->suivant;
}
//if it's the first time to encounter the word
//we insert it and give it's value 1
if (liste == NULL) {
liste = (Liste)malloc(sizeof(Cellule));
strcpy(liste->cle, cle);
liste->valeur=1;
liste->suivant = table->linkcase[codeh];
table->linkcase[codeh] = liste;
return 1;
}
}
//Search existence of a word
int recherche(TableHachage table, char *cle){
Liste liste = table->linkcase[hachage(table, cle)];
for (; liste; liste = liste->suivant)
if (strcmp(cle, liste->cle) == 0)
return 1;
return 0;
}
//Getting value of a key a.k.a number of repetition of a word
int Get_Value(TableHachage table, char *cle){
Liste liste = table->linkcase[hachage(table, cle)];
for (; liste; liste = liste->suivant)
if (strcmp(cle, liste->cle) == 0)
return liste->valeur;
}
//Fill my hashcode with words and number of repetition of that key in the file
void fill(TableHachage table,FILE* f){
char word[1024];
/* assumes no word exceeds length of 1023 */
while (fscanf(f, " %1023s", word) == 1) {
insere(table,word);
}
}
所以我的问题在于fill()
函数,它与 read_word() 函数几乎相同,除了打印单词之外,它工作得很好,我希望将它插入哈希表中。
当我检查哪个部分在 fill() 函数中不起作用时,我意识到它永远不会进入 while 循环。所以当我搜索这个词时,它找不到。
那么任何人都可以向我解释这个吗?
编辑: 这是我的主要():
#include <stdio.h>
#include <stdlib.h>
#include "fnv.h"
#include "header5.h"
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
int main(int argc, char *argv[]) {
FILE *file=fopen("fich.txt", "r");
int n=count_words(file);
//Creating an empty chaining hash table
TableHachage T=cree_table_hachage(n);
//fill hash table with words and its number of repetition in the text file
fill(T,file);
//student is a word in my file
if(recherche(T,"student")==1){
printf("found\n");
}
else{
printf("couldn't be found\n");
}
int occ=Get_Value(T, "student");
printf("Occ is: %d\n",occ);
fclose(file);
return 0
}
解决方案
作为这个问题的解决方案,这就是我所做的
#include <stdio.h>
#include <stdlib.h>
#include "fnv.h"
#include "header5.h"
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
int main(int argc, char *argv[]) {
FILE *file=fopen("fich.txt", "r");
int n=count_wordsV2(file);
//printf("number of words is: %d\n",n);
fclose(file);
file=fopen("fich.txt", "r");
//Creating an empty chaining hash table
TableHachage T=cree_table_hachage(n);
//fill hash table with words and its number of repetition in the text file
fill(T,file);
//Get occurences of each word in text file
char* Max=Get_Occurences(T);
printf("The most repeated word is: \"%s\" with a value=%d\n",Max,Get_Value(T, Max));
//int occ=Get_Value(T,"pilots");
//printf("occ: %d\n",occ);
fclose(file);
return 0;
}
推荐阅读
- c++ - std::filesystem 试图调用图像文件
- python-3.x - Protocol not available on SO_ACCEPTCONN
- java - 如何在spring boot中将创建的文件保存到application.properties文件中指定的路径
- javascript - React:如何访问对象和显示值?
- python - Python - 遍历边缘列表,对于具有特定属性的节点,找到所有具有不同特定属性的连接节点?
- python - 使用 pyinstaller 转换为 exe 时未加载 PYQT5 应用程序图像
- keras - keras input reshape ,我是否使输入变平?
- javascript - React Typescript 组件未正确调度 redux 操作
- discord.py - Discord.py 有没有办法改变机器人的状态?
- python - 从中提取文本
- 具体后
在特定的
用美丽的汤我正在尝试使用 BeautifulSoup从此页面中的某个“ol”中提取文本。我想要获取的信息位于具有特定类的特定“div”下,但我希望列表项中的文本立即出现在某个“h3”之后,其中包括带有类和 id 的“span”。看图