c - 内存泄漏 - 如何为在另一个结构中作为线程参数传递的 typedef 结构分配内存?
问题描述
我对 C 语言非常陌生,并且正在努力进行适当的内存管理,并且在我的程序中遇到了许多 seg 错误。
我的最终目标是将文本文件和输入的线程数作为用户参数,获取文件大小,然后根据用户输入的线程数拆分文件。
然后每个线程将读取文件的一部分,然后从读取的块中提取标记(token)。如果标记的长度大于 5 个字符,则将其连同该标记在整个文本中出现的次数一起添加到数组中。所以最终我希望得到文本中长度大于 5 个字符、出现次数最多的前 n 个单词的列表。
然而,这可能是我第三次使用 C 语言并且我很挣扎并且遇到了很多错误。我一直在尝试使用 valgrind 和其他调试工具自己解决这个问题,但我很迷茫。
从 valgrind,我收到以下消息:
==27634== 1 errors in context 1 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B7B4C: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 2 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B7014: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 3 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B3875: _itoa_word (_itoa.c:179)
==27634== by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 4 of 5:
==27634== Use of uninitialised value of size 8
==27634== at 0x50B386B: _itoa_word (_itoa.c:179)
==27634== by 0x50B6F0D: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634==
==27634== 1 errors in context 5 of 5:
==27634== Conditional jump or move depends on uninitialised value(s)
==27634== at 0x50B78DA: vfprintf (vfprintf.c:1642)
==27634== by 0x50BFF25: printf (printf.c:33)
==27634== by 0x10919F: main (in /home/student/Desktop/hw2/main)
==27634==
==27634== ERROR SUMMARY: 5 errors from 5 contexts (suppressed: 0 from 0)
我还看到 "Address xxx is 0 bytes after a block of size 60 alloc'd"(地址 xxx 位于一个已分配的 60 字节块之后 0 字节处)的消息
我相信我的问题在于:
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
这是因为我没有为 wordStruct 词正确分配内存吗?我不知道如何解决它,任何帮助将不胜感激。
谢谢
完整代码如下
#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
// One entry of the word tally built by processFile.
typedef struct {
// token text, copied in with strcpy; fixed 50-byte buffer
char word[50];
// occurrences: set to 1 when the entry is created, incremented on each repeat
int count;
// processFile stores its running `added` total here, through the first array element only
int totalWords;
} wordsStruct ;
// Work description that main copies to the heap and hands to each thread.
struct argStruct {
// input stream opened by main; the same FILE* is given to every thread
FILE *file;
// slice boundaries computed by main: start = j, end = j + divide
int start;
int end;
// index of the thread (main's loop counter)
int count;
// result array; main passes the same allWords pointer to every thread
wordsStruct *words;
};
// Return the number of characters in s that precede its terminating NUL.
int stringLength(char s[]) {
    const char *cursor = s;
    while (*cursor != '\0') {
        ++cursor;
    }
    return (int) (cursor - s);
}
// Normalise a token in place: lower-case every ASCII letter and drop every
// other character, closing the gaps so the remaining letters stay contiguous.
// Example: "Don't!" becomes "dont".
//
// line: writable, NUL-terminated string. The result is never longer than the
//       input, so the buffer is reused as-is.
//
// Non-letters are removed one by one rather than cutting the token at the
// first one, so punctuation inside or after a word does not lose letters.
void groomString(char *line){
    size_t keep = 0;  // next position to write a surviving letter
    for (size_t scan = 0; line[scan] != '\0'; ++scan) {
        // <ctype.h> functions need an unsigned char value, not a plain char
        int lowered = tolower((unsigned char) line[scan]);
        if (lowered >= 'a' && lowered <= 'z') {
            line[keep++] = (char) lowered;
        }
    }
    line[keep] = '\0';
}
// Return 0 when n is 0, otherwise n + 1.
//
// The comparison must be `==`: with `n = 0` the condition assigns zero, is
// always false, and the function returns 1 for every argument.
int counter(int n){
    static int test;  // keeps the most recent argument between calls
    test = n;
    if (n == 0) {
        return test;
    }
    return n + 1;
}
// Thread routine: reads a chunk of the shared file, splits it on whitespace and
// tallies every token longer than 5 characters in the shared words array.
// input points to a heap-allocated struct argStruct prepared by main.
// NOTE(review): the function returns void and is cast to (void *) at the
// pthread_create call site.
void processFile(void *input) {
// work on a by-value copy of the argument block; `input` itself is not freed here
struct argStruct params = *(struct argStruct *) input;
wordsStruct *words = params.words;
FILE *textFile = params.file;
int start = params.start;
int end = params.end;
// copied but not used below
int count = params.count;
// slice length plus 10 extra bytes
int size = (end - start) + 10;
char delim[] = " \t\v\n\r";
// another 10 bytes are added on top of the 10 already included in size
char *readFile = (malloc(sizeof(char) * size +10));
// NOTE(review): reads from the stream's current position; there is no fseek to
// `start`, and the same FILE* is used by every thread. fread does not write a
// terminating NUL into readFile.
fread(readFile, 1, size, textFile);
char *copy = (malloc(sizeof(char) * size +10));
// NOTE(review): strcpy stops at the first NUL byte, which fread did not add
strcpy(copy, readFile);
char *saveptr;
int inArray;
int length;
// static: a single counter shared by every call, and therefore by every thread
static int added;
char *token = strtok_r(copy, delim, &saveptr);
while (token) {
groomString(token);
length = stringLength(token);
if (length > 5) {
// 0 doubles as the "not found" marker, so a match at index 0 looks like a miss
inArray = 0;
for (int i = 0; i < added; i++) {
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
}
if (inArray == 0) {
// incremented before the store: slot 0 is never written, and the newest slot
// (index `added`) lies outside the range the search loop above covers
added++;
// NOTE(review): main allocates room for exactly one wordsStruct, so
// words[added] with added >= 1 is outside that allocation
strcpy(words[added].word, token);
words[added].count = 1;
} else {
words[inArray].count++;
}
}
token = strtok_r(NULL, delim, &saveptr);
}
words->totalWords = added;
// token is NULL once the loop has ended, so this call frees nothing;
// `copy` is not freed anywhere in this function
free(token);
free(readFile);
}
// Entry point: argv[1] is the text file, argv[2] the number of threads.
// Opens the file, splits its length into threadCount equal slices and starts
// one processFile thread per slice, then joins the threads and closes the file.
int main(int argc, char *argv[])
{
FILE *pfile;
int threadCount = 0, fileSize = 0, divide = 0;
// room for exactly one wordsStruct; this pointer is shared by all threads
wordsStruct *allWords = (wordsStruct *) malloc( sizeof(wordsStruct));
if (argc > 2)
{
pfile = fopen( argv[1], "r");
if (pfile == NULL){
// the failure is reported, but execution continues with pfile == NULL
perror("FILE OPEN FAILURE");
}
threadCount = atoi(argv[2]);
// sized for 10 * threadCount thread handles
pthread_t * thread = malloc(sizeof(pthread_t)* threadCount *10);
// determine the file length by seeking to the end, then rewind
fseek(pfile, 0, SEEK_END);
fileSize= ftell(pfile);
fseek(pfile, 0, SEEK_SET);
// integer division: the remainder bytes at the end belong to no slice;
// NOTE(review): threadCount is not checked for 0 before dividing
divide = (fileSize/threadCount);
struct argStruct arguments;
arguments.file = pfile;
arguments.words = allWords;
int j = 0;
for(int i = 0; i < threadCount; i++) {
arguments.start = j;
arguments.end = j+divide;
arguments.count = i;
// each thread gets its own heap copy of the argument block;
// it is not freed in this function
struct argStruct *passArgs = malloc(sizeof *passArgs);
*passArgs = arguments;
pthread_create(&thread[i], NULL, (void *) processFile, passArgs);
j+=divide;
}
// NOTE(review): runs threadCount + 1 times, one more than the number of
// threads created; thread[threadCount] was never set by pthread_create
for (int i = 0; i < threadCount +1; i++){
pthread_join(thread[i], NULL);
}
// allWords and thread are not freed before returning
fclose(pfile);
} else {
printf("Please enter text file name and number of threads");
}
return 0;
}
解决方案
首先,对于最后一个线程,您需要[在设置 end 之后]加上:
if (i == (threadCount - 1)) arguments.end = fileSize;
获取最后一段中的所有字节并且不超出 EOF。只要文件大小不是线程数的精确倍数,就需要这样做。
为了防止传递给线程的 argStruct 发生内存泄漏,您需要在 processFile 的末尾调用 free(input)。
此外,线程会共享某些资源(请参阅 man pthreads),其中值得注意的是已打开的文件描述符。因此,访问 textFile 时需要用互斥锁保护(例如 pthread_mutex_lock(&text_mutex); 等等)。
而且,每个线程都必须针对它要访问的文件部分自行执行 fseek。
并且,size 带有 10 的"余量(slop factor)"。这对于分配的缓冲区来说是安全的,但它会导致读取过多的数据。最好去掉 + 10,或者写成:int exact_size = end - start;。另外请注意,您在设置 size 时已经加了一次"余量",又在 malloc 中额外加了一次,因此没有必要重复。
另外,请注意,fread 不像 fgets 那样保证缓冲区末尾有一个 EOS 字符 (0x00)。因此,如果您要对缓冲区执行字符串操作,就需要自己补上这个字符(并且需要至少为 1 的"余量"):
所以,我们需要:
pthread_mutex_lock(&text_mutex);
fseek(textFile,start,0);
fread(readFile,1,exact_size,textFile);
readFile[exact_size] = 0;
pthread_mutex_unlock(&text_mutex);
而且,请记住,main 需要在执行 pthread_create 之前,先用 pthread_mutex_init 初始化 text_mutex。
但...
在这里使用 fread 可能会有问题。当您把文件切成长度为 divide 的块时,您[可能]会把文件切成这样:第一个线程的最后一行被截断,而下一个线程会在看到它自己的第一个完整行之前,先看到该行的剩余部分,等等……
您最好在 main 中对整个文件执行一次 mmap,并让 main 扫描缓冲区,寻找换行符[或空白],为每个线程提供一个保证与换行符对齐的段。
更新:
我编写了一个使用 mmap 并修复了一个小错误的版本[请原谅顺手做的风格清理]。即使使用 -O2 -Wall,它也可以干净地编译[您应该始终使用这些选项来捕获所有警告]。我没有测试过它,但它应该能让您更进一步。
#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>
char *filebuf;
// One entry of the word tally built by processFile.
typedef struct {
// token text, copied in with strcpy; fixed 50-byte buffer
char word[50];
// occurrences: set to 1 when the entry is created, incremented on each repeat
int count;
// processFile stores its running `added` total here, through the first array element only
int totalWords;
} wordsStruct;
// Per-thread record kept in main's `threads` array and passed to processFile.
struct argStruct {
// handle filled in by pthread_create and used by pthread_join
pthread_t threadid;
// byte offsets into filebuf delimiting this thread's segment
int start;
int end;
// index of the thread (main's loop counter)
int count;
// result array; main passes the same allWords pointer to every thread
wordsStruct *words;
};
// Count the characters of s up to, but not including, the terminating NUL.
int
stringLength(char s[])
{
    int length;

    for (length = 0; s[length] != '\0'; ++length) {
        // nothing to do: the loop header performs the scan
    }
    return length;
}
// Normalise a token in place: lower-case every ASCII letter and drop every
// other character, closing the gaps so the remaining letters stay contiguous.
// Example: "Don't!" becomes "dont".
//
// line: writable, NUL-terminated string. The result is never longer than the
//       input, so the buffer is reused as-is.
//
// Non-letters are removed one by one rather than cutting the token at the
// first one, so punctuation inside or after a word does not lose letters.
void
groomString(char *line)
{
    size_t keep = 0;  // next position to write a surviving letter

    for (size_t scan = 0; line[scan] != '\0'; ++scan) {
        // <ctype.h> functions need an unsigned char value, not a plain char
        int lowered = tolower((unsigned char) line[scan]);

        if (lowered >= 'a' && lowered <= 'z')
            line[keep++] = (char) lowered;
    }
    line[keep] = '\0';
}
// Return 0 when n is 0, otherwise n + 1.
int
counter(int n)
{
    static int test;  // keeps the most recent argument between calls

    test = n;
    // NOTE: this must be the equality operator; `n = 0` would be an assignment
    // whose value is always false
    return (n == 0) ? test : n + 1;
}
// Thread routine: copies this thread's segment out of the global filebuf,
// splits it on whitespace and tallies every token longer than 5 characters in
// the words array.
// input points to the struct argStruct that main prepared for this thread.
// NOTE(review): the function returns void and is cast to (void *) at the
// pthread_create call site.
void
processFile(void *input)
{
struct argStruct *params = input;
wordsStruct *words = params->words;
int start = params->start;
int end = params->end;
#if 0
int count = params->count;
#endif
// segment length plus 10 extra bytes
int size = (end - start) + 10;
char delim[] = " \t\v\n\r";
// private, NUL-terminated copy so that strtok_r may modify it
char *copy = malloc(size + 1);
// NOTE(review): copies 10 bytes beyond `end`; for a segment that ends at the
// file size this reads past the fileSize bytes that main mapped
memcpy(copy,&filebuf[start],size);
copy[size] = 0;
char *saveptr;
int inArray;
int length;
// static: a single counter shared by every call, and therefore by every thread
static int added;
char *token = strtok_r(copy, delim, &saveptr);
while (token) {
groomString(token);
length = stringLength(token);
if (length > 5) {
// 0 doubles as the "not found" marker, so a match at index 0 looks like a miss
inArray = 0;
for (int i = 0; i < added; i++) {
if (strcmp(words[i].word, token) == 0) {
inArray = i;
}
}
if (inArray == 0) {
// incremented before the store: slot 0 is never written, and the newest slot
// (index `added`) lies outside the range the search loop above covers
added++;
// NOTE(review): main allocates room for exactly one wordsStruct, so
// words[added] with added >= 1 is outside that allocation
strcpy(words[added].word, token);
words[added].count = 1;
}
else {
words[inArray].count++;
}
}
token = strtok_r(NULL, delim, &saveptr);
}
words->totalWords = added;
free(copy);
// token is NULL once the loop has ended, so this call frees nothing
free(token);
}
// Program entry point.
//
// Usage: <program> <text file> <thread count>
//
// Maps the file read-only into the global filebuf, cuts it into threadCount
// segments that each end just after a newline (so no line is split between two
// threads), runs processFile on every segment in its own thread and waits for
// all of them.
//
// Returns 0 on success, for an empty file, and when the usage text is printed;
// returns 1 on an invalid thread count or an I/O, allocation or thread error.
int
main(int argc, char *argv[])
{
    if (argc <= 2) {
        printf("Please enter text file name and number of threads");
        return 0;
    }

    int threadCount = atoi(argv[2]);
    if (threadCount <= 0) {
        // also protects the division by threadCount further down
        fprintf(stderr, "thread count must be a positive integer\n");
        return 1;
    }

    int pfile = open(argv[1],O_RDONLY);
    if (pfile < 0) {
        perror("FILE OPEN FAILURE");
        return 1;
    }

    struct stat st;
    if (fstat(pfile,&st) < 0) {
        perror("fstat");
        close(pfile);
        return 1;
    }

    // segment offsets are stored as int, so the file size has to fit in one
    int fileSize = (int) st.st_size;
    if (fileSize < 0 || (off_t) fileSize != st.st_size) {
        fprintf(stderr, "file is too large\n");
        close(pfile);
        return 1;
    }
    if (fileSize == 0) {
        // nothing to count, and mmap does not accept a zero length
        close(pfile);
        return 0;
    }

    filebuf = mmap(NULL,(size_t) fileSize,PROT_READ,MAP_PRIVATE,pfile,0);
    if (filebuf == MAP_FAILED) {
        perror("mmap");
        close(pfile);
        return 1;
    }

    int status = 1;
    // NOTE(review): processFile stores entries at words[added] with added >= 1,
    // which is outside this one-element array; the list has to be able to grow
    wordsStruct *allWords = calloc(1, sizeof *allWords);
    struct argStruct *threads = malloc(sizeof *threads * (size_t) threadCount);

    if (allWords == NULL || threads == NULL) {
        perror("malloc");
    }
    else {
        int divide = fileSize / threadCount;

        for (int i = 0; i < threadCount; i++) {
            struct argStruct *arg = &threads[i];

            arg->words = allWords;
            arg->count = i;
            arg->start = (i == 0) ? 0 : arg[-1].end;

            // tentative end, moved forward to just past the next newline
            off_t curpos = arg->start + divide;
            for (; curpos < fileSize; ++curpos) {
                if (filebuf[curpos] == '\n') {
                    ++curpos;
                    break;
                }
            }
            // the last segment always runs to the end of the file, so the
            // bytes after its final newline are not dropped
            if (curpos > fileSize || i == threadCount - 1)
                curpos = fileSize;
            arg->end = (int) curpos;
        }

        // start the workers; stop at the first failure and join only the
        // threads that actually exist
        int created = 0;
        while (created < threadCount) {
            struct argStruct *arg = &threads[created];
            // processFile returns void here, hence the explicit function
            // pointer cast to the type pthread_create expects
            int rc = pthread_create(&arg->threadid, NULL,
                (void *(*)(void *)) processFile, arg);

            if (rc != 0) {
                fprintf(stderr, "pthread_create: %s\n", strerror(rc));
                break;
            }
            created++;
        }
        for (int i = 0; i < created; i++)
            pthread_join(threads[i].threadid, NULL);

        if (created == threadCount)
            status = 0;
    }

    free(threads);
    free(allWords);
    munmap(filebuf,(size_t) fileSize);
    close(pfile);
    return status;
}
更新#2:
哎呀,我错过了一些东西......
因为 added 是用 static 定义的,所以所有线程都会尝试使用它,它们会发生竞争(race)。这会很"糟糕"。而且,params->words 列表的索引也会是错误的。
对它的访问需要在线程循环的顶部/底部加上一对互斥锁加锁/解锁调用,或者使用原子原语(例如 stdatomic.h)。
但是,互斥锁对的效果是,无论哪个线程首先获得互斥锁,都会“垄断”它,并且所有线程或多或少地按顺序运行。从而破坏了拥有多个线程的目的。
因此,首先,我们要删除 static,以便每个线程都有自己的副本。
但是,现在,当我们向列表中添加一个新词时,事实证明 words(即 params->words)并没有"增长"。所以,每当添加一个新单词时,我们都必须增大列表的大小,因此需要添加一个 realloc 调用。
在单个公共列表(例如您在 main 中分配的 allWords)上执行此操作是有问题的。由于要进行搜索和 realloc,必须用互斥锁保护的代码"临界区"几乎就是整个循环体。
因此,一种解决方案是让每个线程维护它自己的列表(即每个线程的 params->words 都不同)。这样,线程之间不会竞争,并且在运行时不需要任何互斥锁。
但是,这将意味着线程之间存在重复。
因此,在 main 对所有线程执行 pthread_join 之后,main 必须重新创建一个单一的、统一的列表来消除重复项。
使用数组,这更麻烦。链接列表可能使重新组合各种列表变得更容易。
最简单的方法是从每个线程列表中复制所有条目,并附加到一个大列表中。
然后,对这个列表进行排序。
然后,创建一个消除重复项的新列表。
这是修复这些问题的更新版本[再次,未经测试]:
#include <time.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/stat.h>
#include <sys/mman.h>
char *filebuf;
// One entry of a per-thread word list built by processFile.
typedef struct {
// token text, copied in with strcpy; fixed 50-byte buffer
char word[50];
// occurrences of the word within the list this entry belongs to
int count;
// disabled: the list length is kept in struct argStruct (totalWords) instead
#if 0
int totalWords;
#endif
} wordsStruct;
// Per-thread record kept in main's `threads` array and passed to processFile.
struct argStruct {
// handle filled in by pthread_create and used by pthread_join
pthread_t threadid;
// byte offsets into filebuf delimiting this thread's segment
int start;
int end;
// main stores the thread index here; processFile resets it to 0
int count;
// number of entries in `words`, written by processFile before it returns
int totalWords;
// this thread's own word list, grown with realloc in processFile and freed by main
wordsStruct *words;
};
// Length of the NUL-terminated string s, not counting the terminator.
int
stringLength(char s[])
{
    const char *end;

    for (end = s; *end != '\0'; end++)
        ;
    return (int) (end - s);
}
// Normalise a token in place: lower-case every ASCII letter and drop every
// other character, closing the gaps so the remaining letters stay contiguous.
// Example: "Don't!" becomes "dont".
//
// line: writable, NUL-terminated string. The result is never longer than the
//       input, so the buffer is reused as-is.
//
// Non-letters are removed one by one rather than cutting the token at the
// first one, so punctuation inside or after a word does not lose letters.
void
groomString(char *line)
{
    char *dst = line;  // where the next surviving letter is written

    for (const char *src = line; *src != '\0'; ++src) {
        // <ctype.h> functions need an unsigned char value, not a plain char
        int lowered = tolower((unsigned char) *src);

        if (lowered >= 'a' && lowered <= 'z')
            *dst++ = (char) lowered;
    }
    *dst = '\0';
}
// Return 0 when n is 0, otherwise n + 1.
int
counter(int n)
{
    static int test;  // keeps the most recent argument between calls

    test = n;
    // NOTE: the test uses `==`; writing `n = 0` here would assign instead of
    // compare and the branch would never be taken
    if (n != 0)
        return n + 1;
    return test;
}
void *
processFile(void *input)
{
struct argStruct *params = input;
int start = params->start;
int end = params->end;
int size = (end - start) + 10;
char delim[] = " \t\v\n\r";
char *copy = malloc(size + 1);
memcpy(copy,&filebuf[start],size);
copy[size] = 0;
char *saveptr;
int inArray;
int length;
char *token = strtok_r(copy, delim, &saveptr);
int added = 0;
params->words = NULL;
params->count = 0;
while (token) {
groomString(token);
length = stringLength(token);
if (length > 5) {
wordsStruct *words = params->words;
// try to find an existing word struct for the current token
inArray = 0;
for (int i = 0; i < added; i++) {
if (strcmp(words[i].word, token) == 0) {
inArray = i;
break;
}
}
// found a token that is already in the words list -- just increment
// the count
if (inArray != 0) {
++words[inArray].count;
continue;
}
// add a new word struct to the list
++added;
words = realloc(words,sizeof(wordsStruct) * added);
params->words = words;
// fill it in and initialize its count
words += added;
strcpy(words->word, token);
words->count = 1;
}
token = strtok_r(NULL, delim, &saveptr);
}
params->totalWords = added;
free(copy);
free(token);
return (void *) 0;
}
// qsort comparator: orders two wordsStruct entries by their word text using
// strcmp, returning its result unchanged.
int
wordcmp(const void *a,const void *b)
{
    const char *left = ((const wordsStruct *) a)->word;
    const char *right = ((const wordsStruct *) b)->word;

    return strcmp(left, right);
}
int
main(int argc, char *argv[])
{
int pfile;
int threadCount = 0,
fileSize = 0,
divide = 0;
struct stat st;
off_t curpos;
#if 0
wordsStruct *allWords = malloc(sizeof(wordsStruct));
#endif
if (argc <= 2) {
printf("Please enter text file name and number of threads");
return 1;
}
pfile = open(argv[1],O_RDONLY);
if (pfile < 0) {
perror("FILE OPEN FAILURE");
}
threadCount = atoi(argv[2]);
struct argStruct *threads =
malloc(sizeof(struct argStruct) * threadCount);
struct argStruct *arg;
fstat(pfile,&st);
fileSize = st.st_size;
filebuf = mmap(NULL,fileSize,PROT_READ,MAP_PRIVATE,pfile,0);
divide = (fileSize / threadCount);
#if 0
int j = 0;
#endif
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
#if 0
arg->words = allWords;
#endif
if (i == 0)
arg->start = 0;
else
arg->start = arg[-1].end;
curpos = arg->start + divide;
for (; curpos < fileSize; ++curpos) {
if (filebuf[curpos] == '\n') {
++curpos;
break;
}
}
if (curpos > fileSize)
curpos = fileSize;
arg->end = curpos;
arg->count = i;
}
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
pthread_create(&arg->threadid, NULL, (void *) processFile, arg);
}
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
pthread_join(arg->threadid, NULL);
}
munmap(filebuf,fileSize);
close(pfile);
// get total number of words in all lists
int totalcnt = 0;
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
totalcnt += arg->totalWords;
}
// create a unified list [that may have duplicates]
wordsStruct *biglist = malloc(sizeof(wordsStruct) * totalcnt);
int bigidx = 0;
for (int i = 0; i < threadCount; i++) {
arg = &threads[i];
for (int wordidx = 0; wordidx < arg->totalWords; ++wordidx)
biglist[bigidx++] = arg->words[wordidx];
free(arg->words);
}
free(threads);
// sort the list
qsort(biglist,totalcnt,sizeof(wordsStruct),wordcmp);
// remove duplicates
int cleancnt = 0;
wordsStruct *cleanlist = malloc(sizeof(wordsStruct) * totalcnt);
if (totalcnt > 0)
cleanlist[cleancnt++] = biglist[0];
for (int bigidx = 1; bigidx < totalcnt; ++bigidx) {
if (strcmp(cleanlist[cleancnt - 1].word,biglist[bigidx].word) == 0)
continue;
cleanlist[cleancnt++] = biglist[bigidx];
}
free(biglist);
// trim the list
cleanlist = realloc(cleanlist,sizeof(wordsStruct) * cleancnt);
return 0;
}
推荐阅读
- maven - ${build.version} 和 ${project.version} 之间的区别
- android - SMART_BANNER 广告并不总是横向加载
- javascript - Promise 链接:在创建 Promise 时添加错误处理程序与使用 Promise 添加到变量
- ios - 向搜索栏添加进度视图
- python-3.x - 在 Keras/tensorflow 中连接 CNN 的向量
- css - 如何减慢背景图片的滚动速度
- json - 如何在颤振图表中显示 json 数据
- bash - 如何检查变量是否仅包含字母
- google-cloud-platform - 如何使用 gcloud 计算 scp?
- c# - 通过调用不同类的方法在表单中创建标签动态布局