首页 > 解决方案 > char* 函数返回 Null

问题描述

我目前正在开发一个接收和读取文本文件的垃圾邮件过滤程序。在initializeTraining函数中,我调用preprocess从给定文本文件中的每一行读取每个字符串的函数。

但是,一旦newDict从 line 执行函数first=newDict(string, NULL);,程序就会返回一个错误,指出函数中有atload of null pointer of type 'char'行。while(string[i] !='\0' && i<WORDLENGTH) {newDict

preprocess尽管该函数仍然从文本文件中接收传入的字符串,但似乎该函数正在返回空指针。preprocess我在函数中做错了什么吗?

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 WORDLENGTH is the length of the word in the linked list named as dictionary
 MAILSEPARATOR is the totken to differentiate the mails that are included in one file.
 it is also the token to update updated in linkedlist
 */
#define WORDLENGTH 20
#define MAILSEPARATOR "@#@#@"

/*
 define DEBUG as 0 to disable debug mode and to 1 to enable the mode.
 */
#define DEBUG 0

typedef struct dictionary dict;
typedef dict* word_dict;
typedef enum {false, true} bool;

/*
 linked list, count is for the total word count and
 occur is the numbers of the mails that had the word
 */
struct dictionary{
    char word[WORDLENGTH];
    int occur;
    int count;
    word_dict next;
    bool updated;
};

// if there is no matching words after searching, create a new node
word_dict newDict(char *string, word_dict next){
    word_dict target = (word_dict)malloc(sizeof(dict));
    int i = 0;

    while(string[i] !='\0' && i<WORDLENGTH) {
        target->word[i] = string[i];
        i++;
    }
    target->count = 1;
    target->next = next;
    target->occur = 1;
    target->updated = true;
    return target;
}

/*
 preprocessor, convert string to lowercase
 and trim the puctuations at the back
*/
char* preprocess(char* string){
    #if DEBUG
        printf("\nbefore preprocess, string: %s \n", string);
    #endif
    
    int i=0;

    while(string[i] != '\0') { // convert to lower case
        if (string[i] >= 65 && string[i] < 90) {
            string[i] += 32;
            i++;
        }        
    
    
        while(true) {
            i--;
            if(i < 0) {
                #if DEBUG
                    printf("word of only punctuations \n");
                #endif
            
                return NULL;
            } else if((string[i] >= 97 && string[i] <= 122) || (string[i] >= 48 && string[i] <= 57)){
                string[i+1]='\0';
                break;
            }
        }

        i=0;

        while(true) {
            if ((string[i] >= 97 && string[i] <= 122) || (string[i] >= 48 && string[i] <= 57)){
                break;
            } else {
                string = &string[i+1];
            }

            i++;
        }
    }
        #if DEBUG
            printf("_after preprocess, string: %s\n", string);
        #endif
           
    
    return string;
}

/*
 initialize training
 reads the sample mails and creates a linked list of
 the percentages of the words occuring in the sample mails
*/
word_dict initializeTraining(char* filename){
    FILE *fp = NULL;
    fp = fopen(filename, "r");
    if(fp == NULL) {
        printf("no file found\n");
        return NULL;
    }
    
    char* string;
    string = (char*)malloc(sizeof(char)*50);
    word_dict first = NULL;
    fscanf(fp, "%s\n", string);
    string = preprocess(string);
    first = newDict(string, NULL);
    
    while(fscanf(fp,"%s", string) == 1) {
        first = searchDict(string, first);
    }
    fclose(fp);
    free(string);
    return first;
}


/*
 tests whether the mail is pam or not
 takes the filename of the test mail,
 returns true or false depending on the email's content
*/
bool bayesian_spam_filter(char * filename_for_test_email) {
    word_dict spamDict=initializeTraining("spam.txt");
    word_dict nonspamDict=initializeTraining("not_spam.txt");
    
#if DEBUG
    printDict(spamDict);
    printDict(nonspamDict);
#endif
    
    FILE *stream=NULL;
    stream = fopen(filename_for_test_email, "r");
    if(stream == NULL){
        printf("no file found\n");
        return false;
    }
    
    char* string;
    string = (char*)malloc(sizeof(char)*50);
    
    int ps, pn; // probability of spam mail and non-spam mail
    double prob = 0.5;
    while(fscanf(stream,"%s", string) == 1){
        char* tempString; // for handling the errors happening from string being null during preprocessing
        tempString = preprocess(string);
        if(tempString == NULL){
            continue;
        }

        if((ps = searchTest(tempString, spamDict)) != 0) {
            if((pn = searchTest(tempString, nonspamDict)) != 0) {
                printf("ps:%3d, pn:%3d, %s\n", ps, pn, tempString);
                prob = prob * (double) ps / ((prob* (double)ps + (1 - prob) * (double) pn));
                printf("this prob: %.10f\n", prob);
            }
        }
    }

    //printf("%d, %d \n", pSProduct, pNProduct);
    //proba=(float)(pSProduct/(pSProduct+pNProduct));
    printf("Probability of mail being spam: %.10f\n", prob);
    fclose(stream);
    free(string);
    if (prob > 0.9) {
        return true;
    }
    return false;
}

标签: c

解决方案


似乎预处理函数正在返回空指针

当它包含一行时,这不足为奇return NULL;。此时,您应该设置stringto的第一个字符'\0'并返回它,因为周围的代码希望在所有情况下都返回字符串。

在本节中可以发现另一个问题:

        i=0;

        while(true) {
            if ((string[i] >= 97 && string[i] <= 122) || (string[i] >= 48 && string[i] <= 57)){
                break;
            } else {
                string = &string[i+1];
            }

            i++;
        }

假设字符串是".a"并通过循环。由于第一个字符不是字母,我们不中断,而是更新string指针,使其指向"a"现在。然后,我们增加 i。在下一次迭代中,string[i]是空终止符,它不是字母,所以我们继续。由于我们已经过去了字符串的数据,因此接下来是未定义的行为。

解决此问题的简单方法是不增加i而是坚持使用,[0]因为您总是想从头开始删除。正确的解决方法是使用i但不增加string指针,因为您想稍后释放它 - 您必须调用free返回的指针malloc,因此修改指针会导致未定义的行为!不是从返回字符串,而是返回从开始的偏移量(由ipreprocess计数),这种方式稍后释放字符串将正常工作。调用代码将如下所示:

int offset = preprocess(string);
first = newDict(string + offset, NULL);

推荐阅读