首页 > 解决方案 > 将 CSV 解析为动态分配的结构数组 (ANSI 89)

问题描述

我正在尝试将 csv 解析为动态分配的结构数组,但是我的尝试因分段错误而崩溃。

这是我的数据结构:

SO02773202,5087001,0
SO02773203,5087001,0
SO02773204,5087001,0
SO02773205,5087001,0
SO02773206,5087001,14

这是我将数据解析为的结构:

/* One record of the sales-lines CSV: two text fields and a bottle count. */
typedef struct saleslines {
  char *salesid;          /* first CSV field, held as a C string */
  char *smmcampaignid;    /* second CSV field, held as a C string */
  int   numberofbottles;  /* third CSV field as an integer */
} saleslines_t;

这是我解析文件的尝试:

/*
 * Question's first attempt, kept exactly as posted.
 *
 * Counts the '\n' characters in FILENAME, allocates an array of
 * saleslines_t, then opens the file a second time and parses one
 * "salesid,smmcampaignid,numberofbottles" record per line with
 * strtok/strdup/atoi, printing each record as it is stored.
 *
 * Returns the incoming number_of_lines plus the number of newlines
 * counted, or 0 if the first fopen fails.
 *
 * NOTE(review): `saleslines` is received by value, so the malloc result
 * assigned below lives only in this function's local copy; the pointer
 * variable the caller passed in is not updated by this function.
 */
int read_saleslines(saleslines_t* saleslines, int number_of_lines){
  /* NOTE(review): getc returns int; storing it in a char makes the
   * comparison with EOF below unreliable. */
  char c;

  FILE* fp; 
  fp = fopen(FILENAME, "r");             /* Open the saleslines file */

  if(fp == NULL){                              /* Return 0 if the file cannot be opened */
  printf("Error - file not found\n");
    return 0;
  }

  /* First pass: count newline characters; the count is added on top of
   * whatever number_of_lines held on entry. */
  c = getc(fp);
  while (c != EOF){
    if (c == '\n'){
    number_of_lines += 1;
    }
    c = getc(fp);
  }

  printf("Number of lines is %d\n", number_of_lines);

  /* Room for twice the counted lines. The malloc result is not checked. */
  saleslines = (saleslines_t*) malloc((number_of_lines * 2) * sizeof(saleslines_t));

  /* Line buffer: 1000 bytes are allocated, but fgets below reads at most 255 */
  char *buf = (char*) malloc(1000);
  char *tmp; 

  /* Second pass: open the file again for parsing.
   * NOTE(review): the stream from the first fopen is never fclose'd before
   * fp is overwritten, and a failed reopen only prints a message, so
   * execution continues into fgets with fp == NULL. */
  if ( ( fp = fopen(FILENAME, "r" ) ) == NULL )
  {
    printf( "File could not be opened.\n" );
  }
  int i = 0;
  while (fgets(buf, 255, fp) != NULL){
    /* Strip the trailing newline, if any */
    if ((strlen(buf)>0) && (buf[strlen (buf) - 1] == '\n'))
      buf[strlen (buf) - 1] = '\0';       

    /* NOTE(review): strtok returns NULL when a field is missing; tmp is
     * handed to strdup/atoi without a check, and strdup's result is not
     * checked either. */
    tmp = strtok(buf, ",");
    saleslines[i].salesid = strdup(tmp);

    tmp = strtok(NULL, ",");
    saleslines[i].smmcampaignid = strdup(tmp);

    tmp = strtok(NULL, ",");
    saleslines[i].numberofbottles = atoi(tmp);

    printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", saleslines[i].salesid , saleslines[i].smmcampaignid, saleslines[i].numberofbottles);

    i++;
  }
  free(buf);
  fclose(fp);
  printf("Number of lines is %i\n", number_of_lines);
  return number_of_lines;
}

由于某种原因,它会解析文件并打印结果数组,但是当我立即调用此函数时,它会因段错误而崩溃:

/*
 * Print the record count, then every record in
 * saleslines[0 .. number_of_lines - 1] as three labelled lines each.
 *
 * saleslines      - array of records to print
 * number_of_lines - number of records in the array
 */
void print_saleslines_struct(saleslines_t* saleslines, int number_of_lines){
  int i;
  printf("Number of lines is %i", number_of_lines);
  for(i = 0; i < number_of_lines; i++){
    printf("Salesid:\t %s\n", saleslines[i].salesid);
    printf("Campaign:\t %s\n", saleslines[i].smmcampaignid);
    printf("# of Bottles:\t %d\n", saleslines[i].numberofbottles);
  }
}

我似乎找不到这个内存错误在哪里。

这是初始化和主要内容:

/* File-scope state used by the menu loop in main. */
saleslines_t* saleslines;                  /* record array passed to the read/print/summarise calls */
saleslines_summary_t* saleslines_summary;  /* type is not shown in this listing */
saleslines_grouped_t* saleslines_grouped;  /* type is not shown in this listing */
int number_of_lines = 0;                   /* receives the value returned by read_saleslines */
int* number_of_linesp = &number_of_lines;  /* address of the counter, passed to summarise_saleslines */

/* Main */

/*
 * Menu loop of the question's first attempt, kept exactly as posted.
 * Repeatedly prints the four options, reads a number with scanf and
 * dispatches on it; option 4 frees the three global arrays and exits.
 */
int main(){

  int chosen_option;

  while(1){

    printf("What would you like to do?\n");
    printf("1. Read saleslines.txt\n");
    printf("2. Print saleslines\n");
    printf("3. Summarise saleslines\n");
    printf("4. Exit the program\n");

    /* NOTE(review): scanf's return value is not checked, so a non-numeric
     * entry leaves chosen_option unchanged. */
    scanf("%d", &chosen_option);

    switch(chosen_option){

    /*  case 1 : number_of_lines = read_saleslines_file(saleslines, number_of_lines); break; */

      /* NOTE(review): the global pointer is passed by value here, so the
       * array allocated inside read_saleslines never reaches `saleslines`. */
      case 1 : number_of_lines = read_saleslines(saleslines, number_of_lines); break;

      case 2 : printf("Number of lines is %i", number_of_lines);  print_saleslines_struct(saleslines, number_of_lines); break;

      case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;

      case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;   

    }

  }

  /* Not reached: the loop above only ends through the return in case 4 */
  return 0;

}

更新

问题似乎与我对结构数组的初始化有关。

当我像这样初始化它时:saleslines_t* saleslines; 然后像这样 malloc :saleslines = malloc(number_of_lines + 1 * sizeof(saleslines_t);

我得到一个段错误。

但是如果我这样初始化:(saleslines[600];分配的行数超过文件中的行数),一切正常。

我怎样才能解决这个问题?我希望能够动态分配结构数组中的条目数。

编辑 2

以下是建议的更改:

/* Revised prototype: the array pointer is now passed by address, so the
 * function can store its allocation in the caller's pointer variable. */
int read_saleslines(saleslines_t** saleslines, int number_of_lines);

saleslines_t* saleslines;   /* record array, filled in through &saleslines */
int number_of_lines = 0;    /* receives the value returned by read_saleslines */

/*
 * Menu loop of the revised attempt ("edit 2"), kept exactly as posted.
 * Same structure as the first version, except that the current line count
 * is printed with the menu and option 1 passes &saleslines.
 *
 * NOTE(review): chosen_option, number_of_linesp, saleslines_summary and
 * saleslines_grouped are used but not declared in this excerpt;
 * presumably they are declared as in the earlier listing.
 */
int main(){

  while(1){

    printf("What would you like to do?\n");
    printf("1. Read saleslines.txt\n");
    printf("2. Print saleslines\n");
    printf("3. Summarise saleslines\n");
    printf("4. Exit the program\n");

    printf("Number of saleslines = %i\n", number_of_lines);

    /* NOTE(review): scanf's return value is not checked */
    scanf("%d", &chosen_option);

    switch(chosen_option){

    /*  case 1 : number_of_lines = read_saleslines_file(saleslines, number_of_lines); break; */

      /* The address of the global pointer is passed so the callee can set it */
      case 1 : number_of_lines = read_saleslines(&saleslines, number_of_lines); break;

      case 2 : printf("Number of lines is %i", number_of_lines);  print_saleslines_struct(saleslines, number_of_lines); break;

      case 3 : summarise_saleslines(saleslines, number_of_linesp, saleslines_summary, saleslines_grouped); break;

      case 4 : free(saleslines); free(saleslines_summary); free(saleslines_grouped); return 0;   

    }

  }

  /* Not reached: the loop above only ends through the return in case 4 */
  return 0;

}

/*
 * Revised attempt ("edit 2"), kept exactly as posted.
 *
 * Counts the '\n' characters in FILENAME, stores a freshly allocated array
 * of (count + 1) saleslines_t in *saleslines, then reopens the file and
 * parses one "salesid,smmcampaignid,numberofbottles" record per line.
 *
 * Returns the incoming number_of_lines plus the number of newlines
 * counted, or 0 if the first fopen fails.
 */
int read_saleslines(saleslines_t** saleslines, int number_of_lines)
{

  /* NOTE(review): getc returns int; storing it in a char makes the
   * comparison with EOF below unreliable. */
  char c;

  FILE* fp; 
  fp = fopen(FILENAME, "r");             /* Open the saleslines file */

  if(fp == NULL){                              /* Return 0 if the file cannot be opened */
  printf("Error - file not found\n");
    return 0;
  }

  /* First pass: count newline characters; the count is added on top of
   * whatever number_of_lines held on entry. */
  c = getc(fp);
  while (c != EOF){
    if (c == '\n'){
    number_of_lines += 1;
    }
    c = getc(fp);
  }

  fclose(fp);

  printf("Number of lines is %d\n", number_of_lines);

  /* Written through the pointer so the caller's variable receives the
   * array. The malloc result is not checked. */
  *saleslines = (saleslines_t*) malloc((number_of_lines + 1) * sizeof(saleslines_t));

  /* Line buffer: fgets below stores at most 24 characters per call, so a
   * longer line would be split across iterations. */
  char *buf = malloc(25);
  char *tmp; 

  /* Second pass. NOTE(review): a failed reopen only prints a message, so
   * execution continues into fgets with fp == NULL. */
  if ( ( fp = fopen(FILENAME, "r" ) ) == NULL )
  {
    printf( "File could not be opened.\n" );
  }
  int i = 0;
  while (fgets(buf, 25, fp) != NULL){
    /* Strip the trailing newline, if any */
    if ((strlen(buf)>0) && (buf[strlen (buf) - 1] == '\n'))
      buf[strlen (buf) - 1] = '\0';       

    /* NOTE(review): strtok/strdup results are used without NULL checks */
    tmp = strtok(buf, ",");
    (*saleslines)[i].salesid = strdup(tmp);

    tmp = strtok(NULL, ",");
    (*saleslines)[i].smmcampaignid = strdup(tmp);

    tmp = strtok(NULL, ",");
    (*saleslines)[i].numberofbottles = atoi(tmp);

    /* NOTE(review): the stores above index the array as (*saleslines)[i],
     * but this printf uses saleslines[i]->..., which indexes the
     * pointer-to-pointer itself. Only saleslines[0] refers to the caller's
     * pointer; for i >= 1 it reads past that single pointer variable. */
    printf("Salesid: %s\nCampaign: %s\nBottles: %i\n\n", saleslines[i]->salesid , saleslines[i]->smmcampaignid, saleslines[i]->numberofbottles);

    i++;
  }
  free(buf);
  fclose(fp);
  printf("Number of lines is %i\n", number_of_lines);
  return number_of_lines;
}

该程序现在在读取结构数组中的第一个元素后出现段错误。

标签: c, arrays, csv, segmentation-fault, dynamic-memory-allocation

解决方案


您的代码以及整体思路中都存在不少错误。没有必要先遍历一次文件来确定行数、分配内存,然后再重新读取文件来解析数据。此外,也没有必要对每一行进行分词(strtok)来拆开逗号分隔的值:在用 fgets 读取每一行之后,用 sscanf() 解析出两个字符串和一个 int 就足够了。

虽然您可以自由选择传递哪些参数、返回什么值,但既然您要分配结构数组并把数据读入该数组,更合理的做法是:让函数返回指向已分配数组的指针(失败时返回 NULL),并通过一个指针参数更新读取的总行数,使调用者可以得到它。

此外,通常您希望在调用者中打开并验证文件,并将FILE*打开文件流的参数传递给您的函数。考虑到这一点,您可以将函数重构为:

/* Read sales lines from fp into a dynamically grown array of
 * saleslines_t, allocating separate storage for salesid and
 * smmcampaignid within each struct.
 *
 * fp    - stream already opened for reading by the caller
 * lines - receives the number of elements filled in
 *
 * Returns a pointer to the allocated array, or NULL if nothing
 * was allocated.
 */
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{

在您的函数中,您只需要一个缓冲区来保存读取的每一行,一个计数器来跟踪数组中分配的元素数量,以及一个指向要返回的数组的指针。例如,您可以执行以下操作来处理所有这三个:

    char buf[MAXC];                 /* buffer to hold one line read by fgets */
    size_t maxlines = MINL;         /* element count used for the growth check */
    saleslines_t *sales = NULL;     /* array of struct; NULL until first realloc */

(注意:由于您通过作为参数传入的指针 lines 来记录读取的行数,因此先把该地址处的值初始化为零是合理的。)

现在函数的主要工作开始了:把每一行读入 buf,并从中解析所需的信息。由于 salesid 和 smmcampaignid 都是结构中的字符指针,您需要为从该行解析出的每个字符串分配一块内存,把字符串复制到新的内存块,然后把该内存块的起始地址赋给相应的指针。要“动态”地为结构数组分配元素,只需检查已填充的行数 (*lines) 是否已达到已分配的数量 (maxlines)(或者尚未进行任何分配);如果是,就用 realloc 为结构数组重新分配(或首次分配)存储空间。

调用 realloc 时,始终用一个临时指针接收返回值。这样,如果 realloc 失败并返回 NULL,就不会用 NULL 覆盖指向当前已分配内存块的指针,从而避免内存泄漏。

在你的函数开始时将所有这些放在一起可能看起来令人生畏,但它实际上是直截了当的,例如

    while (fgets (buf, MAXC, fp)) { /* read each line in file */
        char id[MAXC], cid[MAXC];   /* temp arrays to hold strings */
        int bottles;                /* temp int for numberofbottles */
        /* Grow when the array is full, or when no record is stored yet.
         * NOTE(review): `!*lines` stays true for every line read until the
         * first record is stored, so each such iteration reallocates and
         * doubles maxlines again. */
        if (*lines == maxlines || !*lines) {    /* check if realloc req'd */
            /* always realloc with a temp pointer */
            void *tmp = realloc (sales, 2 * maxlines * sizeof *sales);
            if (!tmp) { /* if realloc fails, original pointer still valid */
                perror ("realloc-sales");   /* report error */
                return sales;               /* return current pointer      */ 
            }                               /* (don't exit or return NULL) */
            sales = tmp;    /* assign reallocated block to sales */
            /* (optional) zero maxlines elements starting at index *lines */
            memset (sales + *lines, 0, maxlines * sizeof *sales);
            maxlines *= 2;  /* update maxlines allocated */
        }

现在可以用 sscanf 从行中解析所需的信息了。成功解析之后,分别为 salesid 和 smmcampaignid 指针分配内存,把解析出的字符串复制到新的内存块,并把各内存块的起始地址赋给对应的指针,例如

        /* parse needed data from line (sscanf is fine here);
         * lines that do not yield all three fields are skipped */
        if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) == 3) {
            size_t  idlen  = strlen (id),   /* get lengths of strings */
                    cidlen = strlen (cid);
            sales[*lines].salesid = malloc (idlen + 1); /* allocate string */
            if (!sales[*lines].salesid) {               /* validate! */
                perror ("malloc-sales[*lines].salesid");
                break;
            }
            sales[*lines].smmcampaignid = malloc (cidlen + 1);  /* ditto */
            /* NOTE(review): on this failure path salesid was already
             * allocated for an element that is never counted in *lines. */
            if (!sales[*lines].smmcampaignid) {
                perror ("malloc-sales[*lines].smmcampaignid");
                break;
            }
            /* copy strings, including the terminating '\0' */
            memcpy (sales[*lines].salesid, id, idlen + 1);
            memcpy (sales[*lines].smmcampaignid, cid, cidlen + 1);
            sales[(*lines)++].numberofbottles = bottles;    /* assign int */
        }   /* (note lines counter updated in last assignment) */

(注意:您也可以使用 strdup,它会获取已解析字符串的长度、分配足够的内存来保存该字符串,并一次性返回可以赋给指针的地址,例如 sales[*lines].salesid = strdup (id);。但是 strdup 不属于 C99/C11 标准库(它是 POSIX 函数),因此为了可移植性,先获取长度、分配 length + 1 字节、再用 memcpy 手动复制字符串同样简单。此外,由于 strdup 会分配内存,您必须验证它返回的指针——99% 使用它的人都忽略了这一点。)

就是这样,当fgets()失败时,你已经达到了EOF,现在简单地说:

    /* reached when fgets returns NULL or the loop was left early */
    return sales;   /* return dynamically allocated array of struct */
}

将其完全放在一个简短的工作示例中,该示例将文件名作为程序的第一个参数读取(stdin如果没有给出参数,则默认读取),您可以这样做:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXC 1024   /* max characters per line buffer and per parsed field */
#define MINL    2   /* the first allocation holds 2 * MINL elements */

/* One record parsed from a "salesid,smmcampaignid,numberofbottles" line. */
typedef struct saleslines{
    char *salesid;          /* separately allocated copy of the 1st field */
    char *smmcampaignid;    /* separately allocated copy of the 2nd field */
    int numberofbottles;    /* 3rd field, parsed with %d */
} saleslines_t;

/* Read sales lines from fp into a dynamically grown array of
 * saleslines_t, allocating separate storage for salesid and
 * smmcampaignid within each struct. Lines that do not contain all
 * three fields are skipped.
 *
 * fp    - stream already opened for reading by the caller
 * lines - receives the number of elements filled in (always set;
 *         0 when nothing was parsed)
 *
 * Returns a pointer to the allocated array, or NULL if nothing was
 * allocated (e.g. empty input). If an allocation fails part-way, the
 * records stored so far are returned and *lines counts only those.
 * The caller frees salesid and smmcampaignid of each of the *lines
 * elements and then the array itself.
 */
saleslines_t *read_saleslines (FILE *fp, size_t *lines)
{
    char buf[MAXC];                 /* buffer to hold line */
    size_t maxlines = MINL;         /* elements allocated once sales != NULL */
    saleslines_t *sales = NULL;     /* pointer to array of struct */

    *lines = 0;     /* zero lines */

    while (fgets (buf, MAXC, fp)) { /* read each line in file */
        char id[MAXC], cid[MAXC];   /* temp arrays to hold strings */
        int bottles;                /* temp int for numberofbottles */

        /* Grow when nothing is allocated yet or the array is full. The
         * first-time test is on `sales`, not on `*lines`: *lines stays 0
         * while leading lines fail to parse, and testing it would double
         * the allocation once per such line. */
        if (!sales || *lines == maxlines) {
            size_t newmax;
            void *tmp;

            /* refuse sizes whose byte count would overflow size_t */
            if (maxlines > (size_t)-1 / (2 * sizeof *sales)) {
                fputs ("realloc-sales: array size overflow\n", stderr);
                return sales;
            }
            newmax = 2 * maxlines;

            /* always realloc with a temp pointer */
            tmp = realloc (sales, newmax * sizeof *sales);
            if (!tmp) { /* if realloc fails, original pointer still valid */
                perror ("realloc-sales");   /* report error */
                return sales;               /* return current pointer      */
            }                               /* (don't exit or return NULL) */
            sales = tmp;    /* assign reallocated block to sales */
            /* zero every element that does not hold a record yet */
            memset (sales + *lines, 0, (newmax - *lines) * sizeof *sales);
            maxlines = newmax;  /* update maxlines allocated */
        }
        /* parse needed data from line; the 1023 field widths are MAXC - 1
         * so id and cid cannot overflow */
        if (sscanf (buf, "%1023[^,],%1023[^,],%d", id, cid, &bottles) == 3) {
            size_t  idlen  = strlen (id),   /* get lengths of strings */
                    cidlen = strlen (cid);
            sales[*lines].salesid = malloc (idlen + 1); /* allocate string */
            if (!sales[*lines].salesid) {               /* validate! */
                perror ("malloc-sales[*lines].salesid");
                break;
            }
            sales[*lines].smmcampaignid = malloc (cidlen + 1);  /* ditto */
            if (!sales[*lines].smmcampaignid) {
                perror ("malloc-sales[*lines].smmcampaignid");
                /* this element is not counted in *lines, so the caller
                 * will never free its salesid; release it here */
                free (sales[*lines].salesid);
                sales[*lines].salesid = NULL;
                break;
            }
            /* copy strings, including the terminating '\0' */
            memcpy (sales[*lines].salesid, id, idlen + 1);
            memcpy (sales[*lines].smmcampaignid, cid, cidlen + 1);
            sales[(*lines)++].numberofbottles = bottles;    /* assign int */
        }   /* (note lines counter updated in last assignment) */
    }

    return sales;   /* return dynamically allocated array of struct */
}

/* Entry point: read sales lines from the file named by the first
 * argument (or from stdin when no argument is given), print every
 * record, and release all memory allocated for the records.
 * Returns 1 if the file cannot be opened, 0 otherwise.
 */
int main (int argc, char **argv) {

    size_t count;                   /* set by read_saleslines */
    size_t idx = 0;                 /* index of the record being printed */
    saleslines_t *records = NULL;   /* array returned by read_saleslines */
    FILE *in = stdin;               /* input stream, stdin by default */

    if (argc > 1)                   /* a filename was given: open it */
        in = fopen (argv[1], "r");

    if (in == NULL) {               /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    records = read_saleslines (in, &count);

    if (in != stdin)                /* never close stdin */
        fclose (in);

    while (idx < count) {           /* print, then free, each record */
        saleslines_t *rec = &records[idx];

        printf ("sales[%2zu]:  %s  %s  %2d\n", idx, rec->salesid,
                rec->smmcampaignid, rec->numberofbottles);
        free (rec->salesid);
        free (rec->smmcampaignid);
        idx++;
    }
    free (records);                 /* finally free the array itself */

    return 0;
}

示例使用/输出

$ ./bin/saleslines dat/saleslines.txt
sales[ 0]:  SO02773202  5087001   0
sales[ 1]:  SO02773203  5087001   0
sales[ 2]:  SO02773204  5087001   0
sales[ 3]:  SO02773205  5087001   0
sales[ 4]:  SO02773206  5087001  14

内存使用/错误检查

在您编写的任何动态分配内存的代码中,对于分配的每一块内存,您都有两个责任:(1) 始终保留指向该内存块起始地址的指针,以便 (2) 在不再需要时可以将其释放。

您必须使用内存错误检查程序,确保没有访问或写入已分配内存块范围之外的位置,没有读取未初始化的值或基于未初始化的值进行条件跳转,并最终确认已释放所有分配的内存。

对于 Linux,valgrind 是常用的选择。每个平台都有类似的内存检查器。它们都易于使用,只需通过它运行您的程序即可。

$ valgrind ./bin/saleslines dat/saleslines.txt
==19819== Memcheck, a memory error detector
==19819== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==19819== Using Valgrind-3.12.0 and LibVEX; rerun with -h for copyright info
==19819== Command: ./bin/saleslines dat/saleslines.txt
==19819==
sales[ 0]:  SO02773202  5087001   0
sales[ 1]:  SO02773203  5087001   0
sales[ 2]:  SO02773204  5087001   0
sales[ 3]:  SO02773205  5087001   0
sales[ 4]:  SO02773206  5087001  14
==19819==
==19819== HEAP SUMMARY:
==19819==     in use at exit: 0 bytes in 0 blocks
==19819==   total heap usage: 13 allocs, 13 frees, 935 bytes allocated
==19819==
==19819== All heap blocks were freed -- no leaks are possible
==19819==
==19819== For counts of detected and suppressed errors, rerun with: -v
==19819== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

始终确认您已释放所有已分配的内存并且没有内存错误。

动态分配任何东西都并不困难。只需把它拆分成足够小的部分,并对每个需要分配的指针都一丝不苟、逐一核对(即英文所说的 dot the I's and cross the T's)。请仔细查看,如果还有其他问题,请告诉我。


推荐阅读