c - 如何将 CSV 文件的内容存储在链接列表中?
问题描述
我正在尝试使用 C 将 csv 文件中的内容插入到链接列表中。但是,我得到了很多垃圾输出。源代码如下。
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
struct product *customer_head;
struct customer
{
long long int c_id;//first 6 characters=date & next 6 characters=time & next characters=counter no & city code
/*Compulsory Fields (Cannot be Skipped)*/
char name[57];
long long int ph_no;//10 digit phone number
/*Non Compulsory Fields (Can be Skipped)*/
char address[58];
char city[25];
char state_code[2];
char pin[6];
char email[60];
struct customer *next;
};
struct customer * load()
{
FILE * cust=fopen("customer_db.csv","r");
struct customer *temp,*ptr;
customer_head=NULL;
char str[208];
char *token,*eptr1,*eptr2;
int line_cnt=0,i=0;
while(fgets(str,234,cust)!=NULL)
{
line_cnt=0;
i=0;
ptr=(struct customer *)malloc(sizeof(struct customer));
for(;str[i];i++)
{
if(str[i]=='\n')
{
str[i]='\0';
i=0;
break;
}
}
token=strtok(str,",");
while(token!=NULL)
{
if(line_cnt==0)
ptr->c_id=strtoll(token,&eptr1,10);
else if(line_cnt==1)
ptr->ph_no=strtoll(token,&eptr2,10);
else if(line_cnt==2)
sprintf(ptr->name,"%s",token);
else if(line_cnt==3)
sprintf(ptr->address,"%s",token);
else if(line_cnt==4)
sprintf(ptr->city,"%s",token);
else if(line_cnt==5)
sprintf(ptr->state_code,"%s",token);
else if(line_cnt==6)
sprintf(ptr->pin,"%s",token);
else
sprintf(ptr->email,"%s",token);
line_cnt++;
token=strtok(NULL,",");
}
if(customer_head==NULL)
customer_head=ptr;
else
temp->next=ptr;
temp=ptr;
}
}
int print(struct customer *h)
{
while(h->next!=NULL)
{
printf("\nCustomer ID: ");
printf("%lld",h->c_id);
printf("\nName: ");
puts(h->name);
printf("Phone Number: ");
printf("%lld",h->ph_no);
printf("\nAddress: ");
puts(h->address);
printf("City: ");
puts(h->city);
printf("State Code: ");
puts(h->state_code);
printf("PIN: ");
puts(h->pin);
printf("Email: ");
puts(h->email);
h=h->next;
}
printf("\nCustomer ID: ");
printf("%lld",h->c_id);
printf("\nName: ");
puts(h->name);
printf("Phone Number: ");
printf("%ld",h->ph_no);
printf("\nAddress: ");
puts(h->address);
printf("City: ");
puts(h->city);
printf("State Code: ");
puts(h->state_code);
printf("PIN: ");
puts(h->pin);
printf("Email: ");
puts(h->email);
return 1;
}
int main()
{
load();
print(customer_head);
}
我还在此处附加了 csv 文件。为了让程序不那么复杂,我从我的 csv 文件中删除了标题。他们按顺序
Customer_ID,Phone_Number,Name,Address,City,State_Code,PIN,Email
1403201156540201,2226179183,Katherine_Hamilton,87_Thompson_St.,Fremont,IA,502645,k_hamilton@gmail.com
2204201532220103,8023631298,Marc_Knight,-,-,-,-,-
0305201423120305,8025595163,Albie_Rowland,-,Hamburg,NY,140752,-
0607201232220901,4055218053,Grant_Phelps,-,-,-,-,-
破折号 (-) 表示这些字段应保持为空。
输出如下:
Customer ID: 1403201156540201
Name: Katherine_Hamilton
Phone Number: 2226179183
Address: 87_Thompson_St.
City: Fremont
State Code: IA502645k_hamilton@gmail.com
PIN: 502645k_hamilton@gmail.com
Email: k_hamilton@gmail.com
Customer ID: 2204201532220103
Name: Marc_Knight
Phone Number: 8023631298
Address: -
City: -
State Code: -
PIN: -
Email: -
Customer ID: 305201423120305
Name: Albie_Rowland
Phone Number: 8025595163
Address: -
City: Hamburg
State Code: NY140752-
PIN: 140752-
Email: -
Customer ID: 607201232220901
Name: Grant_Phelps
Phone Number: 4055218053
Address: -
City: -
State Code: -
PIN: -
Email: -
如您所见,内容正在很多地方合并。我不明白为什么。
解决方案
由于从评论中您知道您的字符数组声明受到一个太少字符的影响,至少在char state_code[2];
您的数组没有导致Undefined Behavior的nul 终止字符的情况下,您应该确保您有有效的存储空间您的所有输入。(不要吝啬缓冲区大小)
一般来说,你让自己的事情变得比需要的更难。您拥有固定的输入字段,而不是尝试使用strtok()
和计算字段并处理 8 部分链中的每个if, else if ...
字段,因此只需解析数据sscanf()
并验证转换次数以确认成功解析,例如
/** fill list from csv file */
list_t *list_from_csv (list_t *list, FILE *fp)
{
char buf[MAXC];
node_t data = { .c_id = 0, .next = NULL };
while (fgets (buf, MAXC, fp)) { /* read each line in file */
/* parse and VALIDATE values from line */
if (sscanf (buf, "%lld,%lld,%63[^,],%63[^,],%31[^,],%7[^,],%7[^,],%63[^,\n]",
&data.c_id, &data.ph_no, data.name, data.address, data.city,
data.state_code, data.pin, data.email) == 8) {
if (!add (list, &data)) /* validate add to list or break */
break;
}
}
return list;
}
在这里,list_t
只是一个额外的“包装器”结构,它为您的链表保存一个head
和指针。tail
这允许您在所需范围内声明多个列表,并通过让tail
指针始终指向列表中的最后一个节点(您的)来允许相同的 O(1) 插入temp
。在这里,head
andtail
只是包装器的一部分并作为参数传递,而不必将列表指针声明为全局(不好的做法)。列表中的每个节点和包装器结构都可以写成:
#define BYTE8 8 /* if you need a constant, #define one (or more) */
#define BYTE32 32
#define BYTE64 64
#define MAXC 1024
typedef struct node_t { /* list node */
/* 6 characters=date & 6 characters=time & counter no & city code */
long long int c_id;
/*Compulsory Fields (Cannot be Skipped)*/
char name[BYTE64];
long long int ph_no; //10 digit phone number
/*Non Compulsory Fields (Can be Skipped)*/
char address[BYTE64];
char city[BYTE32];
char state_code[BYTE8];
char pin[BYTE8];
char email[BYTE64];
struct node_t *next;
} node_t;
typedef struct { /* list wrapper with head & tail pointers */
node_t *head, *tail;
} list_t;
然后,不要编写load()
包含 theFILE
和 list 操作的您,而是将您的列表操作分开。只需创建一个add()
函数即可将节点添加到您的列表中,例如
/** add node at end of list, update tail to end */
node_t *add (list_t *l, node_t *data)
{
node_t *node = malloc (sizeof *node); /* allocate node */
if (!node) { /* validate allocation */
perror ("malloc-node");
return NULL;
}
*node = *data; /* initialize members values */
if (!l->head) /* if 1st node, node is head/tail */
l->head = l->tail = node;
else { /* otherwise */
l->tail->next = node; /* add at end, update tail pointer */
l->tail = node;
}
return node; /* return new node */
}
现在,您的加载函数只需要从文件中读取每一行并在调用之前解析该行,然后add()
将指向数据结构的指针与列表指针一起作为参数传递。您的load()
功能减少到:
/** fill list from csv file */
list_t *list_from_csv (list_t *list, FILE *fp)
{
char buf[MAXC];
node_t data = { .c_id = 0, .next = NULL };
while (fgets (buf, MAXC, fp)) { /* read each line in file */
/* parse and VALIDATE values from line */
if (sscanf (buf, "%lld,%lld,%63[^,],%63[^,],%31[^,],%7[^,],%7[^,],%63[^,\n]",
&data.c_id, &data.ph_no, data.name, data.address, data.city,
data.state_code, data.pin, data.email) == 8) {
if (!add (list, &data)) /* validate add to list or break */
break;
}
}
return list;
}
(注意:使用strtok()
or时sscanf()
,无需'\n'
从输入字符串中去除尾随 - 只需将其作为分隔符包含在转换中strtok()
或将其排除在转换之外sscanf()
)
此外,您不需要多次调用puts()
和printf()
打印列表中每个节点的数据价值。查看您为打印数据而调用了多少函数。您只需要打一次电话printf()
,例如
/** print all nodes in list */
void prn_list (list_t *l)
{
if (!l->head) {
puts ("list-empty");
return;
}
for (node_t *n = l->head; n; n = n->next)
printf ("\nCustomer ID: %lld\n"
"Name: %s\n"
"Phone Number: %lld\n"
"Address: %s\n"
"City: %s\n"
"State Code: %s\n"
"PIN: %s\n"
"Email: %s\n", n->c_id, n->name, n->ph_no, n->address, n->city,
n->state_code, n->pin, n->email);
}
main()
只需声明包装器的一个实例,list_t
打开/验证您的FILE
,然后将指向列表的指针和文件流传递给您的list_from_csv()
(your load()
),然后打印列表,最后释放您分配的所有内存,就完成了。(是的,内存将在退出时被释放,但要尽早养成良好的习惯——不久之后您就会在函数中使用分配内存,在此之前未能释放return
会导致内存泄漏)
int main (int argc, char **argv) {
list_t list = { .head = NULL, .tail = NULL };
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
if (!list_from_csv (&list, fp))
return 1;
if (fp != stdin) /* close file if not stdin */
fclose (fp);
prn_list (&list);
del_list (&list);
}
总而言之,您将拥有类似于以下内容的内容:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BYTE8 8 /* if you need a constant, #define one (or more) */
#define BYTE32 32
#define BYTE64 64
#define MAXC 1024
typedef struct node_t { /* list node */
/* 6 characters=date & 6 characters=time & counter no & city code */
long long int c_id;
/*Compulsory Fields (Cannot be Skipped)*/
char name[BYTE64];
long long int ph_no; //10 digit phone number
/*Non Compulsory Fields (Can be Skipped)*/
char address[BYTE64];
char city[BYTE32];
char state_code[BYTE8];
char pin[BYTE8];
char email[BYTE64];
struct node_t *next;
} node_t;
typedef struct { /* list wrapper with head & tail pointers */
node_t *head, *tail;
} list_t;
/** add node at end of list, update tail to end */
node_t *add (list_t *l, node_t *data)
{
node_t *node = malloc (sizeof *node); /* allocate node */
if (!node) { /* validate allocation */
perror ("malloc-node");
return NULL;
}
*node = *data; /* initialize members values */
if (!l->head) /* if 1st node, node is head/tail */
l->head = l->tail = node;
else { /* otherwise */
l->tail->next = node; /* add at end, update tail pointer */
l->tail = node;
}
return node; /* return new node */
}
/** print all nodes in list */
void prn_list (list_t *l)
{
if (!l->head) {
puts ("list-empty");
return;
}
for (node_t *n = l->head; n; n = n->next)
printf ("\nCustomer ID: %lld\n"
"Name: %s\n"
"Phone Number: %lld\n"
"Address: %s\n"
"City: %s\n"
"State Code: %s\n"
"PIN: %s\n"
"Email: %s\n", n->c_id, n->name, n->ph_no, n->address, n->city,
n->state_code, n->pin, n->email);
}
/** delete all nodes in list */
void del_list (list_t *l)
{
node_t *n = l->head;
while (n) {
node_t *victim = n;
n = n->next;
free (victim);
}
}
/** fill list from csv file */
list_t *list_from_csv (list_t *list, FILE *fp)
{
char buf[MAXC];
node_t data = { .c_id = 0, .next = NULL };
while (fgets (buf, MAXC, fp)) { /* read each line in file */
/* parse and VALIDATE values from line */
if (sscanf (buf, "%lld,%lld,%63[^,],%63[^,],%31[^,],%7[^,],%7[^,],%63[^,\n]",
&data.c_id, &data.ph_no, data.name, data.address, data.city,
data.state_code, data.pin, data.email) == 8) {
if (!add (list, &data)) /* validate add to list or break */
break;
}
}
return list;
}
int main (int argc, char **argv) {
list_t list = { .head = NULL, .tail = NULL };
/* use filename provided as 1st argument (stdin by default) */
FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;
if (!fp) { /* validate file open for reading */
perror ("file open failed");
return 1;
}
if (!list_from_csv (&list, fp))
return 1;
if (fp != stdin) /* close file if not stdin */
fclose (fp);
prn_list (&list);
del_list (&list);
}
示例使用/输出
使用您的输入文件dat/customer_list.txt
,运行您将收到的程序:
$ ./bin/customer_list dat/customer_list.txt
Customer ID: 1403201156540201
Name: Katherine_Hamilton
Phone Number: 2226179183
Address: 87_Thompson_St.
City: Fremont
State Code: IA
PIN: 502645
Email: k_hamilton@gmail.com
Customer ID: 2204201532220103
Name: Marc_Knight
Phone Number: 8023631298
Address: -
City: -
State Code: -
PIN: -
Email: -
Customer ID: 305201423120305
Name: Albie_Rowland
Phone Number: 8025595163
Address: -
City: Hamburg
State Code: NY
PIN: 140752
Email: -
Customer ID: 607201232220901
Name: Grant_Phelps
Phone Number: 4055218053
Address: -
City: -
State Code: -
PIN: -
Email: -
内存使用/错误检查
在您编写的任何动态分配内存的代码中,对于分配的任何内存块,您有两个责任:(1)始终保留指向内存块起始地址的指针,(2)它可以在没有时被释放更需要。
您必须使用内存错误检查程序,以确保您不会尝试访问内存或写入超出/超出分配块的边界,尝试读取或基于未初始化值的条件跳转,最后确认释放所有分配的内存。
对于 Linuxvalgrind
是正常的选择。每个平台都有类似的内存检查器。它们都易于使用,只需通过它运行您的程序即可。
$ valgrind ./bin/customer_list dat/customer_list.txt
==14823== Memcheck, a memory error detector
==14823== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==14823== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==14823== Command: ./bin/customer_list dat/customer_list.txt
==14823==
Customer ID: 1403201156540201
Name: Katherine_Hamilton
Phone Number: 2226179183
Address: 87_Thompson_St.
City: Fremont
State Code: IA
PIN: 502645
Email: k_hamilton@gmail.com
<snipped rest>
==14823==
==14823== HEAP SUMMARY:
==14823== in use at exit: 0 bytes in 0 blocks
==14823== total heap usage: 7 allocs, 7 frees, 6,728 bytes allocated
==14823==
==14823== All heap blocks were freed -- no leaks are possible
==14823==
==14823== For counts of detected and suppressed errors, rerun with: -v
==14823== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
始终确认您已释放所有已分配的内存并且没有内存错误。
虽然您可以通过确保每个字符串有足够的存储空间来解决大部分问题,但请花时间考虑使用值的解析,sscanf()
并且由于您控制转换,因此无需'\n'
从从文件中读取的行。(只是不要在从输入字符串解析的值中包含换行符)如果您确实想'\n'
从末尾解析,您应该使用,例如
str[strcspn (str, "\n")] = 0;
sscanf()
最后,使用与上面一起使用的格式字符串strcspn()
,确保您完全了解它们是如何工作的,请参阅man 3 scanf和man 3 strspn
如果您还有其他问题,请告诉我。
推荐阅读
- spring - 我们如何在 RetryContext 中获取 JobId?
- rest - API JSON Schema Validation with Optional Element 使用 Pydantic
- html - Bootstrap:行超出父容器并相互重叠
- c - 用 char 指针和 strcpy 理解 malloc
- go - 不能在赋值中使用 (type interface {}) 作为 int 类型:需要类型断言
- c# - 空接口代码在用于向通用接口添加约束时是否有异味?
- azure-devops - 您可以在不下载工件的情况下读取它的属性吗?
- excel - 循环通过包含公式的范围时出现类型不匹配错误 13
- php - 无法访问@include 之外的变量
- excel - Excel VBA If 语句的正确语法是什么