首页 > 解决方案 > 通过比较 NodeJs 中的两个 CSV 文件提取匹配行

问题描述

场景是我有两个大的 CSV 文件:csv1.csv 和 csv2.csv。在这两个文件中,都有一个电子邮件列,我必须逐行读取 csv1.csv 并检查电子邮件是否存在于 csv2.csv 中,如果匹配则将 csv2.csv 的行写入 csv3.csv 中。我也尝试过读取流,但它没有按预期工作。任何指导或帮助表示赞赏。

提前感谢大家。

以下是 CSV 文件

csv1.csv

email,header1,header2
test1@example.com,test1,test1
test2@example.com,test2,test2
test3@example.com,test3,test3
test4@example.com,test4,test4
test5@example.com,test5,test5

csv2.csv

email,header1,header2
test4@example.com,test4,test4
test5@example.com,test5,test5
test6@example.com,test6,test6
test7@example.com,test7,test7
test8@example.com,test8,test8

以下是我尝试过的代码

const fs = require('fs');
const csv = require('fast-csv')

class CsvHelper {
  /**
   * Serialize rows to an already-open write stream via fast-csv and
   * resolve once the stream has flushed.
   * @param {fs.WriteStream} filestream - destination stream
   * @param {Array<Array<string>>} rows - rows to serialize
   * @param {Object} options - fast-csv formatting options
   * @returns {Promise<void>} resolves on 'finish', rejects on 'error'
   */
  static write(filestream, rows, options) {
    return new Promise((res, rej) => {
      csv.writeToStream(filestream, rows, options)
        .on('error', err => rej(err))
        .on('finish', () => res());
    });
  }

  /**
   * @param {{ headers: string[], path: string }} opts - output file path
   *   and the header row to emit on creation.
   */
  constructor(opts) {
    this.headers = opts.headers;
    this.path = opts.path;
    this.writeOpts = {
      headers: this.headers,
      // FIX: was misspelled "includeEndRowDelimeter" — fast-csv silently
      // ignores unknown options, so the trailing row delimiter was never
      // emitted. The correct option name is "includeEndRowDelimiter".
      includeEndRowDelimiter: true
    };
  }

  // Create the output file, writing the header row followed by `rows`.
  create(rows) {
    return CsvHelper.write(fs.createWriteStream(this.path, { flags: 'a' }), rows, { ...this.writeOpts });
  }

  // Append `rows` to the existing file without repeating the header row.
  append(rows) {
    return CsvHelper.write(fs.createWriteStream(this.path, { flags: 'a' }), rows, {
      ...this.writeOpts,
      writeHeaders: false,
    });
  }
}

class Helper {
  /**
   * Stream csv2.csv and collect every row whose "email" column equals
   * outerRow's email, then write the matches (as [email, "yes"]) to
   * csv3.csv — creating the file with headers on first use, appending
   * afterwards. NOTE: csv2.csv is re-read once per outer row, so the
   * overall comparison is O(n·m); acceptable here, but a Set of emails
   * built once would scale better.
   * @param {Object} outerRow - one parsed row from csv1.csv
   * @returns {Promise<string>} status message once csv3.csv is updated
   */
  matchCsv(outerRow) {
    const filePath2 = "csv2.csv";
    const filePath3 = "csv3.csv";
    const matches = [];

    const csvFile = new CsvHelper({
      path: filePath3,
      headers: ["Email", "Active"]
    });

    return new Promise((resolve, reject) => {
      fs.createReadStream(filePath2)
        .on("error", err => reject(err))
        .pipe(csv.parse({ headers: true }))
        .on("error", err => reject(err))
        // Matching is synchronous — collect matches, write them once at the end.
        .on("data", innerRow => {
          if (outerRow["email"] === innerRow["email"]) {
            matches.push([innerRow["email"], "yes"]);
          }
        })
        .on("end", async () => {
          // FIX: write errors were previously unhandled inside the async
          // handler; route them through reject so callers see the failure.
          try {
            if (!fs.existsSync(filePath3)) {
              await csvFile.create(matches);
            } else {
              await csvFile.append(matches);
            }
            resolve("Done from matchCsv");
          } catch (err) {
            reject(err);
          }
        });
    });
  }

  /**
   * Drive the comparison: stream csv1.csv row by row and, for each row,
   * run matchCsv against csv2.csv. Rows are processed strictly one at a
   * time by pausing the stream.
   * @returns {Promise<string>} completion message
   */
  generateCsv() {
    const filePath1 = "csv1.csv";

    return new Promise((resolve, reject) => {
      const parseStream = fs.createReadStream(filePath1)
        .on("error", err => reject(err))
        .pipe(csv.parse({ headers: true }));

      parseStream
        .on("error", err => reject(err))
        // BUG FIX: streams do not await async "data" handlers, so the
        // original code raced — "finish" could resolve before any matchCsv
        // completed, and several matchCsv calls could write csv3.csv
        // concurrently (duplicated headers, interleaved rows). Pause the
        // stream while each row is processed, resume when done.
        .on("data", outerRow => {
          parseStream.pause();
          this.matchCsv(outerRow)
            .then(result => {
              console.log("result: ", result);
              parseStream.resume();
            })
            .catch(reject);
        })
        // fast-csv's parse stream signals completion with "end" (emitted
        // after the last row is delivered), which cooperates correctly
        // with the pause/resume above.
        .on("end", () => {
          resolve("Generated csv3.csv file.");
        });
    });
  }
}


/**
 * Entry point: build csv3.csv from the email matches between
 * csv1.csv and csv2.csv.
 */
async function main() {
  const helper = new Helper();
  const result = await helper.generateCsv();
  console.log(result);
}

// FIX: the original floating main() promise turned any failure into an
// unhandled rejection; surface the error and exit non-zero instead.
main().catch(err => {
  console.error(err);
  process.exitCode = 1;
});

标签: javascript、node.js、nodejs-stream

解决方案


所以这个问题有点令人困惑,但我想我知道你想要什么。这是我检查电子邮件是否存在的方法。它将所有行添加到一个数组中,循环遍历它们,然后如果电子邮件地址与您要查找的电子邮件匹配,它将执行其他操作...我想您说过您想再次写入 csv 文件行,但这应该很简单。

const csv = require('csv-parser');
const fs = require('fs');

const filepath = "./example_data.csv";

// Email address to look for in the CSV's "emailAddress" column.
const emailAdd = "myemail@email.com";
const rowsArr = [];

fs.createReadStream(filepath)
    .on('error', () => {
        // handle error
    })

    .pipe(csv())
    .on('data', (row) => {
        rowsArr.push(row);
    })

    .on('end', () => {
        // FIX: the original loop used `i <= rowsArr.length`, so the final
        // iteration read rowsArr[rowsArr.length] (undefined) and threw a
        // TypeError on `.emailAddress`. Iterate only the valid indices.
        for (let i = 0; i < rowsArr.length; i++) {
            if (rowsArr[i].emailAddress === emailAdd) {
                //do something
            }
        }
    })

推荐阅读