首页 > 解决方案 > 在 javascript 中使用 html 代码解析 CSV 文件

问题描述

我有一个包含 `Id key` 和 `Long desc` 两列的 CSV 文件。`Id key` 只是一个字符串,而 `Long desc` 是 HTML 代码。

我的目标是将 CSV 文件解析为 JSON。(见下方“输出”)

问题是我不能按 `"` 拆分它,因为有一些属性(例如 `color: ""red""`)以及一些文本(例如 `"Charger "15W"`)本身就包含 `"`。我的另一个想法是按 `Id key` 后面的 `;` 拆分(例如 `KE4I2-21;`),但同样有一些 HTML 行也包含 `;`。

我正在使用 node.js,我尝试使用一些 CSV 到 JSON 包转换器,但他们没有设法解析这些数据。

知道如何将这个奇怪的 CSV 文件转换为 JSON 吗?(我知道我的输出示例不正确,因为我在字符串内部打开和关闭了 `"`。)

我一开始是像这样把所有的 `""` 替换为 `'`:`.replace(/""/g, "'")`

CSV 文件 (desc.csv)

Id key;Long desc

KE4I2-21;"<p color=""red""><strong>Charger "15W" - Black</strong></p>

<iframe src=""https://www.youtube.com/embed/XXXXXX"" width=""560"" height=""315"" frameborder=""0"" allowfullscreen=""allowfullscreen""></iframe>

<p><strong>More</strong>:&nbsp;</p>

<ul>

<li><strong>List</li>

<li><strong>M&auml;rke</strong>: SiGN</li>

</ul>"

LE0PP;"<p>Type-C charger<br /> - OnePlus 2<br /></p>

<p><em>Warning</em></p>"

T12-XRE2;"<h2>&nbsp;</h2>

<h2><strong>Car Charger</strong></h2>

<p>Lorem ipsum dolor...</p>

<p><strong>Assets:</strong></p>

<ul>

<li><strong>Something</strong>Nice</strong>&nbsp;Ja</li>

<li><strong>Other</strong>Things here</strong>&nbsp;Ja</li>

</ul>"

输出

[
  {
    "Id key": "KE4I2-21",
    "Long desc": "<p color="red"><strong>Charger "15W" - Black</strong></p><iframe src="https://www.youtube.com/embed/XXXXXX" width="560" height="315" frameborder="0" allowfullscreen="allowfullscreen"></iframe><strong>More</strong>:&nbsp;</p>\n\n<ul>\n\n<li><strong>List</li>\n\n<li><strong>M&auml;rke</strong>: SiGN</li>\n\n</ul>"
  },
  {
    "Id key": "LE0PP",
    "Long desc": "<p>Type-C charger<br /> - OnePlus 2<br /></p>\n\n<p><em>Warning</em></p>"
  },
  {
    "Id key": "T12-XRE2",
    "Long desc": "<h2>&nbsp;</h2>\n\n<h2><strong>Car Charger</strong></h2>\n\n<p>Lorem ipsum dolor...</p>\n\n<p><strong>Assets:</strong></p>\n\n<ul>\n\n<li><strong>Something</strong>Nice</strong>&nbsp;Ja</li>\n\n<li><strong>Other</strong>Things here</strong>&nbsp;Ja</li>\n\n</ul>"
  }
]

这就是 csvtojson 包解析 csv 文件的方式

const CSVToJSON = require("csvtojson");

/**
 * Reads ./desc.csv through csvtojson (called with no options) and prints
 * the resulting rows to the console.
 */
async function printDescriptions() {
  const rows = await CSVToJSON().fromFile("./desc.csv");
  console.log(rows);
}

printDescriptions();

// Output
[
  {
    'Id key;Long desc': 'KE4I2-21;"<p color=""red""><strong>Charger "15W" - Black</strong></p>'
  },
  {
    'Id key;Long desc': '<iframe src=""https://www.youtube.com/embed/XXXXXX"" width=""560"" height=""315"" frameborder=""0"" allowfullscreen=""allowfullscreen""></iframe>'
  },
  { 'Id key;Long desc': '<p><strong>More</strong>:&nbsp;</p>' },
  { 'Id key;Long desc': '<ul>' },
  { 'Id key;Long desc': '<li><strong>List</li>' },
  { 'Id key;Long desc': '<li><strong>M&auml;rke</strong>: SiGN</li>' },
  { 'Id key;Long desc': '</ul>"' },
  {
    'Id key;Long desc': 'LE0PP;"<p>Type-C charger<br /> - OnePlus 2<br /></p>'
  },
  { 'Id key;Long desc': '<p><em>Warning</em></p>"' },
  { 'Id key;Long desc': 'T12-XRE2;"<h2>&nbsp;</h2>' },
  { 'Id key;Long desc': '<h2><strong>Car Charger</strong></h2>' },
  { 'Id key;Long desc': '<p>Lorem ipsum dolor...</p>' },
  { 'Id key;Long desc': '<p><strong>Assets:</strong></p>' },
  { 'Id key;Long desc': '<ul>' },
  {
    'Id key;Long desc': '<li><strong>Something</strong>Nice</strong>&nbsp;Ja</li>'
  },
  {
    'Id key;Long desc': '<li><strong>Other</strong>Things here</strong>&nbsp;Ja</li>'
  },
  { 'Id key;Long desc': '</ul>"' }
]

标签: javascript, node.js, json, csv

解决方案


所以我找到了一种“肮脏”的方式来将 CSV 文件解析为有效的 JSON 对象数组。

const fs = require("fs");

/**
 * Returns the last whitespace-separated token of `text`, or "" when `text`
 * contains only whitespace.
 *
 * @param {string} text
 * @returns {string}
 */
function lastToken(text) {
  return text.trim().split(/\s+/).pop();
}

/**
 * Converts the raw text of the `Id key;Long desc` export into an array of
 * `{ "Id key", "Long desc" }` objects.
 *
 * This is a deliberately "dirty" parser, not a general CSV reader: it relies
 * on every description being wrapped in double quotes and introduced by `;"`.
 * Line breaks are flattened to spaces and doubled quotes (`""`) become `'`.
 *
 * @param {string} raw - Full contents of the CSV file.
 * @returns {Array<Object>} One object per record, in file order.
 */
function parseDescriptions(raw) {
  const flattened = raw
    .replace(/Id key;Long desc/g, "")
    .replace(/\r/g, " ")
    .replace(/\n/g, " ")
    .replace(/""/g, "'")
    .replace(/    /g, " ");

  // Splitting on `;"` yields:
  //   [ firstId, desc1 + '" ' + secondId, desc2 + '" ' + thirdId, ..., lastDesc + '"' ]
  const pieces = flattened.split(';"');

  const descriptions = [];
  let id = lastToken(pieces[0]);

  for (const piece of pieces.slice(1)) {
    // The last `"` in a piece is the quote that closes the description;
    // whatever follows it is the ID of the next record. Cutting there keeps
    // the closing quote and the next ID out of "Long desc".
    const closingQuote = piece.lastIndexOf('"');
    const hasClosingQuote = closingQuote !== -1;

    descriptions.push({
      "Id key": id,
      "Long desc": hasClosingQuote ? piece.slice(0, closingQuote) : piece,
    });

    id = hasClosingQuote ? lastToken(piece.slice(closingQuote + 1)) : "";
  }

  return descriptions;
}

const descriptions = parseDescriptions(
  fs.readFileSync("./files/TD-products-beskrivning html.csv", "utf-8")
);

console.log(descriptions);

输出

[
  {
    "Id key": "KE4I2-21",
    "Long desc": "<p color='red'><strong>Charger '15W' - Black</strong></p><iframe src='https://www.youtube.com/embed/XXXXXX' width='560' height='315' frameborder='0' allowfullscreen='allowfullscreen'></iframe><strong>More</strong>:&nbsp;</p>\n\n<ul>\n\n<li><strong>List</li>\n\n<li><strong>M&auml;rke</strong>: SiGN</li>\n\n</ul>"
  },
  {
    "Id key": "LE0PP",
    "Long desc": "<p>Type-C charger<br /> - OnePlus 2<br /></p>\n\n<p><em>Warning</em></p>"
  },
  {
    "Id key": "T12-XRE2",
    "Long desc": "<h2>&nbsp;</h2>\n\n<h2><strong>Car Charger</strong></h2>\n\n<p>Lorem ipsum dolor...</p>\n\n<p><strong>Assets:</strong></p>\n\n<ul>\n\n<li><strong>Something</strong>Nice</strong>&nbsp;Ja</li>\n\n<li><strong>Other</strong>Things here</strong>&nbsp;Ja</li>\n\n</ul>"
  }
]

推荐阅读