首页 > 解决方案 > 从重复出现的范围值中提取单个参数范围

问题描述

我有一个 HTML 文件,其中感兴趣的参数位于一对方括号 [] 之间,但在随后的各对方括号中,同名的参数又重复出现,如下所示:

// Sample of the jArray literal as it appears in the HTML file: string keys map to arrays of HTML-laden strings; only entry "2" carries the "Label: value" fields.
var jArray= {"2":["<span style='color:#c7b699'><b>May 1<\/b><\/span>","Percentage: <span>57%\n<\/span>","Interest Rate: <span>0.53","Amount Exchange: <span>150,<\/span>777,<\/span>695.16","Monthly Exchange: <span>370,<\/span>352.08","Interest Differentiation: <span>8.07","Interest RatePer: <span>0.54","Second Quarter","<b>Friday, May 1, 2020<\/b>","May","Phase: <span>Second Quarter<\/span>","May 1"],"0":["<b>April 29<\/b>","36%\n","Interest Rate: 0.52922522949925","Amount Exchange: 150701713.86852","Monthly Exchange: 378921.00811486","Interest Differentiation: 6.0176806272646","Interest RatePer: 0.52559281178633","Quarter Pre","April 29"],"1":["<b>April 30<\/b>","46%\n","Interest Rate: 0.52909124921306","Amount Exchange: 150739875.63889","Monthly Exchange: 374391.59013105","Interest Differentiation: 7.028645917145","Interest RatePer: 0.53195147367036","Quarter Pre","April 30"],"3":["<b>May 2<\/b>","68%\n","Interest Rate: 0.52882712965299","Amount Exchange: 150815161.77193","Monthly Exchange: 367050.8265867","Interest Differentiation: 9.1346986834112","Interest RatePer: 0.54259013649969","Second Quarter","May 2"],"4":["<b>May 3<\/b>","78%\n","Interest Rate: 0.52869706105934","Amount Exchange: 150852264.90232","Monthly Exchange: 364701.85696453","Interest Differentiation: 10.23252765695","Interest RatePer: 0.54608484792928","Second Quarter","May 3"]};

感兴趣的参数是第一个方括号 [ ] 内首次出现的 "Percentage"(百分比)、"Interest Rate"(利率)……直到最后的日期。如何仅将第一个方括号的内容导出到单独的文本文件中,同时忽略后续方括号内的所有其余条目?此外,"Amount Exchange"(金额交换)的实际值是 150777695.16,"Monthly Exchange"(每月交换)的实际值是 370352.08。花括号 { } 内的条目之间没有换行符。我使用的是 Matlab。

标签: html matlab data-mining data-extraction file-processing

解决方案


我设法将给定的银行数据转换成了正确的格式,并按列写入 .txt 文件。但是,浏览器中的 JavaScript 无法遍历目录中的文件;要做到这一点,您必须使用 PHP 或 Python 这样的服务器端语言。我不确定您是否能使用这些语言,所以我用纯 JavaScript 实现。代码中的注释应该足以说明代码的作用。其中的下载函数取自一个 Stack Overflow 帖子。

// Raw data keyed by string index ("0".."4"); each value is an array of strings
// that may contain HTML markup, thousands separators and "\n" line breaks.
// Entry "2" is the only one with 12 items and "Label: value" fields throughout,
// and it is the only entry loadData() reads.
var jArray = {
  "2": ["<span style='color:#c7b699'><b>May 1<\/b><\/span>", "Percentage: <span>57%\n<\/span>", "Interest Rate: <span>0.53", "Amount Exchange: <span>150,<\/span>777,<\/span>695.16", "Monthly Exchange: <span>370,<\/span>352.08",
    "Interest Differentiation: <span>8.07", "Interest RatePer: <span>0.54", "Second Quarter", "<b>Friday, May 1, 2020<\/b>", "May", "Phase: <span>Second Quarter<\/span>", "May 1"
  ],
  "0": ["<b>April 29<\/b>", "36%\n", "Interest Rate: 0.52922522949925", "Amount Exchange: 150701713.86852", "Monthly Exchange: 378921.00811486", "Interest Differentiation: 6.0176806272646", "Interest RatePer: 0.52559281178633", "Quarter Pre",
    "April 29"
  ],
  "1": ["<b>April 30<\/b>", "46%\n", "Interest Rate: 0.52909124921306", "Amount Exchange: 150739875.63889", "Monthly Exchange: 374391.59013105", "Interest Differentiation: 7.028645917145", "Interest RatePer: 0.53195147367036", "Quarter Pre",
    "April 30"
  ],
  "3": ["<b>May 2<\/b>", "68%\n", "Interest Rate: 0.52882712965299", "Amount Exchange: 150815161.77193", "Monthly Exchange: 367050.8265867", "Interest Differentiation: 9.1346986834112", "Interest RatePer: 0.54259013649969", "Second Quarter",
    "May 2"
  ],
  "4": ["<b>May 3<\/b>", "78%\n", "Interest Rate: 0.52869706105934", "Amount Exchange: 150852264.90232", "Monthly Exchange: 364701.85696453", "Interest Differentiation: 10.23252765695", "Interest RatePer: 0.54608484792928", "Second Quarter",
    "May 3"
  ]
};

// Field labels to export, in output column order. loadData() pairs them
// positionally with the kept items and strips the "<label>: " prefix from each.
let toLoad = ["Percentage", "Interest Rate", "Amount Exchange", "Monthly Exchange", "Interest Differentiation", "Interest RatePer", "Phase"];



/**
 * Builds a two-line CSV (header row + one data row) from entry "2" of the
 * global `jArray` and passes it to `download()` as "bankData.txt".
 *
 * The source entry is never modified, so the function can be called
 * repeatedly (e.g. on every button click) and always produces the same file.
 *
 * @param {string[]} array - Field labels in column order. Each label is paired
 *   positionally with a kept item; its "<label>: " prefix is stripped from the
 *   item and the labels form the header row (followed by a "Date" column).
 * @returns {false|undefined} `false` when `array` is empty (nothing is
 *   downloaded); otherwise `undefined` after triggering the download.
 */
function loadData(array) {
  if (array.length === 0) {
    return false;
  }

  const source = jArray["2"];

  // Copy instead of splicing in place: keep items 1-6 (labelled fields) and
  // 10 onwards (phase, date); drop the heading at 0 and the unlabelled
  // duplicates at 7-9.
  const fields = [...source.slice(1, 7), ...source.slice(10)];

  const cleaned = fields.map((item, index) => {
    // Strip HTML tags (including unterminated ones) and thousands separators.
    let value = item.replace(/<\/?[^>]+(>|$)/g, "").replace(/,/g, "");

    // Items beyond the label list (the trailing date) have no prefix to strip.
    const label = array[index];
    if (label !== undefined) {
      value = value.replace(label + ": ", "");
    }

    // Remove every line break, not just the first one.
    return value.replace(/\n/g, "");
  });

  const dataToFile = array.join(",") + ",Date\n" + cleaned.join(",") + "\n";

  download("bankData.txt", dataToFile);
}


/**
 * Offers `text` to the user as a plain-text file download.
 *
 * Creates a hidden anchor whose href is a `data:` URI holding the
 * URI-encoded text, attaches it to the document body, clicks it, and
 * removes it again.
 *
 * @param {string} filename - Value for the anchor's `download` attribute.
 * @param {string} text - File contents.
 */
function download(filename, text) {
  const link = document.createElement('a');
  const href = `data:text/plain;charset=utf-8,${encodeURIComponent(text)}`;

  link.setAttribute('href', href);
  link.setAttribute('download', filename);
  link.style.display = 'none';

  // The anchor is attached to the document only for the duration of the click.
  const { body } = document;
  body.appendChild(link);
  link.click();
  body.removeChild(link);
}
<!DOCTYPE html>
<html lang="en" dir="ltr">

<head>
  <meta charset="utf-8">
</head>

<body>
  <!-- Clicking the button calls loadData() with the global toLoad label list. -->
  <button onclick="loadData(toLoad);">Download bankdata</button>


</body>

</html>

希望这可以帮助!

编辑:好的,我已经设法把 for 循环转换成了 Matlab 代码。我已经测试过,它应该可以工作。但剩下的部分需要您自己完成,因为我对 Matlab 还不够熟悉。


% Field labels, paired positionally with the kept entries of data below.
toLoad = ["Percentage", "Interest Rate", "Amount Exchange", "Monthly Exchange", "Interest Differentiation", "Interest RatePer", "Phase"];

% Raw strings of the first bracketed group, still containing HTML tags,
% thousands separators and literal "\n" sequences.
data = ["<span style='color:#c7b699'><b>May 1<\/b><\/span>", "Percentage: <span>57%\n<\/span>", "Interest Rate: <span>0.53</span>", "Amount Exchange: <span>150,<\/span>777,<\/span>695.16", "Monthly Exchange: <span>370,<\/span>352.08", "Interest Differentiation: <span>8.07", "Interest RatePer: <span>0.54", "Second Quarter", "<b>Friday, May 1, 2020<\/b>", "May", "Phase: <span>Second Quarter<\/span>", "May 1"];

% Keep the six labelled fields (2:7), the "Phase" entry (11) and the trailing
% date (12); drop the heading (1) and the unlabelled duplicates (8:10).
data = data([2:7, 11, 12]);

for a = 1:numel(data)
    str = regexprep(data(a), '<.*?>', '');   % strip HTML tags
    str = regexprep(str, '\\n', '');         % strip the literal "\n" sequence
    str = regexprep(str, ',', '');           % strip thousands separators
    if a <= numel(toLoad)
        % Only labelled entries carry a "<label>: " prefix; the date does not.
        str = regexprep(str, toLoad(a) + ": ", '');
    end
    data(a) = str;
end



推荐阅读