首页 > 解决方案 > 如何在脚本中解析关于合并单元格的 html 表

问题描述

该表头包含在以下 js 文件中:https://www.portaldefinancas.com/js-tx-ctb/th-cdib.js

在此处输入图像描述

// Excerpt of th-cdib.js: two chained document.write calls. The second one emits the end of the
// caption, the complete <thead> (four header rows that use rowspan/colspan) and the opening <tbody>.
document.write(""),document.write('</p></caption><thead><tr><th rowspan="4">M&ecirc;s de<br>Refer&ecirc;ncia</th><th colspan="7">Taxas - %</th></tr><tr>    <th rowspan="3">Mensal</th><th colspan="4">Anualizada</th><th colspan="2">Acumulada</th></tr><tr>   <th colspan="2">Ano de<br>252 dias<br> &uacute;teis</th><th colspan="2">Ano de<br>365/366 dias<br>corridos</th><th rowspan="2">No ano</th><th rowspan="2">Em <br>12 meses</th></tr><tr><th>Dias</th><th>    Taxa</th><th>Dias</th><th>  Taxa</th></tr></thead><tbody>');

如何解析带有合并行和合并列的表头?我目前使用的脚本是:

/**
 * Fetches the script at `url`, extracts the header cells it writes and returns their texts.
 *
 * The downloaded text is split on the word "document" and the element at index 2 is used.
 * Everything between the first "<th " and the last "th>" of each line of that chunk is wrapped
 * in a single <table><tr>…</tr></table> so it can be parsed by XmlService.
 *
 * @param {string} url - Address of the JavaScript file containing the header markup.
 * @returns {string[][]} One array per <tr> element, holding the text of its direct <th> children.
 */
function getHeaders(url) {
  const scriptText = UrlFetchApp.fetch(url).getContentText();
  const headerChunk = scriptText.split('document')[2];
  const headerCells = headerChunk.match(/(?<=<th ).*(?=th>)/g);

  // XmlService needs well-formed XML: swap the HTML entities and <br> tags for plain characters.
  const substitutions = [
    [/&ecirc;/g, 'ê'],
    [/&uacute;/g, 'ú'],
    [/<br>/g, '\n'],
  ];
  const markup = substitutions.reduce(
    (text, [pattern, replacement]) => text.replace(pattern, replacement),
    '<table><tr><th ' + headerCells + 'th></tr></table>',
  );

  const rowElements = XmlService.parse(markup)
    .getDescendants()
    .filter((node) => {
      // asElement() yields a falsy value for non-element content, which is filtered out.
      const element = node.asElement();
      return element && element.getName() == 'tr';
    });

  return rowElements.map((row) => row.getChildren('th').map((cell) => cell.getValue()));
}

但它没有保留合并区域。谢谢你的帮助!

标签: google-apps-script, google-sheets

解决方案


由于智力锻炼是我的首选药物……我情不自禁。下面是一个可能的解决方案。它在一定程度上可行,但编码风格远谈不上优雅:

/**
 * Entry point: builds the header grid and writes it to the active sheet.
 *
 * The grid comes from getHeaders() and is passed through handle_rowspans(); it is written
 * starting at row 1, column 1, using the grid's row count and the length of its first row.
 */
function main() {
  const activeSheet = SpreadsheetApp.getActiveSheet();
  const headerGrid = handle_rowspans(getHeaders());
  const rowCount = headerGrid.length;
  const columnCount = headerGrid[0].length;
  activeSheet.getRange(1, 1, rowCount, columnCount).setValues(headerGrid);
}

/**
 * Builds the header grid (one array of cell texts per header row) from the embedded markup.
 *
 * The markup is first passed through handle_colspans(), then everything between the first
 * "<th " and the last "th>" is wrapped in <table><tr>…</tr></table> and parsed with XmlService.
 *
 * @param {string} [url] - Currently ignored: the header markup is embedded below. The
 *   commented-out lines show how the same markup was read from the live file.
 * @returns {string[][]} One array per <tr> element, holding the text of its direct <th> children.
 */
function getHeaders(url) {
  // var source = UrlFetchApp.fetch(url).getContentText()
  // source = source.split('document')[2]

  const source = `<thead><tr><th rowspan="4">M&ecirc;s de<br>Refer&ecirc;ncia</th><th colspan="7">Taxas - %</th></tr><tr>    <th rowspan="3">Mensal</th><th colspan="4">Anualizada</th><th colspan="2">Acumulada</th></tr><tr>   <th colspan="2">Ano de<br>252 dias<br> &uacute;teis</th><th colspan="2">Ano de<br>365/366 dias<br>corridos</th><th rowspan="2">No ano</th><th rowspan="2">Em <br>12 meses</th></tr><tr><th>Dias</th><th>    Taxa</th><th>Dias</th><th>  Taxa</th></tr></thead><tbody>`;

  const expanded = handle_colspans(source);

  // Declared locally: the previous undeclared assignment created an implicit global and
  // throws a ReferenceError in strict-mode code.
  const rawTable = '<table><tr><th ' + expanded.match(/(?<=<th ).*(?=th>)/g) + 'th></tr></table>';

  // XmlService needs well-formed XML: swap the HTML entities and <br> tags for plain characters.
  const table = rawTable
    .replace(/&ecirc;/g, 'ê')
    .replace(/&uacute;/g, 'ú')
    .replace(/<br>/g, '\n');

  const rows = XmlService.parse(table)
    .getDescendants()
    .filter((node) => {
      // asElement() yields a falsy value for non-element content, which is filtered out.
      const element = node.asElement();
      return element && element.getName() === 'tr';
    });

  return rows.map((row) => row.getChildren('th').map((cell) => cell.getValue()));
}

/**
 * Makes merged header cells explicit in the raw markup.
 *
 * For every "</th>"-delimited cell fragment in every "</tr>"-delimited row:
 *  - a colspan of N appends the marker "{colN}" to the cell and inserts N - 1 empty "<th>"
 *    cells right after it, so the row gets one cell per spanned column;
 *  - a rowspan of M appends the marker "{rowM}" to the cell (the blank cells for the rows
 *    below are not added here).
 * A cell carrying both attributes gets both markers ("{colN}{rowM}") followed by the fillers.
 *
 * @param {string} table - Header markup whose rows end in "</tr>" and cells in "</th>".
 * @returns {string} The markup with span markers and filler cells added.
 */
function handle_colspans(table) {
  // Returns the digits of a span attribute, or null when the fragment has no numeric value.
  // Using match() instead of replace() avoids turning the whole fragment into the "value"
  // when the attribute is not written exactly as name="digits".
  const readSpan = (cell, pattern) => {
    const found = cell.match(pattern);
    return found ? found[1] : null;
  };

  const expandRow = (row) =>
    row
      .split('</th>')
      .flatMap((cell) => {
        const colspan = readSpan(cell, /\bcolspan\s*=\s*["']?(\d+)/);
        const rowspan = readSpan(cell, /\browspan\s*=\s*["']?(\d+)/);

        // Both markers are appended to the text before any filler is added, so a cell that
        // spans columns and rows at once is handled like any other cell.
        let marked = cell;
        if (colspan !== null) marked += `{col${colspan}}`;
        if (rowspan !== null) marked += `{row${rowspan}}`;

        // Clamp at zero so colspan="0" cannot request a negative number of fillers.
        const fillerCount = colspan === null ? 0 : Math.max(Number(colspan) - 1, 0);
        return [marked, ...Array.from({ length: fillerCount }, () => '<th>')];
      })
      .join('</th>');

  return table.split('</tr>').map(expandRow).join('</tr>');
}

/**
 * Inserts blank cells under every cell that carries a "{rowN}" marker.
 *
 * For a marker "{rowN}" found at column C of row R, an empty string is spliced in at column C
 * of each of the next N - 1 rows, shifting their existing cells to the right. When the same
 * cell also carries a "{colM}" marker, M blanks are inserted per row so the whole merged
 * rectangle is reserved. Rows are processed top to bottom and cells left to right, so blanks
 * inserted by earlier cells are already in place when later cells are examined.
 *
 * The array is modified in place and also returned.
 *
 * @param {string[][]} array - Header grid, one array of cell texts per row.
 * @returns {string[][]} The same array instance, with the blank cells inserted.
 */
function handle_rowspans(array) {
  for (let row = 0; row < array.length; row++) {
    const cells = array[row];
    for (let col = 0; col < cells.length; col++) {
      const text = String(cells[col]);

      // The greedy ".*" with the "s" flag picks the last marker in the text, even across newlines.
      const rowMarker = text.match(/.*\{row(\d+)/s);
      if (!rowMarker) continue;

      const colMarker = text.match(/.*\{col(\d+)/s);
      const width = colMarker ? Math.max(Number(colMarker[1]), 1) : 1;

      // Stop at the last existing row: a span that runs past the end of the grid used to
      // dereference a missing row and throw.
      const lastSpannedRow = Math.min(row + Number(rowMarker[1]), array.length) - 1;
      for (let below = row + 1; below <= lastSpannedRow; below++) {
        array[below].splice(col, 0, ...Array.from({ length: width }, () => ''));
      }
    }
  }
  return array;
}

它会给你这样的表格:

在此处输入图像描述

其中 {row#} 和 {col#} 表示:要还原原始布局,需要把当前单元格与其下方多少行({row#})或右侧多少列({col#})的单元格合并。这可能是智力练习的下一剂。:)


推荐阅读