首页 > 解决方案 > dc.js 系列图表 - 填充缺失数据时图表太慢

问题描述

我想创建一个多时间线图作为系列图。

我阅读了 Stack Overflow 上关于为 dc.js lineChart 填充缺失数据的问题:《填充缺失的日期并在没有数据的情况下显示零》。

问题:我在那里实现了代码,它对单折线图效果很好。对于系列图表,我需要对其进行一些调整。它可以工作,但是性能很糟糕。

这是我们使用的示例数据:

// Sample records: a store name, a "lat lon" string, and a report time in epoch milliseconds.
// Array elements are comma-separated so the literal parses as valid JavaScript.
let data = [
  {description: "Walmart", location: "40.216403 -74.541296", timeReported: 1581710670184},
  {description: "Target", location: "38.271996 -84.032575", timeReported: 1583524065011},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1583524065011},
  {description: "7-11", location: "34.925349 -78.463977", timeReported: 1583524065011},
  {description: "WaWa", location: "35.716208 -77.741230", timeReported: 1583524065013},
  {description: "7-11", location: "41.258950 -83.888060", timeReported: 1583524065013},
  {description: "Shell", location: "37.879694 -79.836127", timeReported: 1583524065011},
  {description: "Dominos", location: "35.890273 -80.700329", timeReported: 1583524065395},
  {description: "Dominos", location: "39.268777 -78.743366", timeReported: 1583524065397},
  {description: "Walgreens", location: "35.490215 -81.773863", timeReported: 1583524065399},
  {description: "7-11", location: "37.974797 -81.393449", timeReported: 1583524065506},
  {description: "Wendys", location: "40.859685 -76.963065", timeReported: 1583524065521},
  {description: "CVS", location: "38.517910 -78.251419", timeReported: 1583524065553},
  {description: "CVS", location: "35.947033 -81.616061", timeReported: 1583524142169},
  {description: "Shell", location: "39.566535 -77.992499", timeReported: 1583524142176},
  {description: "Target", location: "37.832142 -88.003151", timeReported: 1583524142170},
  {description: "Wendys", location: "40.245397 -80.061998", timeReported: 1583524142223},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1583524142223},
  {description: "Macys", location: "36.631458 -77.803286", timeReported: 1583524142213},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1583524142251},
  {description: "7-11", location: "41.138285 -83.298142", timeReported: 1583524142249},
  {description: "Wendys", location: "34.940485 -77.230388", timeReported: 1583524142249},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1583524142296},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1583524142293},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1583524142310},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1583524142338},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1582058735883},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1582058735883},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1582058735883},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1582058735883},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1582058735883},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1582058735883},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1582058735883},
  {description: "Kohls", location: "40.373533 -101.057470", timeReported: 1582838559493},
];

这是示例代码。顺便说一句,下面代码中的 curTimeInterval 只是某个 d3 时间间隔(time interval)的别名,可以由用户选择(d3.timeHour、d3.timeDay、d3.timeWeek 或 d3.timeMonth)。

// Chart setup script: builds crossfilter dimensions/groups from `data`, derives a padded
// x-domain, and configures a dc.js series chart (one line per description) on '#series'.
// NOTE(review): `cf`, `dateDim` and `reportedGroup` are assigned without a declaration in
// this snippet — presumably declared elsewhere; confirm.
cf = crossfilter(data);

// Dimension keyed by the report time passed through the selected interval function.
// NOTE(review): reads `d.timeReportedDate`, but the sample records shown only carry a numeric
// `timeReported` — presumably a Date field is added in a preprocessing step not shown; confirm.
dateDim = cf.dimension((d) => {
  return curTimeInterval(d.timeReportedDate);
});
// Summing a constant 1 per record, i.e. a per-key record count.
reportedGroup = dateDim.group().reduceSum((d) => 1);


// Smallest and largest group keys, then widened by two intervals on each side
// to form the x-axis domain used below.
let minDate = d3.min(reportedGroup.all(), (kv) => {
  return kv.key;
});
let maxDate = d3.max(reportedGroup.all(), (kv) => {
  return kv.key;
});
minDate = curTimeInterval.offset(minDate, -2);
maxDate = curTimeInterval.offset(maxDate, 2);

// Composite dimension: key is [description, interval-adjusted report time].
const runDimension = cf.dimension((d) => {
  return [d.description, curTimeInterval(d.timeReportedDate)];
});


const runGroup = runDimension.group();

// Wraps the group so that intervals without data are reported with value 0.
// NOTE(review): `fill_composite_intervals` is only assigned further down in this snippet,
// so it must already be defined by the time this line runs — confirm ordering.
const filledSeries = fill_composite_intervals(runGroup, curTimeInterval);

const seriesChart = new dc.SeriesChart('#series');
seriesChart
  .width(768)
  .height(480)
  .chart(function(c) {
    return new dc.LineChart(c).curve(d3.curveCardinal);
  })
  .x(d3.scaleTime().domain([minDate, maxDate]))
  .xUnits(curTimeInterval.range)
  .brushOn(false)
  .clipPadding(10)
  .elasticY(true)
  .dimension(runDimension)
  .group(filledSeries)
  .mouseZoomable(true)
  // The composite key is [series, x]: element 0 selects the series, element 1 is the x value.
  .seriesAccessor((d) => {
    return d.key[0];
  })
  .keyAccessor((d) => {
    return d.key[1];
  })
  .valueAccessor((d) => {
    return d.value;
  })
  .legend(dc.legend().x(350).y(350).itemHeight(13).gap(5).horizontal(1).legendWidth(140).itemWidth(70))
  // tickValues() is called on whatever yAxis() returns, so it ends the chart-level chain.
  // NOTE(review): `min` and `max` are not defined anywhere in this snippet — presumably the
  // minimum/maximum y values computed elsewhere; confirm.
  .yAxis()
  .tickValues(d3.range(min > 0 ? min - 1 : min, max + 1));

// Widen the left margin by 40 (mutates the margins object returned by the chart).
seriesChart.margins().left += 40;


// Wraps a group whose keys are [category, time] pairs in an object exposing all().
// all() returns, for every category, one entry per tick produced by
// interval.range(earliest time, latest time): the category's own value where it has
// data at or before the tick, and value 0 for every tick it has no data for.
//
// group:    object with all() returning [{key: [category, Date], value}, ...]
// interval: object with range(start, stop) returning an ascending array of Dates
//           (e.g. a d3 time interval)
// returns:  {all()} yielding [{key: [category, Date], value}, ...], grouped by category in
//           order of first appearance (by time), each category ascending in time
//
// Declared as a function (rather than assigned to an undeclared name) so it is hoisted and
// can be called from code that appears earlier in the same script.
function fill_composite_intervals(group, interval) {
  return {
    all() {
      const retVal = [];
      // Sort a copy: sorting the array handed back by group.all() in place would
      // reorder data that belongs to the caller.
      const allArray = [...group.all()].sort(
        (a, b) => a.key[1].getTime() - b.key[1].getTime()
      );
      if (!allArray.length) {
        return retVal;
      }

      // Every tick between the earliest and the latest time present in the data.
      const target = interval.range(
        allArray[0].key[1],
        allArray[allArray.length - 1].key[1]
      );

      // Split the sorted entries per category; each series stays sorted by time.
      const allMap = new Map();
      for (const {key: [category, time], value} of allArray) {
        let series = allMap.get(category);
        if (!series) {
          series = [];
          allMap.set(category, series);
        }
        series.push({key: new Date(time), value});
      }

      // Merge each category's points with the tick list, emitting a zero for every
      // tick that is passed without a matching point.
      for (const [category, orig] of allMap) {
        let oi = 0;
        let ti = 0;
        while (oi < orig.length && ti < target.length) {
          const point = orig[oi];
          const tick = target[ti];
          if (point.key <= tick) {
            retVal.push({key: [category, point.key], value: point.value});
            ++oi;
            // Only consume the tick when the point sits exactly on it.
            if (point.key.getTime() === tick.getTime()) {
              ++ti;
            }
          } else {
            retVal.push({key: [category, tick], value: 0});
            ++ti;
          }
        }
        // Leftovers are appended one at a time instead of via push.apply, which would
        // pass every remaining element as a separate call argument.
        for (; oi < orig.length; ++oi) {
          retVal.push({key: [category, orig[oi].key], value: orig[oi].value});
        }
        for (; ti < target.length; ++ti) {
          retVal.push({key: [category, target[ti]], value: 0});
        }
      }
      return retVal;
    },
  };
}

标签: d3.js, dc.js, crossfilter

解决方案


我首先创建了一个说明问题的 fiddle(在线示例)。其中值得注意的是一个选择菜单,它显示了哪些时间间隔适合图表的数据和缩放级别(域)。

显示超过 width/2 点是不合适的(因为它们不会被渲染),并且显示少于两个点也不合适,所以“不合适”的选项是灰色的斜体:

fiddle 截图:适当/不适当的间隔

它使用对象将间隔名称映射到相应 d3 间隔中的毫秒数:

// Approximate duration, in milliseconds, of each d3 time interval, keyed by the
// interval's name on the d3 object and listed from shortest to longest.
const intervals = {
  timeSecond: 1000,
  timeMinute: 60 * 1000,
  timeHour: 60 * 60 * 1000,
  timeDay: 24 * 60 * 60 * 1000,
  timeWeek: 7 * 24 * 60 * 60 * 1000,
  // a month is taken as one twelfth of a 365-day year
  timeMonth: (365 * 24 * 60 * 60 * 1000) / 12,
  // a year is taken as 365 days
  timeYear: 365 * 24 * 60 * 60 * 1000,
};

allowed_intervals确定第一个和最后一个适当的间隔:

// Determines the finest and the coarsest interval that are appropriate for a date domain.
//
// chart:      object with width() returning the chart width
// intervals:  object mapping d3 interval names to their length in milliseconds,
//             listed from shortest to longest
// dateDomain: [start, end] Dates
// returns:    [first, last] interval names, where
//             first = the first listed interval yielding fewer than width/2 points, and
//             last  = the last listed interval under which start and end floor to different values
// throws:     Error('date range too long')  when even the longest interval yields too many points
//             Error('date range too short') when start and end floor to the same value under
//             every interval, so no interval can show two distinct points
function allowed_intervals(chart, intervals, dateDomain) {
  const entries = Object.entries(intervals);
  const dt = dateDomain[1].getTime() - dateDomain[0].getTime();
  const maxPoints = chart.width() / 2;

  const first = entries.find(([, ms]) => dt / ms < maxPoints);
  if (!first) {
    throw new Error('date range too long');
  }

  // Search from the longest interval down; copy before reversing to leave `entries` intact.
  const last = [...entries].reverse().find(
    ([iname]) => d3[iname](dateDomain[0]).getTime() !== d3[iname](dateDomain[1]).getTime()
  );
  if (!last) {
    throw new Error('date range too short');
  }
  return [first[0], last[0]];
}

所以这一切都很好。该示例打印了结果数据,我们可以看到,如果用 d3.timeMinute 填充示例数据,则会从原始的 15 个数据点生成 332482 个数据点。这显然是太多的数据,尤其是对于一个简单的示例。

这是一个可以找到合适的 d3 时间间隔的算法。但是,当我们启用缩放时它会失败:现在我们可以放大到(比如说)一个小时的范围,此时 timeMinute 是合适的;但如果对所有数据都使用该间隔,点就太多了,图表会慢到几乎停滞。

所以我开始思考如何让它更有效率。我们实际上不需要填充每个缺失的时间间隔。我们真正需要的是确保我们捕捉到下降沿,当数据从非零变为零时,以及上升沿,当数据从零变为非零时。在这些情况下,我们只需要向输入数据添加零。

这是 fill_composite_intervals 的一个新版本,它利用上升沿和下降沿,只添加显示这些沿所需的零:

// Wraps a group keyed by [category, time] so that all() returns the same entries plus
// zero-valued entries marking the edges of each run of data, instead of a zero for
// every empty interval.
//
// group:    object with all() returning [{key: [category, Date], value}, ...]
// interval: d3-style time interval providing range(start, stop) and offset(date, step)
// returns:  {all()} yielding [{key: [category, Date], value}, ...] per category,
//           each category ascending in time
function fill_composite_intervals(group, interval) {
  // Zero-valued placeholder entry for a tick.
  const zeroAt = (tick) => ({key: tick, value: 0});

  // Merges one category's time-sorted points with the tick list, writing a zero only
  // right after a run of data ends and right before the next run begins.
  const addEdgeZeros = (points, ticks) => {
    const merged = [];
    if (!points.length) {
      return merged;
    }
    let p = 0;
    let t = 0;
    let zeroJustWritten = false; // a zero was emitted for the current gap already
    let zeroOwed = false; // further empty ticks were passed without emitting anything
    while (p < points.length && t < ticks.length) {
      const point = points[p];
      const tick = ticks[t];
      if (point.key <= tick) {
        if (zeroOwed) {
          // rising edge: the gap was longer than one tick, so close it with a zero
          // on the tick just before the data resumes
          merged.push(zeroAt(ticks[t - 1]));
          zeroOwed = false;
        }
        merged.push(point);
        p += 1;
        if (point.key.getTime() === tick.getTime()) {
          t += 1;
        }
        zeroJustWritten = false;
      } else {
        if (zeroJustWritten) {
          zeroOwed = true;
        } else {
          // falling edge: first empty tick after a value (or at the very start)
          merged.push(zeroAt(tick));
          zeroJustWritten = true;
        }
        t += 1;
      }
    }
    if (p < points.length) {
      merged.push(...points.slice(p));
    }
    if (t < ticks.length) {
      // a single trailing zero after the category's last point
      merged.push(zeroAt(ticks[t]));
    }
    return merged;
  };

  return {
    all() {
      const entries = group.all().slice();
      if (entries.length === 0) {
        return [];
      }
      // work on a time-sorted copy of the input
      entries.sort((a, b) => a.key[1].getTime() - b.key[1].getTime());

      // ticks spanning the data, padded at both ends so series can start and end on zero
      const earliest = entries[0].key[1];
      const latest = entries[entries.length - 1].key[1];
      const ticks = interval.range(interval.offset(earliest, -1), interval.offset(latest, 2));

      // bucket the points per category, copying each Date
      const byCategory = new Map();
      for (const {key: [cat, time], value} of entries) {
        if (!byCategory.has(cat)) {
          byCategory.set(cat, []);
        }
        byCategory.get(cat).push({key: new Date(time), value});
      }

      // flatten back to [category, time] keyed entries
      const flattened = [];
      for (const [cat, points] of byCategory) {
        for (const {key: time, value} of addEdgeZeros(points, ticks)) {
          flattened.push({key: [cat, time], value});
        }
      }
      return flattened;
    },
  };
}

请参阅代码中的注释以获取解释。它生成的数据量只与输入数据成比例,例如使用 timeMinute 时,15 个输入点只生成 67 个点,而不是 300+K 个。

有趣的是,我发现当零较少时,d3.curveCardinal 会产生奇怪的伪影。直觉上,我认为如果跳过了一些点,这条线会获得太多的“动力”。所以我改用了 d3.curveMonotoneX,我觉得它更合适。

  .curve(d3.curveMonotoneX)

我还在开头和结尾填充了interval.range,以便数据以零开始和结束,这更吸引人。

当您选择 d3.timeSecond 时,此示例仍然很慢(它仍然要迭代 300+K 个点),但使用 timeMinute 时表现还不错,而 timeMinute 似乎正好符合此数据的分辨率。

进一步可能的改进:

  1. 添加更多前导零和尾随零,以使曲线一致/对称
  2. 停止使用 interval.range,以免计算之后又丢弃那么多点;改为仅使用 interval.offset 以及下一个/上一个数据点来检测上升沿和下降沿(棘手!)

示例小提琴截图 - 更高效; 前缘/后缘


推荐阅读