首页 > 解决方案 > D3.js WordCloud:单词重叠并且有奇怪的间距和分布

问题描述

我对 D3.js 相当陌生,正在尝试使用 Jason Davies 的 D3 wordcloud 库(d3-cloud)制作词云。我不明白为什么出现频率较低的单词会相互重叠,而且单词分布得很松散,中间留有大量空白。我希望词云更加紧凑、更有条理。目前的效果如下:

在此处输入图像描述 我在这里看到了类似的问题,并且尝试了很多前面提到的解决方案,例如:

  1. 更改 svg 和画布尺寸。
  2. 更改字体样式,例如.font('Helvetica')
  3. 使用 10 个单词、30 个单词和 50 个单词的变体。
  4. 指定这样的文本访问器函数.text(function(d) { return d.word; })
  5. 尝试调整 .padding() 选项
  6. 使用 .rotate(0)

这是我在 JS Fiddle上的代码。

至于单词的大小,我在代码中使用的是 rank(排名),而不是单词的出现频率 frequency。这是因为数据集中存在异常值,直接按频率映射会使字号失衡:假设最高频率为 32,最低频率为 1,由于字号是按比例映射的,两者的大小会相差悬殊。

所以我选择根据 rank 来确定字号,并使用 JavaScript 对这些数据进行排序。

这是我在这里提出的第一个问题,如果有不清楚的地方,还请见谅。欢迎任何反馈、帮助或解决方案!

提前非常感谢:)

    // Set the dimensions and margins of the graph.
    // width/height are the inner drawing area: the 600x550 outer size minus the margins.
    var margin = { top: 10, right: 10, bottom: 10, left: 10 },
        width = 600 - margin.left - margin.right,
        height = 550 - margin.top - margin.bottom;

        // Word -> frequency map; draw() shows the number in the tooltip as "<n> times".
        var dataset =  {
                        "talk": 2,
                        "customer": 3,
                        "helpful": 1,
                        "upgrade": 2,
                        "excite": 12,
                        "yesterday": 6,
                        "feedback": 5,
                        "staging": 2,
                        "good": 12,
                        "work": 28,
                        "nice": 9,
                        "ship": 4,
                        "cool": 5,
                        "planner": 2,
                        "homepage": 2,
                        "awesome": 2,
                        "call": 3,
                        "week": 20,
                        "monthly": 2,
                        "focus": 6,
                        "marketing": 6,
                        "website": 7,
                        "annoy": 2,
                        "launch": 5,
                        "today": 7,
                        "nashville": 5,
                        "people": 8,
                        "golf": 2,
                        "afternoon": 6,
                        "snow": 6,
                        "tomorrow": 8,
                        "ph": 5,
                        "email": 4,
                        "exist": 2,
                        "user": 13,
                        "time": 14,
                        "morning": 13,
                        "early": 4,
                        "add": 4,
                        "product": 8,
                        "day": 13,
                        "block": 4,
                        "weekend": 7,
                        "bitcoin": 1,
                        "trillion": 1,
                        "move": 5,
                        "peep": 2,
                        "integration": 6,
                        "drive": 5,
                        "help": 4
                    }

    // Append the svg object to the #my_dataviz container at the full outer size,
    // then add a <g> shifted by the left/top margins; `svg` refers to that <g>.
    var svg = d3.select("#my_dataviz").append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .append("g")
        .attr("transform",
            "translate(" + margin.left + "," + margin.top + ")");

    // Convert the word dictionary into a list of [word, frequency] pairs for d3.
    var word_list = Object.entries(dataset)

    // Sort ascending by frequency (index 1 of each pair); the rank computation
    // further down walks the list in this order.
    word_list = word_list.sort(function(a, b) { return a[1] - b[1]; });

    // For the color gradient: largest and smallest frequency in the data
    // (28 and 1 for the dataset above).
    var maxValue = d3.max(word_list, function(d) { return d[1] });
    var minValue = d3.min(word_list, function(d) { return d[1] });

    // Sequential color scale over the frequency range.
    // NOTE(review): the upper domain bound is maxValue - 3; the reason for the
    // offset is not evident from this code.
    var color = d3.scaleSequential()
        .interpolator(d3.interpolatePurples) // built-in purple color scheme
        .domain([minValue, maxValue - 3]);
    /**
     * Computes the number that is handed to the color scale for one word.
     *
     * The result is maxValue * from_percentage / 100 plus
     * value * (to_percentage - from_percentage) / 100, where maxValue is read
     * from the enclosing scope. Despite the name, it returns a plain number,
     * not an RGB color.
     */
    function get_rgb(from_percentage, to_percentage, value) {
        var floor = maxValue * from_percentage / 100;
        var spread = to_percentage - from_percentage;
        var scaled = value * spread / 100;
        return floor + scaled;
    }

    // Running state for the ranking pass below.
    var prev_frequency = 0
    var rank = 0

    // Assign each word a rank and a color. word_list was sorted ascending by
    // frequency above, so words with the same frequency share a rank and every
    // new distinct frequency raises the rank by one.
    word_list.forEach(function(d) {
        var frequency = d[1]

        if (frequency != prev_frequency) {
            rank += 1
        }

        // Stored as an extra property on the [word, frequency] array itself.
        d.rank = rank

        prev_frequency = frequency


        d.color = color(get_rgb(50, 100, frequency)); // 50 and 100 are the "from"/"to" percentages passed to get_rgb
    });

    // Constructs a new cloud layout instance, which computes a position for each word.
    // Word cloud features that differ from one word to the other must be set here.
    var layout = d3.layout.cloud()
        .size([width - 150, height - 200])
        .words(word_list)
        .padding(7) // space between words
        .rotate(0)
        .font('Helvetica')
        .fontWeight("bold")
        // NOTE(review): draw() renders the text at 5 + d.rank * 5 / 6, which is
        // not the size given to the layout here.
        .fontSize(function(d) { return d.rank; }) // font size of words
        .text(function(d) { return d[0]; })
        .on("end", draw);
    layout.start();

    // Takes the output of 'layout' above and draws the words; it is registered
    // as the layout's "end" listener.
    // Word cloud features that are THE SAME from one word to the other can be set here.
    //
    // words: the word objects passed by the layout; this function reads
    //        d[0] (word), d[1] (frequency), d.rank, d.color, d.x, d.y and d.rotate.
    function draw(words) {

        // Tooltip container; the #tooltip CSS rule positions it absolutely and
        // starts it at opacity 0.
        d3.select('#my_dataviz')
            .append('div')
            .attr('id', 'tooltip');
        // .attr('style', 'position: absolute; opacity: 0;');

        svg
            .append("g")
            // Shift the group by half of the layout size.
            .attr("transform", "translate(" + layout.size()[0] / 2 + "," + layout.size()[1] / 2 + ")")
            .selectAll("text")
            .data(words)
            .enter().append("text")
            // NOTE(review): the layout was given d.rank as the font size, but the
            // text is drawn at 5 + d.rank * 5 / 6 (and without a unit).
            .style("font-size", function(d) { return 5 + d.rank * 5 / 6; })
            // .style("fill", "#69b3a2")
            .attr("text-anchor", "middle")
            .style("font-family", "Helvetica")
            // .attr('font-family', 'Impact')
            .attr("fill", function(d) { return d.color; })
            .attr("transform", function(d) {
                return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")";
            })
            .text(function(d) { return d[0]; })
            // Show the word's frequency next to the pointer while hovering.
            .on('mouseover', function(event, d) {
                d3.select('#tooltip')
                    .style('opacity', 1)
                    .text(`${d[1]} times`)
                    .style('left', (event.pageX) + 'px')
                    .style('top', (event.pageY) + 'px')
            })
            // Hide the tooltip again when the pointer leaves the word.
            .on('mouseout', function(event, d) {
                d3.select('#tooltip')
                    .style('opacity', 0)
            });
    }
/* Hover tooltip created by draw(): hidden by default (opacity 0) and placed via
   left/top from script; pointer-events is off so it does not intercept the mouse. */
#tooltip {
    position: absolute;
    opacity: 0;
    background-color: rgba(136, 136, 136, 0.884);
    font-family: Arial, Helvetica, sans-serif;
    font-size: 5pt;
    border-radius: 2px;
    padding: 3px;
    pointer-events: none;
    color: rgba(255, 255, 255, 0.842);
    letter-spacing: 0.25px;
}
<!DOCTYPE html>
<meta charset="utf-8">

<!-- Load d3.js (v6) and the tooltip stylesheet -->
<script src="https://d3js.org/d3.v6.js"></script>
<link rel="stylesheet" type="text/css" href="style.css">

<!-- Load d3-cloud (provides d3.layout.cloud) -->
<script src="https://cdn.jsdelivr.net/gh/holtzy/D3-graph-gallery@master/LIB/d3.layout.cloud.js"></script>

<!-- Load d3-scale-chromatic v0.3.
     NOTE(review): the URL is protocol-relative, so it only resolves when the page is served over http(s). -->
<script src="//d3js.org/d3-scale-chromatic.v0.3.min.js"></script>

<body>
    <!-- Container the script appends the svg and the tooltip to -->
    <div id="my_dataviz"></div>
    <!-- NOTE(review): there is a stray comma between the type and src attributes below. -->
    <script type="text/javascript" , src="wordcloud.js"></script>
</body>

标签: javascript, d3.js, word-cloud

解决方案


布局所使用的字体大小与您实际绘制的字体大小不一致。布局使用的是:

 .fontSize(function(d) { return d.rank; }) // font size of words

而绘制时使用的字体大小是:

 .style("font-size", function(d) { return 5 + d.rank * 5 / 6; })

您想让布局考虑您正在绘制的文本的大小,因此您应该更新布局生成器以使用正确的字体大小:

 .fontSize(function(d) { return 5 + d.rank * 5 / 6; })

// Outer canvas is 600x550; subtracting the margins gives the inner plotting area.
    var margin = { top: 10, right: 10, bottom: 10, left: 10 };
    var width = 600 - margin.left - margin.right;
    var height = 550 - margin.top - margin.bottom;

    // Word -> frequency lookup that feeds the cloud.
    var dataset = {
        talk: 2,
        customer: 3,
        helpful: 1,
        upgrade: 2,
        excite: 12,
        yesterday: 6,
        feedback: 5,
        staging: 2,
        good: 12,
        work: 28,
        nice: 9,
        ship: 4,
        cool: 5,
        planner: 2,
        homepage: 2,
        awesome: 2,
        call: 3,
        week: 20,
        monthly: 2,
        focus: 6,
        marketing: 6,
        website: 7,
        annoy: 2,
        launch: 5,
        today: 7,
        nashville: 5,
        people: 8,
        golf: 2,
        afternoon: 6,
        snow: 6,
        tomorrow: 8,
        ph: 5,
        email: 4,
        exist: 2,
        user: 13,
        time: 14,
        morning: 13,
        early: 4,
        add: 4,
        product: 8,
        day: 13,
        block: 4,
        weekend: 7,
        bitcoin: 1,
        trillion: 1,
        move: 5,
        peep: 2,
        integration: 6,
        drive: 5,
        help: 4
    };

    // Full-size <svg> inside #my_dataviz; `svg` is the inner <g> offset by the margins.
    var svg = d3.select("#my_dataviz")
        .append("svg")
        .attr("width", width + margin.left + margin.right)
        .attr("height", height + margin.top + margin.bottom)
        .append("g")
        .attr("transform", `translate(${margin.left},${margin.top})`);

    // [word, frequency] pairs, ordered from least to most frequent.
    var word_list = Object.entries(dataset);
    word_list.sort((a, b) => a[1] - b[1]);

    // Frequency extremes used for the color mapping.
    var maxValue = d3.max(word_list, (d) => d[1]);
    var minValue = d3.min(word_list, (d) => d[1]);

    // Purple sequential scale; its domain runs from minValue to maxValue - 3.
    var color = d3.scaleSequential()
        .domain([minValue, maxValue - 3])
        .interpolator(d3.interpolatePurples);

    // Returns the number fed to the color scale for a word: from_percentage
    // percent of maxValue (read from the enclosing scope) plus
    // (to_percentage - from_percentage) percent of value.
    function get_rgb(from_percentage, to_percentage, value) {
        var percentOf = function(amount, pct) { return amount * pct / 100; };
        return percentOf(maxValue, from_percentage) + percentOf(value, to_percentage - from_percentage);
    }

    // Running state for the ranking pass below.
    var prev_frequency = 0;
    var rank = 0;

    // Assign each word a rank and a color. word_list is sorted ascending by
    // frequency, so equal frequencies share a rank and each new distinct
    // frequency raises the rank by one.
    word_list.forEach(function(d) {
        var frequency = d[1];

        // Strict comparison: both operands are numbers, so no coercion is wanted.
        if (frequency !== prev_frequency) {
            rank += 1;
        }

        // Stored as extra properties on the [word, frequency] array itself.
        d.rank = rank;
        prev_frequency = frequency;

        // 50 and 100 are the "from"/"to" percentages passed to get_rgb.
        d.color = color(get_rgb(50, 100, frequency));
    });

    // Constructs a new cloud layout instance, which computes a position for each word.
    // Word cloud features that differ from one word to the other must be set here.
    var layout = d3.layout.cloud()
        .size([width, height])
        .words(word_list)
        .padding(2) // space between words
        .rotate(0)
        .font('Helvetica')
        .fontWeight("bold")
        // The size given here is what the layout reserves room for, so the
        // rendered text has to use the same size.
        .fontSize(function(d) { return 15 + d.rank * 5 / 3; })
        .text(function(d) { return d[0]; })
        .on("end", draw);
    layout.start();

    // Takes the output of 'layout' above and draws the words; it is registered
    // as the layout's "end" listener.
    //
    // words: the word objects passed by the layout. Each one is a
    //        [word, frequency] array carrying d.color plus the values d3-cloud
    //        stores on it: d.x, d.y, d.rotate, d.size, d.font and d.weight.
    function draw(words) {

        // Create the tooltip container only once, so a repeated draw() does not
        // produce several elements with the same id.
        if (d3.select('#tooltip').empty()) {
            d3.select('#my_dataviz')
                .append('div')
                .attr('id', 'tooltip');
        }

        svg
            .append("g")
            // Shift the group by half of the layout size.
            .attr("transform", "translate(" + layout.size()[0] / 2 + "," + layout.size()[1] / 2 + ")")
            .selectAll("text")
            .data(words)
            .enter().append("text")
            // Render with exactly the size, face and weight the layout measured
            // (d.size is the integer size d3-cloud used), so the drawn text
            // occupies the space that was reserved for it.
            .style("font-size", function(d) { return d.size + "px"; })
            .style("font-family", function(d) { return d.font; })
            .style("font-weight", function(d) { return d.weight; })
            .attr("text-anchor", "middle")
            .attr("fill", function(d) { return d.color; })
            .attr("transform", function(d) {
                return "translate(" + [d.x, d.y] + ")rotate(" + d.rotate + ")";
            })
            .text(function(d) { return d[0]; })
            // Show the word's frequency next to the pointer while hovering.
            .on('mouseover', function(event, d) {
                d3.select('#tooltip')
                    .style('opacity', 1)
                    .text(`${d[1]} times`)
                    .style('left', (event.pageX) + 'px')
                    .style('top', (event.pageY) + 'px');
            })
            // Hide the tooltip again when the pointer leaves the word.
            .on('mouseout', function() {
                d3.select('#tooltip')
                    .style('opacity', 0);
            });
    }
/* Hover tooltip for the word cloud. */
#tooltip {
    /* Placement and visibility: positioned from script, hidden until hover,
       and never intercepting the mouse. */
    position: absolute;
    opacity: 0;
    pointer-events: none;

    /* Box */
    padding: 3px;
    border-radius: 2px;
    background-color: rgba(136, 136, 136, 0.884);

    /* Text */
    color: rgba(255, 255, 255, 0.842);
    font-family: Arial, Helvetica, sans-serif;
    font-size: 5pt;
    letter-spacing: 0.25px;
}
<!DOCTYPE html>
<meta charset="utf-8">

<!-- Load d3.js (v6) and the tooltip stylesheet -->
<script src="https://d3js.org/d3.v6.js"></script>
<link rel="stylesheet" type="text/css" href="style.css">

<!-- Load d3-cloud (provides d3.layout.cloud) -->
<script src="https://cdn.jsdelivr.net/gh/holtzy/D3-graph-gallery@master/LIB/d3.layout.cloud.js"></script>

<!-- Load d3-scale-chromatic v0.3. The scheme is spelled out: a protocol-relative
     URL does not resolve when the page is opened from file://. -->
<script src="https://d3js.org/d3-scale-chromatic.v0.3.min.js"></script>

<body>
    <!-- Container the script appends the svg and the tooltip to -->
    <div id="my_dataviz"></div>
    <script type="text/javascript" src="wordcloud.js"></script>
</body>

在此处输入图像描述

(在 enter 选择集中设置字体大小时,为渲染的文本显式指定单位(例如 px)也没有什么坏处——这是我在上述更改之外所做的另一处修改。)

至于调整 g 元素的大小:它的大小取决于其中的内容。您可以使用更大的字体,并增大传给词云布局生成器的尺寸(正如我在上面的代码中所做的那样)。


推荐阅读