python - python中的图表上有很多边
问题描述
我有以下脚本:
import pandas as pd
from igraph import *
# Read the edge table from a CSV file.
df_p_c = pd.read_csv('data/edges.csv')
# Placeholder for code omitted from the post; presumably it builds
# list_edges and dict_case_to_number, which are used below.
...
edges = list_edges
# Collected here but not used by any of the lines shown in this snippet.
vertices = list(dict_case_to_number.keys())
# Build a directed graph from the edge list.
g = Graph(edges=edges, directed=True)
# Draw the graph, passing a 6000 x 6000 bounding box.
plot(g, bbox=(6000, 6000))
我有 2300 条边,彼此之间连接稀疏。这是我绘制出来的图: 下面是其中几个部分的放大图:
这张图无法阅读,因为边与边之间的距离太小。怎样才能让边之间的距离更大一些?只有属于同一“家族”的边之间才应该距离很近。
有没有其他方法可以改善这种包含大量边的图?我在寻找任何能够可视化父子关系的方法,使用其他 Python 包也可以。
解决方案
您的图似乎包含很多小的、互不相连的连通分量。如果想得到一张信息丰富的图,我认为应该按大小对这些连通分量进行排序和分组。此外,许多网络布局算法的基本假设是图中存在一个巨型连通分量。因此,如果想得到合理的坐标,通常需要分别计算每个分量的布局,然后再把各个分量相对于彼此排列好。我会用下面这种方式重新绘制您的图:
我使用 networkx 为这张图编写了代码,因为这是我首选的模块。不过,把其中的 networkx 函数替换成 igraph 函数也很容易。您需要替换的两个函数是 networkx.connected_component_subgraphs,以及您想用作 component_layout_func 的布局函数。
#!/usr/bin/env python
import numpy as np
import matplotlib.pyplot as plt
import networkx
def layout_many_components(graph,
                           component_layout_func=networkx.layout.spring_layout,
                           pad_x=1., pad_y=1.):
    """Compute node positions for a graph made of many connected components.

    The graph is split into its connected components, which are ordered by
    size. Each component gets its own bounding box, is laid out on its own
    with ``component_layout_func``, and is then rescaled into that box, so
    that components do not overlap in the combined layout.

    Arguments:
    ----------
    graph: networkx.Graph object
        The graph to plot.

    component_layout_func: function (default networkx.layout.spring_layout)
        Function used to layout individual components. It is called with a
        single component (sub)graph and must return a dict mapping each node
        to its (x, y) position.
        You can parameterize the layout function by partially evaluating the
        function first. For example:

        from functools import partial
        my_layout_func = partial(networkx.layout.spring_layout, k=10.)
        pos = layout_many_components(graph, my_layout_func)

    pad_x, pad_y: float
        Padding between subgraphs in the x and y dimension.

    Returns:
    --------
    pos : dict node : (float x, float y)
        The layout of the graph.

    """
    components = _get_components_sorted_by_size(graph)
    component_sizes = [len(component) for component in components]
    bboxes = _get_component_bboxes(component_sizes, pad_x, pad_y)

    # Components are node-disjoint, so merging the per-component layouts
    # never overwrites a position.
    pos = {}
    for component, bbox in zip(components, bboxes):
        pos.update(_layout_component(component, bbox, component_layout_func))
    return pos
def _get_components_sorted_by_size(g):
    """Return the connected components of ``g`` as graphs, smallest first.

    ``networkx.connected_component_subgraphs`` was removed in networkx 2.4.
    The equivalent is to take the node sets from
    ``networkx.connected_components`` and build one subgraph per set. Each
    subgraph is copied so callers get independent graphs rather than views
    of ``g``, matching the removed function's default behaviour.
    """
    subgraphs = [g.subgraph(nodes).copy()
                 for nodes in networkx.connected_components(g)]
    # sorted() is stable: components of equal size keep their discovery order.
    return sorted(subgraphs, key=len)
def _get_component_bboxes(component_sizes, pad_x=1., pad_y=1.):
    """Assign a bounding box to every component, one row per component size.

    Arguments:
    ----------
    component_sizes: iterable of int
        Number of nodes in each component, in the order the components will
        be drawn. Equal sizes are expected to be adjacent (e.g. sorted), as
        a new row is started every time the size changes.

    pad_x, pad_y: float
        Padding between neighbouring boxes in the x and y dimension.

    Returns:
    --------
    bboxes: list of (x, y, width, height) tuples
        One box per entry in ``component_sizes``, in the same order.

    """
    bboxes = []
    x, y = (0, 0)
    current_n = 1
    for n in component_sizes:
        width, height = _get_bbox_dimensions(n, power=0.8)

        if n != current_n:  # size changed: create a "new line"
            x = 0  # reset x
            y += height + pad_y  # shift y up
            current_n = n

        bboxes.append((x, y, width, height))
        x += width + pad_x  # shift x down the line
    return bboxes
def _get_bbox_dimensions(n, power=0.5):
# return (np.sqrt(n), np.sqrt(n))
return (n**power, n**power)
def _layout_component(component, bbox, component_layout_func):
    """Lay out a single component and fit the result into ``bbox``.

    ``component_layout_func`` is called with ``component`` and must return a
    dict mapping node to (x, y); those positions are then rescaled so that
    they span the (x, y, width, height) box given by ``bbox``.
    """
    pos = component_layout_func(component)
    return _rescale_layout(pos, bbox)
def _rescale_layout(pos, bbox):
min_x, min_y = np.min([v for v in pos.values()], axis=0)
max_x, max_y = np.max([v for v in pos.values()], axis=0)
if not min_x == max_x:
delta_x = max_x - min_x
else: # graph probably only has a single node
delta_x = 1.
if not min_y == max_y:
delta_y = max_y - min_y
else: # graph probably only has a single node
delta_y = 1.
new_min_x, new_min_y, new_delta_x, new_delta_y = bbox
new_pos = dict()
for node, (x, y) in pos.items():
new_x = (x - min_x) / delta_x * new_delta_x + new_min_x
new_y = (y - min_y) / delta_y * new_delta_y + new_min_y
new_pos[node] = (new_x, new_y)
return new_pos
def test():
    """Build a demo graph with many small components, lay it out, and draw it."""
    from itertools import combinations

    g = networkx.Graph()

    # add 100 unconnected nodes
    g.add_nodes_from(range(100))

    # add 50 2-node components
    g.add_edges_from([(ii, ii+1) for ii in range(100, 200, 2)])

    # add 33 3-node components
    # (stop at 299: range(200, 300, 3) would add a 34th triangle on nodes
    # 299-301, which overlaps the larger components starting at node 300)
    for ii in range(200, 299, 3):
        g.add_edges_from([(ii, ii+1), (ii, ii+2), (ii+1, ii+2)])

    # add a couple of larger components
    n = 300
    for ii in np.random.randint(4, 30, size=10):
        g.add_edges_from(combinations(range(n, n+ii), 2))
        n += ii

    pos = layout_many_components(g, component_layout_func=networkx.layout.circular_layout)

    networkx.draw(g, pos, node_size=100)

    plt.show()
if __name__ == '__main__':
    # Run the demo only when executed as a script, not on import.
    test()
编辑
如果您希望子图排列得更紧凑,则需要安装 rectangle-packer(pip install rectangle-packer),并把 _get_component_bboxes 替换为以下版本:
import rpack
def _get_component_bboxes(component_sizes, pad_x=1., pad_y=1.):
    """Assign tightly packed bounding boxes to the components using rpack.

    Arguments:
    ----------
    component_sizes: list of int
        Number of nodes in each component, in ascending order.

    pad_x, pad_y: float
        Padding reserved around each box in the x and y dimension.

    Returns:
    --------
    bboxes: list of (x, y, width, height) tuples
        One box per entry in ``component_sizes``, in the same order.

    """
    dimensions = [_get_bbox_dimensions(n, power=0.8) for n in component_sizes]
    # rpack only works on integers; sizes should be in descending order
    # (the input is ascending, hence the reversal). The padded sizes are
    # truncated to int here.
    dimensions = [(int(width + pad_x), int(height + pad_y))
                  for (width, height) in dimensions[::-1]]
    origins = rpack.pack(dimensions)
    # Strip the padding again so that neighbouring boxes keep a gap.
    bboxes = [(x, y, width - pad_x, height - pad_y)
              for (x, y), (width, height) in zip(origins, dimensions)]
    # Undo the reversal so the boxes line up with component_sizes.
    return bboxes[::-1]
编辑#2
我编写了一个用于可视化网络的库,名为 netgraph。它会按上述方式自动处理包含多个连通分量的网络。它与 networkx 和 igraph 的 Graph 对象完全兼容,因此应该可以轻松、快速地绘制出美观的图(至少初衷如此)。
import itertools
import matplotlib.pyplot as plt
import networkx as nx
# installation easiest via pip:
# pip install netgraph
from netgraph import Graph
# construct the graph as before:
g = nx.Graph()

# add 30 unconnected nodes
g.add_nodes_from(range(30))

# add 15 2-node components
g.add_edges_from([(ii, ii+1) for ii in range(30, 60, 2)])

# add 10 3-node components
for ii in range(60, 90, 3):
    g.add_edges_from([(ii, ii+1), (ii, ii+2), (ii+1, ii+2)])

# add a couple of larger components
n = 90
for ii in [10, 20, 40]:
    # every pair of nodes in the block is connected
    g.add_edges_from(itertools.combinations(range(n, n+ii), 2))
    n += ii

# if there are any disconnected components, netgraph automatically handles them separately
Graph(g, node_layout='circular', node_size=1, node_edge_width=0.1, edge_width=0.1, edge_color='black', edge_alpha=1.)
plt.show()
推荐阅读
- java - Netflix Hystrix 指标在 /hystrix.stream 上不可见
- floating-point - Twig 'is same as(0.0)' 值 0.0 为 false
- elasticsearch - Elasticsearch - 匹配短语时避免基于单词和基于模糊的匹配
- spring-mvc - 在名称为“SpringDisptacher”的 DispatcherServlet 中找不到具有 URI [/WorkforceManagement/] 的 HTTP 请求的映射
- awk - Bash如何使用awk在空行上拆分文件
- python-3.x - 为什么 python 中的 expandtabs() 以奇怪的方式工作?
- ms-access - 如何从 3 个表格 FOR DATA REPORT 组标题中用 VB6 SHAPE 编码编写代码
- python - 在 Docker 中启动后如何运行 Django manage.py 命令(Django-background-tasks)?
- android - 缺少操作栏标题和导航抽屉图标
- reactjs - 为 reducer 编写测试用例