neo4j - Neo4J 中创建节点的吞吐量随着属性数量的增加而显着降低
问题描述
我正在做一个 A/B 测试来测量 Neo4J 中节点创建的吞吐量。而且我发现随着属性数量的增加,创建节点的吞吐量会显着降低。
设置: Neo4j 集群 3.5.7(3 个核心实例,其中一个是领导者,其余两个是追随者)
TestA:测量 Neo4j 中节点创建的吞吐量,每个节点有 20 个属性。
TestB:测量 Neo4j 集群 3.5.7 中节点创建的吞吐量,每个节点有 40 个属性。
结果:TestB 的吞吐量 = 1/2 * TestA 的吞吐量
下面是我用来生成负载和测量吞吐量的代码。
import org.neo4j.driver.v1.*;
import java.time.Duration;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
/**
 * Load generator that measures node-creation throughput in Neo4j.
 *
 * <p>Builds a list of 40-property maps, then inserts them in batches with a
 * single {@code UNWIND $events AS event CREATE (a:Label) SET a += event}
 * query per write transaction, printing the elapsed time of each batch.
 */
public class UnwindCreateNodes {

    /** Number of nodes inserted per write transaction. */
    private static final int BATCH_SIZE = 5000;

    Driver driver;
    static int start;
    static int end;

    /**
     * Opens a driver connection to the cluster.
     *
     * @param uri      bolt/bolt+routing URI of the cluster
     * @param user     database user name
     * @param password database password
     */
    public UnwindCreateNodes(String uri, String user, String password) {
        Config config = Config.build()
                .withConnectionTimeout(10, TimeUnit.SECONDS)
                .toConfig();
        driver = GraphDatabase.driver(uri, AuthTokens.basic(user, password), config);
    }

    /**
     * Generates {@code end - start} property maps of 40 properties each and
     * inserts them in batches of {@link #BATCH_SIZE} nodes per transaction,
     * printing per-batch timings to stdout.
     */
    private void addNodes() {
        List<Map<String, Object>> listOfProperties = new ArrayList<>(Math.max(0, end - start));
        for (int inner = start; inner < end; inner++) {
            Map<String, Object> properties = new HashMap<>();
            properties.put("name", "Jhon " + inner);
            properties.put("last", "Alan" + inner);
            properties.put("id", 2 + inner);
            properties.put("key", "1234" + inner);
            // 36 identical filler strings pad each node to 40 properties
            // (same keys "field5".."field40" and values as before).
            for (int field = 5; field <= 40; field++) {
                properties.put("field" + field, "kfhc iahf uheguehuguaeghuszjxcb sd");
            }
            listOfProperties.add(properties);
        }

        // BUG FIX: the original loop ran size()/5000 iterations (floor
        // division), silently dropping the final partial batch whenever the
        // total node count was not an exact multiple of 5000. Iterating by
        // offset guarantees every node is inserted.
        for (int from = 0; from < listOfProperties.size(); from += BATCH_SIZE) {
            int to = Math.min(from + BATCH_SIZE, listOfProperties.size());
            // Copy the view so the lambda captures an independent list.
            List<Map<String, Object>> events = new ArrayList<>(listOfProperties.subList(from, to));

            Map<String, Object> apocParam = new HashMap<>();
            apocParam.put("events", events);
            String query = "UNWIND $events AS event CREATE (a:Label) SET a += event";

            Instant startTime = Instant.now();
            try (Session session = driver.session()) {
                session.writeTransaction((tx) -> tx.run(query, apocParam));
            }
            Instant finish = Instant.now();
            long timeElapsed = Duration.between(startTime, finish).toMillis();
            System.out.println("######################--timeElapsed NODES--############################");
            System.out.println("no of nodes per batch " + events.size());
            System.out.println(timeElapsed);
            System.out.println("############################--NODES--############################");
        }
    }

    /** Releases all connections held by the driver. */
    public void close() {
        driver.close();
    }

    /**
     * Entry point. Optional args: start index and end index (exclusive);
     * defaults produce 200000 nodes.
     */
    public static void main(String... args) {
        start = 200001;
        end = 400001;
        if (args.length == 2) {
            // parseInt avoids the needless Integer boxing of Integer.valueOf
            // when assigning to a primitive field.
            start = Integer.parseInt(args[0]);
            end = Integer.parseInt(args[1]);
        }
        UnwindCreateNodes unwindCreateNodes = new UnwindCreateNodes("bolt+routing://x.x.x.x:7687", "neo4j", "neo4j");
        unwindCreateNodes.addNodes();
        unwindCreateNodes.close();
    }
}
下面是图表。
插入5000个节点需要3.5秒,每个节点有40个属性
插入5000个节点需要1.8秒,每个节点有20个属性
这是一个显著的放缓,就属性数量而言,40 并不算大。我的需求最多会达到 100 个属性,但如果连 40 个属性都无法扩展,我不确定如何才能扩展到 100 个?
我尝试过的其他方法包括:使用 apoc.periodic.iterate;
去掉 UNWIND;
完全不使用 UNWIND,
只用 CREATE;
等等,但问题依然存在。
我不想将属性存储在 RDBMS 等外部存储中,因为这对我来说很复杂,因为我正在构建一个通用应用程序,但我不知道将使用哪些属性。
我也无法使用 LOAD CSV 之类的 CSV 导入工具,因为我的数据来自 Kafka,其结构也不符合这类工具的要求,所以 CSV 导入这条路走不通。
有什么想法可以加快速度吗?
解决方案
推荐阅读
- python - 我对关于 auth lib 的 django 登录视图有疑问
- asp.net-core - 为生产配置 Aspnet Blazor Wasam
- reactjs - 如何使用反应路由器在 URL 中使用哈希
- javascript - JS中以毫秒为单位的时间计算
- docker - 如何给容器一个静态 ip,或者如何链接两个 Docker 容器?
- java - 詹金斯在运行 jar 文件后挂起
- python - Selenium 测试自动化,driver.get(url) 发生在网站启动之前
- c# - 如何将 Home/End 按钮按下传播到 WPF ScrollViewer 内的 UWP RichEditBox?
- matlab - 向量中的数据排列
- python - 在 while 循环 discord.py 之后执行 if 语句