c# - EF Core:插入大量数据
问题描述
是的, .NET / EF Core 中已经存在批量插入优化问题。但这个问题是不同的。
我遇到了一个场景,我必须创建一个数据库并用处理/计算/生成的数据填充它。记录的数量在 1 亿到 2 亿条记录之间。
由于播种数据需要很长时间,因此我无法在启动时(网络 api)进行。所以我创建了一个单独的控制台应用程序来迁移数据库并为其播种。
它仍然需要很长时间(我什至还没有完成一次播种)。
我可以在主要方法中优化的其他任何内容(除了删除目前仅用于调试的控制台日志)。
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
using Microsoft.EntityFrameworkCore.Migrations;
using System;
using System.Diagnostics;
namespace EFCoreBulkInsert
{
class EntityA
{
public int Id { get; set; }
public int ColA { get; set; }
public string ColB { get; set; }
public EntityB EntityB { get; set; }
public EntityC EntityC { get; set; }
}
class EntityB
{
public int Id { get; set; }
public string Col { get; set; }
public EntityA EntityA { get; set; }
}
class EntityC
{
public int Id { get; set; }
public int Col { get; set; }
public EntityA EntityA { get; set; }
}
class MyDbContext : DbContext
{
public DbSet EntityA { get; set; }
public DbSet EntityB { get; set; }
public DbSet EntityC { get; set; }
public MyDbContext(DbContextOptions options) : base(options) { }
protected override void OnModelCreating(ModelBuilder builder)
{
builder.Entity(BuildEntityA);
builder.Entity(BuildEntityB);
builder.Entity(BuildEntityC);
}
protected virtual void BuildEntityA(EntityTypeBuilder entity)
{
entity
.ToTable("EntityA")
.HasKey(e => e.Id);
entity.Property(e => e.Id)
.HasColumnName("Id")
.UseIdentityColumn(1, 1)
.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore);
entity.Property(e => e.ColA)
.HasColumnName("ColA")
.IsRequired();
entity.HasIndex(e => e.ColA)
.HasName("IX_EntityA_ColA");
entity.Property(e => e.ColB)
.HasColumnName("ColB")
.IsRequired();
entity.HasIndex(e => e.ColB)
.HasName("IX_EntityA_ColB");
entity.HasOne(e => e.EntityB)
.WithOne(e => e.EntityA);
entity.HasOne(e => e.EntityC)
.WithOne(e => e.EntityA);
}
protected virtual void BuildEntityB(EntityTypeBuilder entity)
{
entity
.ToTable("EntityB")
.HasKey(e => e.Id);
entity.Property(e => e.Id)
.HasColumnName("Id")
//.UseIdentityColumn(1, 1)
//.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
;
entity.Property(e => e.Col)
.HasColumnName("Col")
.IsRequired();
entity.HasIndex(e => e.Col)
.HasName("IX_EntityB_Col");
entity.HasOne(e => e.EntityA)
.WithOne(e => e.EntityB)
.HasForeignKey(e => e.Id)
.HasConstraintName("FK_EntityB-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade);
}
protected virtual void BuildEntityC(EntityTypeBuilder entity)
{
entity
.ToTable("EntityC")
.HasKey(e => e.Id);
entity.Property(e => e.Id)
.HasColumnName("Id")
//.UseIdentityColumn(1, 1)
//.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
;
entity.Property(e => e.Col)
.HasColumnName("Col")
.IsRequired();
entity.HasIndex(e => e.Col)
.HasName("IX_EntityC_Col");
entity.HasOne(e => e.EntityA)
.WithOne(e => e.EntityC)
.HasForeignKey(e => e.Id)
.HasConstraintName("FK_EntityC-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade);
}
}
[DbContext(typeof(MyDbContext))]
partial class MyDbContextModelSnapshot : ModelSnapshot
{
protected override void BuildModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "3.1.0")
.HasAnnotation("Relational:MaxIdentifierLength", 128)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
{
b.Property("Id")
.ValueGeneratedOnAdd()
.HasColumnName("Id")
.HasColumnType("int")
.HasAnnotation("SqlServer:IdentityIncrement", 1)
.HasAnnotation("SqlServer:IdentitySeed", 1)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property("ColA")
.HasColumnName("ColA")
.HasColumnType("int");
b.Property("ColB")
.IsRequired()
.HasColumnName("ColB")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("ColA")
.HasName("IX_EntityA_ColA");
b.HasIndex("ColB")
.HasName("IX_EntityA_ColB");
b.ToTable("EntityA");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.IsRequired()
.HasColumnName("Col")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityB_Col");
b.ToTable("EntityB");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.HasColumnName("Col")
.HasColumnType("int");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityC_Col");
b.ToTable("EntityC");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityB")
.HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
.HasConstraintName("FK_EntityB-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityC")
.HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
.HasConstraintName("FK_EntityC-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
#pragma warning restore 612, 618
}
}
public partial class Initial : Migration
{
protected override void Up(MigrationBuilder migrationBuilder)
{
migrationBuilder.CreateTable(
name: "EntityA",
columns: table => new
{
Id = table.Column(nullable: false)
.Annotation("SqlServer:Identity", "1, 1"),
ColA = table.Column(nullable: false),
ColB = table.Column(nullable: false)
},
constraints: table =>
{
table.PrimaryKey("PK_EntityA", x => x.Id);
});
migrationBuilder.CreateTable(
name: "EntityB",
columns: table => new
{
Id = table.Column(nullable: false),
Col = table.Column(nullable: false)
},
constraints: table =>
{
table.PrimaryKey("PK_EntityB", x => x.Id);
table.ForeignKey(
name: "FK_EntityB-Id_EntityA-Id",
column: x => x.Id,
principalTable: "EntityA",
principalColumn: "Id",
onDelete: ReferentialAction.Cascade);
});
migrationBuilder.CreateTable(
name: "EntityC",
columns: table => new
{
Id = table.Column(nullable: false),
Col = table.Column(nullable: false)
},
constraints: table =>
{
table.PrimaryKey("PK_EntityC", x => x.Id);
table.ForeignKey(
name: "FK_EntityC-Id_EntityA-Id",
column: x => x.Id,
principalTable: "EntityA",
principalColumn: "Id",
onDelete: ReferentialAction.Cascade);
});
migrationBuilder.CreateIndex(
name: "IX_EntityA_ColA",
table: "EntityA",
column: "ColA");
migrationBuilder.CreateIndex(
name: "IX_EntityA_ColB",
table: "EntityA",
column: "ColB");
migrationBuilder.CreateIndex(
name: "IX_EntityB_Col",
table: "EntityB",
column: "Col");
migrationBuilder.CreateIndex(
name: "IX_EntityC_Col",
table: "EntityC",
column: "Col");
}
protected override void Down(MigrationBuilder migrationBuilder)
{
migrationBuilder.DropTable(
name: "EntityB");
migrationBuilder.DropTable(
name: "EntityC");
migrationBuilder.DropTable(
name: "EntityA");
}
}
[DbContext(typeof(MyDbContext))]
[Migration("20200103182223_Initial")]
partial class Initial
{
protected override void BuildTargetModel(ModelBuilder modelBuilder)
{
#pragma warning disable 612, 618
modelBuilder
.HasAnnotation("ProductVersion", "3.1.0")
.HasAnnotation("Relational:MaxIdentifierLength", 128)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
{
b.Property("Id")
.ValueGeneratedOnAdd()
.HasColumnName("Id")
.HasColumnType("int")
.HasAnnotation("SqlServer:IdentityIncrement", 1)
.HasAnnotation("SqlServer:IdentitySeed", 1)
.HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);
b.Property("ColA")
.HasColumnName("ColA")
.HasColumnType("int");
b.Property("ColB")
.IsRequired()
.HasColumnName("ColB")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("ColA")
.HasName("IX_EntityA_ColA");
b.HasIndex("ColB")
.HasName("IX_EntityA_ColB");
b.ToTable("EntityA");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.IsRequired()
.HasColumnName("Col")
.HasColumnType("nvarchar(450)");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityB_Col");
b.ToTable("EntityB");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.Property("Id")
.HasColumnName("Id")
.HasColumnType("int");
b.Property("Col")
.HasColumnName("Col")
.HasColumnType("int");
b.HasKey("Id");
b.HasIndex("Col")
.HasName("IX_EntityC_Col");
b.ToTable("EntityC");
});
modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityB")
.HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
.HasConstraintName("FK_EntityB-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
{
b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
.WithOne("EntityC")
.HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
.HasConstraintName("FK_EntityC-Id_EntityA-Id")
.OnDelete(DeleteBehavior.Cascade)
.IsRequired();
});
#pragma warning restore 612, 618
}
}
class Program
{
static int EntityCount = 0;
const int EntityLimit = 200_000_000;
///
/// Just a fake factory method mimicking the real procedural generation of the db records
///
/// The next enity to store or null if no more entities can be calculated.
static EntityA CalculateNextEntity()
{
if (EntityCount >= EntityLimit) return null;
EntityCount++;
return new EntityA
{
// id is generated by the db
ColA = EntityCount,
ColB = EntityCount.ToString(),
EntityB = new EntityB
{
// id is generated by the db
Col = EntityCount.ToString()
},
EntityC = new EntityC
{
// id is generated by the db
Col = EntityCount
}
};
}
static void Main(string[] args)
{
string connectionString = "Server=(localdb)\\mssqllocaldb;Database=TestDB;Trusted_Connection=True;MultipleActiveResultSets=true";
DbContextOptionsBuilder builder = new DbContextOptionsBuilder();
builder.UseSqlServer(connectionString);
using (MyDbContext dbContext = new MyDbContext(builder.Options))
{
Console.Write("migrating ... ");
dbContext.Database.Migrate();
Console.WriteLine(" done");
dbContext.ChangeTracker.AutoDetectChangesEnabled = false;
EntityA entity;
int count = 0;
int batchSize = 1000;
Stopwatch watch = new Stopwatch();
watch.Start();
while (true)
{
entity = CalculateNextEntity();
if (entity == null) break;
count++;
dbContext.Add(entity);
if (count == batchSize)
{
count = 0;
dbContext.SaveChanges();
}
Console.WriteLine($"{entity.Id} {entity.ColA} {entity.ColB} {entity.EntityB.Col} {entity.EntityC.Col}");
}
dbContext.SaveChanges();
Console.WriteLine("---");
int entityCount = dbContext.EntityA.CountAsync().Result;
Console.WriteLine($"inserted {entityCount} within {watch.ElapsedMilliseconds} milliseconds");
}
}
}
}
请注意,这是完整的工作代码。复制并粘贴到控制台应用程序 Program.cs 中,它会编译并运行。所需的核桃:
- Microsoft.EntityFrameworkCore 3.1.0
- Microsoft.EntityFrameworkCore.Relational 3.1.0
- Microsoft.EntityFrameworkCore.SqlServer 3.1.0
- Microsoft.EntityFrameworkCore.Tools 3.1.0
解决方案
在我看来,最好避免 EF 插入如此数量的行。最好尝试使用 DataTable 使用批量插入:
using (SqlConnection connection = new SqlConnection(connString))
{
SqlBulkCopy bulkCopy = new SqlBulkCopy(
connection,
SqlBulkCopyOptions.TableLock |
SqlBulkCopyOptions.FireTriggers |
SqlBulkCopyOptions.UseInternalTransaction,
null
);
bulkCopy.DestinationTableName = tableName;
connection.Open();
bulkCopy.WriteToServer(dataTable);
connection.Close();
}
然后只需为所有实体重用上述代码。
它有一些优点:
- 无需编写不必要的代码,例如方法
BuildEntityB
- 批量插入比 EF 更快。它非常快。
- 上面的代码是高度可重用的。您可以插入任何实体,而无需创建需要实体框架的辅助代码
推荐阅读
- c# - 设置没有名称属性的元素的内容
- c# - if(!bool) 的真正含义是什么?
- docker - 如何将现有的 Kubernetes 集群与在 WSL2 上运行的 Ubuntu 连接起来?
- python - 使用用于使用 Python 进行身份验证的两个密码之一登录到多个服务器
- java - 实际应用中的反射
- android - 使用导航组件时,DialogFragment 不会关闭
- ionic-framework - 在 Ionic 中显示预先格式化的 HTML 内容
- jquery - 无法在jquery carousal中显示动态数据
- python-3.x - 在 python 3.7 中创建文件对象
- python - 如何使用 pywin32 从 msproject 中的任务使用中提取数据?