首页 > 解决方案 > EF Core:插入大量数据

问题描述

是的, .NET / EF Core 中已经存在批量插入优化问题。但这个问题是不同的。


我遇到了一个场景,我必须创建一个数据库并用处理/计算/生成的数据填充它。记录的数量在 1 亿到 2 亿条记录之间。

由于播种数据需要很长时间,因此我无法在启动时(网络 api)进行。所以我创建了一个单独的控制台应用程序来迁移数据库并为其播种。

它仍然需要很长时间(我什至还没有完成一次播种)。

我可以在主要方法中优化的其他任何内容(除了删除目前仅用于调试的控制台日志)。


using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Infrastructure;
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Metadata.Builders;
using Microsoft.EntityFrameworkCore.Migrations;
using System;
using System.Diagnostics;

namespace EFCoreBulkInsert
{

    class EntityA
    {

        public int Id { get; set; }
        public int ColA { get; set; }
        public string ColB { get; set; }

        public EntityB EntityB { get; set; }
        public EntityC EntityC { get; set; }

    }

    class EntityB
    {

        public int Id { get; set; }
        public string Col { get; set; }

        public EntityA EntityA { get; set; }

    }

    class EntityC
    {

        public int Id { get; set; }
        public int Col { get; set; }

        public EntityA EntityA { get; set; }

    }

    class MyDbContext : DbContext
    {

        public DbSet EntityA { get; set; }
        public DbSet EntityB { get; set; }
        public DbSet EntityC { get; set; }

        public MyDbContext(DbContextOptions options) : base(options) { }

        protected override void OnModelCreating(ModelBuilder builder)
        {
            builder.Entity(BuildEntityA);
            builder.Entity(BuildEntityB);
            builder.Entity(BuildEntityC);
        }

        protected virtual void BuildEntityA(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityA")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                .UseIdentityColumn(1, 1)
                .Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore);

            entity.Property(e => e.ColA)
                .HasColumnName("ColA")
                .IsRequired();

            entity.HasIndex(e => e.ColA)
                .HasName("IX_EntityA_ColA");

            entity.Property(e => e.ColB)
                .HasColumnName("ColB")
                .IsRequired();

            entity.HasIndex(e => e.ColB)
                .HasName("IX_EntityA_ColB");

            entity.HasOne(e => e.EntityB)
                .WithOne(e => e.EntityA);

            entity.HasOne(e => e.EntityC)
                .WithOne(e => e.EntityA);

        }

        protected virtual void BuildEntityB(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityB")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                //.UseIdentityColumn(1, 1)
                //.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
                ;

            entity.Property(e => e.Col)
                .HasColumnName("Col")
                .IsRequired();

            entity.HasIndex(e => e.Col)
                .HasName("IX_EntityB_Col");

            entity.HasOne(e => e.EntityA)
                .WithOne(e => e.EntityB)
                .HasForeignKey(e => e.Id)
                .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                .OnDelete(DeleteBehavior.Cascade);

        }

        protected virtual void BuildEntityC(EntityTypeBuilder entity)
        {

            entity
                .ToTable("EntityC")
                .HasKey(e => e.Id);

            entity.Property(e => e.Id)
                .HasColumnName("Id")
                //.UseIdentityColumn(1, 1)
                //.Metadata.SetBeforeSaveBehavior(PropertySaveBehavior.Ignore)
                ;

            entity.Property(e => e.Col)
                .HasColumnName("Col")
                .IsRequired();

            entity.HasIndex(e => e.Col)
                .HasName("IX_EntityC_Col");

            entity.HasOne(e => e.EntityA)
                .WithOne(e => e.EntityC)
                .HasForeignKey(e => e.Id)
                .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                .OnDelete(DeleteBehavior.Cascade);

        }

    }

    [DbContext(typeof(MyDbContext))]
    partial class MyDbContextModelSnapshot : ModelSnapshot
    {
        protected override void BuildModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder
                .HasAnnotation("ProductVersion", "3.1.0")
                .HasAnnotation("Relational:MaxIdentifierLength", 128)
                .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

            modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
            {
                b.Property("Id")
                    .ValueGeneratedOnAdd()
                    .HasColumnName("Id")
                    .HasColumnType("int")
                    .HasAnnotation("SqlServer:IdentityIncrement", 1)
                    .HasAnnotation("SqlServer:IdentitySeed", 1)
                    .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

                b.Property("ColA")
                    .HasColumnName("ColA")
                    .HasColumnType("int");

                b.Property("ColB")
                    .IsRequired()
                    .HasColumnName("ColB")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("ColA")
                    .HasName("IX_EntityA_ColA");

                b.HasIndex("ColB")
                    .HasName("IX_EntityA_ColB");

                b.ToTable("EntityA");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .IsRequired()
                    .HasColumnName("Col")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityB_Col");

                b.ToTable("EntityB");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .HasColumnName("Col")
                    .HasColumnType("int");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityC_Col");

                b.ToTable("EntityC");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityB")
                    .HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
                    .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityC")
                    .HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
                    .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });
#pragma warning restore 612, 618
        }
    }

    public partial class Initial : Migration
    {
        protected override void Up(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.CreateTable(
                name: "EntityA",
                columns: table => new
                {
                    Id = table.Column(nullable: false)
                        .Annotation("SqlServer:Identity", "1, 1"),
                    ColA = table.Column(nullable: false),
                    ColB = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityA", x => x.Id);
                });

            migrationBuilder.CreateTable(
                name: "EntityB",
                columns: table => new
                {
                    Id = table.Column(nullable: false),
                    Col = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityB", x => x.Id);
                    table.ForeignKey(
                        name: "FK_EntityB-Id_EntityA-Id",
                        column: x => x.Id,
                        principalTable: "EntityA",
                        principalColumn: "Id",
                        onDelete: ReferentialAction.Cascade);
                });

            migrationBuilder.CreateTable(
                name: "EntityC",
                columns: table => new
                {
                    Id = table.Column(nullable: false),
                    Col = table.Column(nullable: false)
                },
                constraints: table =>
                {
                    table.PrimaryKey("PK_EntityC", x => x.Id);
                    table.ForeignKey(
                        name: "FK_EntityC-Id_EntityA-Id",
                        column: x => x.Id,
                        principalTable: "EntityA",
                        principalColumn: "Id",
                        onDelete: ReferentialAction.Cascade);
                });

            migrationBuilder.CreateIndex(
                name: "IX_EntityA_ColA",
                table: "EntityA",
                column: "ColA");

            migrationBuilder.CreateIndex(
                name: "IX_EntityA_ColB",
                table: "EntityA",
                column: "ColB");

            migrationBuilder.CreateIndex(
                name: "IX_EntityB_Col",
                table: "EntityB",
                column: "Col");

            migrationBuilder.CreateIndex(
                name: "IX_EntityC_Col",
                table: "EntityC",
                column: "Col");
        }

        protected override void Down(MigrationBuilder migrationBuilder)
        {
            migrationBuilder.DropTable(
                name: "EntityB");

            migrationBuilder.DropTable(
                name: "EntityC");

            migrationBuilder.DropTable(
                name: "EntityA");
        }
    }

    [DbContext(typeof(MyDbContext))]
    [Migration("20200103182223_Initial")]
    partial class Initial
    {
        protected override void BuildTargetModel(ModelBuilder modelBuilder)
        {
#pragma warning disable 612, 618
            modelBuilder
                .HasAnnotation("ProductVersion", "3.1.0")
                .HasAnnotation("Relational:MaxIdentifierLength", 128)
                .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

            modelBuilder.Entity("EFCoreBulkInsert.EntityA", b =>
            {
                b.Property("Id")
                    .ValueGeneratedOnAdd()
                    .HasColumnName("Id")
                    .HasColumnType("int")
                    .HasAnnotation("SqlServer:IdentityIncrement", 1)
                    .HasAnnotation("SqlServer:IdentitySeed", 1)
                    .HasAnnotation("SqlServer:ValueGenerationStrategy", SqlServerValueGenerationStrategy.IdentityColumn);

                b.Property("ColA")
                    .HasColumnName("ColA")
                    .HasColumnType("int");

                b.Property("ColB")
                    .IsRequired()
                    .HasColumnName("ColB")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("ColA")
                    .HasName("IX_EntityA_ColA");

                b.HasIndex("ColB")
                    .HasName("IX_EntityA_ColB");

                b.ToTable("EntityA");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .IsRequired()
                    .HasColumnName("Col")
                    .HasColumnType("nvarchar(450)");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityB_Col");

                b.ToTable("EntityB");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.Property("Id")
                    .HasColumnName("Id")
                    .HasColumnType("int");

                b.Property("Col")
                    .HasColumnName("Col")
                    .HasColumnType("int");

                b.HasKey("Id");

                b.HasIndex("Col")
                    .HasName("IX_EntityC_Col");

                b.ToTable("EntityC");
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityB", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityB")
                    .HasForeignKey("EFCoreBulkInsert.EntityB", "Id")
                    .HasConstraintName("FK_EntityB-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });

            modelBuilder.Entity("EFCoreBulkInsert.EntityC", b =>
            {
                b.HasOne("EFCoreBulkInsert.EntityA", "EntityA")
                    .WithOne("EntityC")
                    .HasForeignKey("EFCoreBulkInsert.EntityC", "Id")
                    .HasConstraintName("FK_EntityC-Id_EntityA-Id")
                    .OnDelete(DeleteBehavior.Cascade)
                    .IsRequired();
            });
#pragma warning restore 612, 618
        }
    }

    class Program
    {

        static int EntityCount = 0;
        const int EntityLimit = 200_000_000;

        /// 
        /// Just a fake factory method mimicking the real procedural generation of the db records
        /// 
        /// The next enity to store or null if no more entities can be calculated.
        static EntityA CalculateNextEntity()
        {
            if (EntityCount >= EntityLimit) return null;
            EntityCount++;

            return new EntityA
            {
                // id is generated by the db
                ColA = EntityCount,
                ColB = EntityCount.ToString(),

                EntityB = new EntityB
                {
                    // id is generated by the db
                    Col = EntityCount.ToString()
                },

                EntityC = new EntityC
                {
                    // id is generated by the db
                    Col = EntityCount
                }
            };
        }

        static void Main(string[] args)
        {

            string connectionString = "Server=(localdb)\\mssqllocaldb;Database=TestDB;Trusted_Connection=True;MultipleActiveResultSets=true";

            DbContextOptionsBuilder builder = new DbContextOptionsBuilder();
            builder.UseSqlServer(connectionString);

            using (MyDbContext dbContext = new MyDbContext(builder.Options))
            {

                Console.Write("migrating ... ");

                dbContext.Database.Migrate();

                Console.WriteLine(" done");


                dbContext.ChangeTracker.AutoDetectChangesEnabled = false;

                EntityA entity;

                int count = 0;
                int batchSize = 1000;

                Stopwatch watch = new Stopwatch();
                watch.Start();

                while (true)
                {
                    entity = CalculateNextEntity();
                    if (entity == null) break;

                    count++;

                    dbContext.Add(entity);

                    if (count == batchSize)
                    {
                        count = 0;
                        dbContext.SaveChanges();
                    }

                    Console.WriteLine($"{entity.Id} {entity.ColA} {entity.ColB} {entity.EntityB.Col} {entity.EntityC.Col}");

                }

                dbContext.SaveChanges();
                Console.WriteLine("---");

                int entityCount = dbContext.EntityA.CountAsync().Result;
                Console.WriteLine($"inserted {entityCount} within {watch.ElapsedMilliseconds} milliseconds");

            }


        }
    }
}

请注意,这是完整的工作代码。复制并粘贴到控制台应用程序 Program.cs 中,它会编译并运行。所需的核桃:

标签: c#entity-framework-core

解决方案


在我看来,最好避免 EF 插入如此数量的行。最好尝试使用 DataTable 使用批量插入:

using (SqlConnection connection = new SqlConnection(connString))
{
    SqlBulkCopy bulkCopy = new SqlBulkCopy(
        connection, 
        SqlBulkCopyOptions.TableLock | 
        SqlBulkCopyOptions.FireTriggers | 
        SqlBulkCopyOptions.UseInternalTransaction,
        null
        );    

    bulkCopy.DestinationTableName = tableName;
    connection.Open();
    bulkCopy.WriteToServer(dataTable);
    connection.Close();
}

然后只需为所有实体重用上述代码。

它有一些优点:

  • 无需编写不必要的代码,例如方法BuildEntityB
  • 批量插入比 EF 更快。它非常快。
  • 上面的代码是高度可重用的。您可以插入任何实体,而无需创建需要实体框架的辅助代码

推荐阅读