首页 > 解决方案 > 上传 CSV 文件时,如何更好地优化 bulk_create 以及联结模型中关联记录的创建?

问题描述

我有以下视图(view):

### VIEW ###

def project(request, project_id):
    """Show a project and handle the backers CSV upload for it.

    On a valid POST the uploaded CSV is saved, every non-blank data row
    becomes a ``Backer`` and each newly created backer is linked to the
    project through ``Backed``. Both inserts run in one transaction, so a
    failure in either step leaves no half-imported data behind.

    Args:
        request: The incoming HTTP request. The project is looked up with
            ``request.user.id`` as its creator, so other users get a 404.
        project_id: Primary key of the project.

    Returns:
        A redirect to the ``project`` URL after a valid upload has been
        processed (whether or not the import itself succeeded). Other
        paths fall through to the remainder of the view.
    """
    # Local import so this block does not depend on the module's import list.
    from django.db import transaction

    creator_id = request.user.id

    # Get project
    project = get_object_or_404(Project, pk=project_id, creator_id=creator_id)

    # Get all backers
    all_backers = project.backers.all().distinct()

    # CSV UPLOAD
    csvform = csvUploadForm()
    if request.method == 'POST':
        csvform = csvUploadForm(request.POST, request.FILES)
        if csvform.is_valid():
            # Keep the saved upload under its own name so ``csvform`` stays
            # a form object instead of being rebound to a model instance.
            upload = csvform.save(commit=False)
            upload.for_project_id = project_id
            upload.creator_id = creator_id
            upload.save()

            # READ CSV
            data_set = upload.csv.read().decode('UTF-8')
            backers_list = csv.reader(
                io.StringIO(data_set), delimiter=',', quotechar='"'
            )
            # Skip the header as a CSV record (not as a raw text line); the
            # default keeps an empty file from raising StopIteration.
            next(backers_list, None)

            objs = [
                Backer(
                    backer_number=row[0],
                    backer_uid=row[1],
                    name=row[2],
                    email=row[3],
                    shipping_country=row[4],
                    shipping_amount=row[5],
                    reward_title=row[6],
                    backing_minimum=row[7],
                    reward_id=row[8],
                    pledge_amount=row[9],
                    rewards_sent=row[10],
                    pledged_status=row[11],
                    notes=row[12],
                    pid=project_id,
                )
                for row in backers_list
                if row  # csv.reader yields [] for blank lines
            ]

            try:
                with transaction.atomic():
                    created = Backer.objects.bulk_create(objs)

                    # Backends that return primary keys from a bulk insert
                    # let us link exactly the rows just created, with no
                    # extra query.
                    backer_ids = [backer.pk for backer in created]
                    if None in backer_ids:
                        # Otherwise fetch only the ids of this project's
                        # backers that are not linked yet, so a repeated
                        # upload does not duplicate existing Backed rows.
                        backer_ids = (
                            Backer.objects
                            .filter(pid=project_id)
                            .exclude(backedbacker__project=project)
                            .values_list('id', flat=True)
                        )

                    # Add backer_id and project_id in Backed
                    Backed.objects.bulk_create([
                        Backed(backer_id=backer_id, project_id=project_id)
                        for backer_id in backer_ids
                    ])
                print('imported successfully')
            except Exception as e:
                # Best-effort import: report the failure and still redirect.
                print('Error While Importing Data: ', e)

            return redirect('project', project_id=project_id)


...

通过这个视图,我正在上传一个 CSV 文件,创建一个列表并使用 bulk_create 将其推送到模型(Backer),以便在有很多记录时加快处理速度。但是,为了让我的整个代码在当前状态下正常工作,我需要使用联结模型(Backed)。

到目前为止,一切都按预期工作;在使用 bulk_create 将 csv 记录上传到模型后(处理大约 3500 行和 15 列的整个列表大约需要 2 秒),我使用相同的方法在联结模型中建立关系。但是,当我为联结模型添加第二部分时,处理这么多记录的时间增加到大约 10 秒。

我对 Django 比较陌生,我不确定这是否是 3500 行的正常处理时间。为了优化这个机制,你有什么建议吗?以下是此特定功能的模型:

class Project(models.Model):
    """A project with funding figures, a date range and a set of backers."""

    # Many-to-many link to Backer, stored through the explicit Backed model.
    # NOTE(review): related_name="backers" gives the reverse accessor on
    # Backer the same name as this field - confirm that is intended.
    backers = models.ManyToManyField(to='backers.Backer', through='backers.Backed', related_name="backers")
    title = models.CharField(max_length=160)
    description = models.TextField()
    # Optional image, processed with SmartResize(290, 290) and stored as JPEG.
    image = ProcessedImageField(upload_to=upload_to, processors=[SmartResize(290, 290)], format='JPEG', options={'quality': 100}, null=True, blank=True)
    funding = models.IntegerField()
    goal = models.IntegerField()
    # Optional integer; presumably a stored backer count - TODO confirm how
    # it is kept in sync with the `backers` relation.
    backers_count = models.IntegerField(null=True, blank=True)
    started = models.DateField()
    ended = models.DateField()
    # The default is a callable, so it is evaluated when each row is created.
    # NOTE(review): it yields a datetime for a DateField - confirm intended.
    created_on = models.DateField(default=datetime.now)
    # Optional owner; deleting the user cascades to the project.
    creator = models.ForeignKey(User, on_delete=models.CASCADE, blank=True, null=True)

    def __str__(self):
        """Return the project title as its display name."""
        return self.title

class Backer(models.Model):
    """One backer record; every field except ``created_on`` is optional."""

    # Plain integer, not a ForeignKey; the upload view stores the project id
    # here.
    pid = models.IntegerField(blank=True, null=True)
    # The remaining columns are stored as free text (nullable, may be blank),
    # including the amount-like ones.
    backer_number = models.CharField(max_length=160, blank=True, null=True)
    backer_uid = models.CharField(max_length=160, blank=True, null=True)
    name = models.CharField(max_length=200, blank=True, null=True)
    email = models.CharField(max_length=200, blank=True, null=True)
    shipping_country = models.CharField(max_length=200, blank=True, null=True)
    shipping_amount = models.CharField(max_length=200, blank=True, null=True)
    reward_title = models.CharField(max_length=300, blank=True, null=True)
    backing_minimum = models.CharField(max_length=200, blank=True, null=True)
    reward_id = models.CharField(max_length=200, blank=True, null=True)
    pledge_amount = models.CharField(max_length=200, blank=True, null=True)
    pledged_at = models.CharField(max_length=200, blank=True, null=True)
    rewards_sent = models.CharField(max_length=200, blank=True, null=True)
    pledged_status = models.CharField(max_length=200, blank=True, null=True)
    notes = models.TextField(blank=True, null=True)
    # The default is a callable, so it is evaluated when each row is created.
    created_on = models.DateField(default=datetime.now)

class Backed(models.Model):
    """Join row linking one Backer to one Project.

    Used as the ``through`` model of ``Project.backers``. Deleting either
    side cascades to the row.
    """

    # Reverse accessor on Backer is `backedbacker`.
    backer = models.ForeignKey(Backer, on_delete=models.CASCADE, related_name="backedbacker")
    # Reverse accessor on Project is `backedproject`.
    project = models.ForeignKey(Project, on_delete=models.CASCADE, related_name="backedproject")

如果您需要一些额外的信息,请告诉我。任何提示将不胜感激。

谢谢

标签: django, django-models, django-views, django-orm

解决方案


推荐阅读