django - 上传 CSV 文件时,如何更好地优化 bulk_create,并为联结模型批量创建关联记录?
问题描述
我有以下视图(view):
### VIEW ###
# Column order of the uploaded backers CSV: the value at position i of a row
# is stored on the Backer field named at position i of this tuple.
_BACKER_CSV_FIELDS = (
    'backer_number',
    'backer_uid',
    'name',
    'email',
    'shipping_country',
    'shipping_amount',
    'reward_title',
    'backing_minimum',
    'reward_id',
    'pledge_amount',
    'rewards_sent',
    'pledged_status',
    'notes',
)


def _backers_from_csv(text, project_id):
    """Build unsaved Backer objects from CSV text, skipping the header row.

    Rows without any non-empty cell (e.g. a trailing blank line) are skipped.
    Rows with fewer cells than expected are padded with empty strings so a
    truncated line cannot raise IndexError; extra cells are ignored.
    """
    reader = csv.reader(io.StringIO(text), delimiter=',', quotechar='"')
    # Skip the header through the reader (so a quoted multi-line header is
    # consumed as one record); the default avoids StopIteration on empty input.
    next(reader, None)

    width = len(_BACKER_CSV_FIELDS)
    backers = []
    for row in reader:
        if not any(cell.strip() for cell in row):
            continue
        padded = row + [''] * (width - len(row))
        values = dict(zip(_BACKER_CSV_FIELDS, padded))
        backers.append(Backer(pid=project_id, **values))
    return backers


def project(request, project_id):
    """Show a project owned by the current user and handle backer CSV uploads.

    On a valid POST the uploaded CSV is saved through ``csvUploadForm``, its
    rows are bulk-inserted as ``Backer`` records tagged with ``pid=project_id``
    and one ``Backed`` link row is bulk-inserted for every backer of this
    project that is not linked yet. Both inserts run in a single transaction,
    then the user is redirected back to the ``project`` URL.

    Args:
        request: The incoming HTTP request.
        project_id: Primary key of the project to display / import into.

    Raises:
        Http404: via ``get_object_or_404`` when no project with this id
            belongs to the requesting user.
    """
    # Function-scope import because the module's import block is not part of
    # this excerpt.
    from django.db import transaction

    creator_id = request.user.id
    # Get project (restricted to projects created by the requesting user)
    project = get_object_or_404(Project, pk=project_id, creator_id=creator_id)
    # Get all backers
    all_backers = project.backers.all().distinct()

    # CSV UPLOAD
    csvform = csvUploadForm()
    if request.method == 'POST':
        csvform = csvUploadForm(request.POST, request.FILES)
        if csvform.is_valid():
            # Keep the form and the saved upload under separate names.
            upload = csvform.save(commit=False)
            upload.for_project_id = project_id
            upload.creator_id = creator_id
            upload.save()

            # READ CSV
            data_set = upload.csv.read().decode('UTF-8')
            try:
                objs = _backers_from_csv(data_set, project_id)
                # All-or-nothing: a failure while linking must not leave
                # backers behind without their Backed row.
                with transaction.atomic():
                    Backer.objects.bulk_create(objs)

                    # Link only backers that have no Backed row for this
                    # project yet. Selecting every Backer with this pid would
                    # re-link backers from earlier uploads and create
                    # duplicate rows. Querying ids (instead of relying on the
                    # objects returned by bulk_create) also works on database
                    # backends that do not populate primary keys there.
                    already_linked = Backed.objects.filter(
                        project_id=project_id
                    ).values('backer_id')
                    new_backer_ids = (
                        Backer.objects.filter(pid=project_id)
                        .exclude(pk__in=already_linked)
                        .values_list('pk', flat=True)
                    )
                    Backed.objects.bulk_create([
                        Backed(backer_id=backer_id, project_id=project_id)
                        for backer_id in new_backer_ids
                    ])
            except Exception as e:
                # Best-effort import, as in the original: report and carry on.
                print('Error While Importing Data: ', e)
                returnmsg = {"status_code": 500}
            else:
                print('imported successfully')
                returnmsg = {"status_code": 200}
            return redirect('project', project_id=project_id)
    else:
        csvform = csvUploadForm()
    # NOTE(review): the source listing lost its indentation; the ``else`` is
    # attached to the request-method check (the usual Django pattern), so an
    # invalid POST keeps the bound form and its errors — confirm.
    # The remainder of the view is elided in the original listing.
    ...
通过这个视图,我正在上传一个 CSV 文件,创建一个列表并使用 bulk_create 将其推送到模型(Backer),以便在有很多记录时加快处理速度。但是,为了让我的整个代码在当前状态下正常工作,我需要使用联结模型(Backed)。
到目前为止,一切都按预期工作;在使用 bulk_create 将 csv 记录上传到模型后(处理大约 3500 行和 15 列的整个列表大约需要 2 秒),我使用相同的方法在联结模型中建立关系。但是,当我为联结模型添加第二部分时,处理这么多记录的时间增加到大约 10 秒。
我对 Django 比较陌生,我不确定这是否是 3500 行的正常处理时间。为了优化这个机制,你有什么建议吗?以下是此特定功能的模型:
class Project(models.Model):
    """A project with funding figures, a date range and a set of backers.

    Backers are attached through the explicit ``backers.Backed`` link model
    rather than an auto-generated many-to-many table.
    """

    # Many-to-many to Backer, routed through the Backed link model.
    backers = models.ManyToManyField(
        through='backers.Backed',
        to='backers.Backer',
        related_name="backers",
    )
    title = models.CharField(max_length=160)
    description = models.TextField()
    # Optional image; resized to 290x290 and stored as JPEG at quality 100.
    image = ProcessedImageField(
        blank=True,
        null=True,
        format='JPEG',
        options={'quality': 100},
        processors=[SmartResize(290, 290)],
        upload_to=upload_to,
    )
    funding = models.IntegerField()
    goal = models.IntegerField()
    backers_count = models.IntegerField(blank=True, null=True)
    started = models.DateField()
    ended = models.DateField()
    created_on = models.DateField(default=datetime.now)
    # Owning user; optional, and deleting the user deletes the project.
    creator = models.ForeignKey(
        User,
        null=True,
        blank=True,
        on_delete=models.CASCADE,
    )

    def __str__(self):
        return self.title
class Backer(models.Model):
    """A backer record; every descriptive column is optional free text.

    ``pid`` holds a project's primary key as a plain integer (it is not a
    foreign key); the relation to ``Project`` is expressed through ``Backed``.
    """

    pid = models.IntegerField(null=True, blank=True)
    backer_number = models.CharField(null=True, blank=True, max_length=160)
    backer_uid = models.CharField(null=True, blank=True, max_length=160)
    name = models.CharField(null=True, blank=True, max_length=200)
    email = models.CharField(null=True, blank=True, max_length=200)
    shipping_country = models.CharField(null=True, blank=True, max_length=200)
    shipping_amount = models.CharField(null=True, blank=True, max_length=200)
    reward_title = models.CharField(null=True, blank=True, max_length=300)
    backing_minimum = models.CharField(null=True, blank=True, max_length=200)
    reward_id = models.CharField(null=True, blank=True, max_length=200)
    pledge_amount = models.CharField(null=True, blank=True, max_length=200)
    pledged_at = models.CharField(null=True, blank=True, max_length=200)
    rewards_sent = models.CharField(null=True, blank=True, max_length=200)
    pledged_status = models.CharField(null=True, blank=True, max_length=200)
    notes = models.TextField(null=True, blank=True)
    created_on = models.DateField(default=datetime.now)
class Backed(models.Model):
    """Link row joining one ``Backer`` to one ``Project``.

    Deleting either side cascades and removes the link row.
    """

    backer = models.ForeignKey(
        Backer,
        related_name="backedbacker",
        on_delete=models.CASCADE,
    )
    project = models.ForeignKey(
        Project,
        related_name="backedproject",
        on_delete=models.CASCADE,
    )
如果您需要一些额外的信息,请告诉我。任何提示将不胜感激。
谢谢
解决方案
推荐阅读
- docker - Jenkins 无法在具有特定运行参数的声明性管道中启动 docker 代理容器
- java - 通知不起作用 - Android Studio
- python - 添加具有在两个数据帧上应用 UDF 的结果的列
- ssl - 使用 SSL 连接到已编目 Db2 数据库的 SQLDriverConnect 函数的正确连接字符串是什么?
- servicestack - 从连接表中选择 OrmLite 新对象进行插入
- mailchimp - Mailchimp (mandrill) 交易电子邮件:如何将自定义数据添加到电子邮件模板?
- python - 如何通过 Python 以管理员身份运行程序
- javascript - 如何同时计算 2 个字段?
- php - xampp的Argon2安装
- flutter - 应该将新的提供者更新生成器更改为创建,出现错误