python - 使用 elasticsearch dsl 索引数据时出错
问题描述
我有两个模型,如下所示:
class PostUser(models.Model):
user_id = models.CharField(max_length=1000,blank=True,null=True)
reputation = models.CharField(max_length = 1000 , blank = True , null = True)
def __unicode__(self):
return self.user_id
def indexing(self):
obj = PostUserIndex(
meta = {'id': self.id},
user_id = self.user_id,
reputation = self.reputation,
)
obj.save(index = 'post-user-index')
return obj.to_dict(include_meta=True)
class Posts(models.Model):
user_post_id = models.CharField(max_length = 1000 , blank = True , null = True)
score = models.CharField(max_length = 1000 , blank = True , null = True)
owner_user_id = models.ForeignKey(PostUser,default="-100")
def __unicode__(self):
return self.user_post_id
def indexing(self):
obj = PostsIndex(
meta = {'id': self.id},
user_post_id = self.user_post_id,
score = self.score,
owner_user_id = self.owner_user_id,
)
obj.save(index = 'newuserposts-index')
return obj.to_dict(include_meta=True)
我试图索引我的数据的方式如下:
class PostUserIndex(DocType):
user_id = Text()
reputation = Text()
class PostsIndex(DocType):
user_post_id = Text()
score = Text()
owner_user_id = Nested(PostUserIndex)
然后我尝试运行以下方法来索引数据:
def posts_indexing():
PostsIndex.init(index='newuserposts-index')
es = Elasticsearch()
bulk(client=es, actions=(b.indexing() for b in models.Posts.objects.all().iterator()))
我通过手动输入嵌套属性以及从 doctype 更改为 PostUser 的内部 doc 尝试了不同的方法,但我仍然收到奇怪的错误。
错误:
AttributeError:“PostUser”对象没有属性“副本”
Traceback (most recent call last):
File "<console>", line 1, in <module>
File "/Users/ammarkhan/Desktop/danny/src/dataquerying/datatoindex.py", line 74, in new_user_posts_indexing
bulk(client=es, actions=(b.indexing() for b in models.Posts.objects.all().iterator()))
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 257, in bulk
for ok, item in streaming_bulk(client, actions, **kwargs):
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 180, in streaming_bulk
client.transport.serializer):
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch/helpers/__init__.py", line 58, in _chunk_actions
for action, data in actions:
File "/Users/ammarkhan/Desktop/danny/src/dataquerying/datatoindex.py", line 74, in <genexpr>
bulk(client=es, actions=(b.indexing() for b in models.Posts.objects.all().iterator()))
File "/Users/ammarkhan/Desktop/danny/src/dataquerying/models.py", line 167, in indexing
obj.save(index = 'newuserposts-index')
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/document.py", line 405, in save
self.full_clean()
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/utils.py", line 417, in full_clean
self.clean_fields()
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/utils.py", line 403, in clean_fields
data = field.clean(data)
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 179, in clean
data = super(Object, self).clean(data)
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 90, in clean
data = self.deserialize(data)
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 86, in deserialize
return self._deserialize(data)
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 166, in _deserialize
return self._wrap(data)
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/field.py", line 142, in _wrap
return self._doc_class.from_es(data)
File "/Users/ammarkhan/Desktop/danny/lib/python2.7/site-packages/elasticsearch_dsl/utils.py", line 342, in from_es
meta = hit.copy()
AttributeError: ‘PostUser' object has no attribute 'copy'
解决方案
您正在调用将文档保存到 elasticsearch.save
的indexing
方法,然后您也将其传递bulk
以完成相同的操作,这save
是额外的。
您还分配了一个 to 的实例,PostUser
而不是通过调用它的方法(没有内部)owner_user_id
来正确序列化它:indexing
save
def indexing(self):
obj = PostsIndex(
meta = {'id': self.id, 'index': 'newuserposts-index'},
user_post_id = self.user_post_id,
score = self.score,
owner_user_id = self.owner_user_id.indexing(),
)
return obj.to_dict(include_meta=True)
推荐阅读
- python - 继续获取 ptrace:尝试附加到 docker 容器中的进程时不允许操作
- jquery - 将 postgreSQL 服务器连接到 webapp 中的 DataTables.JS alt 编辑器
- reactjs - 用于反应括号间距的更漂亮的配置
- regex - 如何批量重命名许多文件以在字符串中添加顺序编号,如果前面的文本更改则重新启动?
- javascript - 如何将 javascript 数组转换为 json 格式以作为来自 REST api 的响应
- html - 我无法在命令行中选择 Firebase 目录
- node.js - POSTMAN 挂起在 express/Nodejs 应用程序中发送请求
- python - 'DataFrame' 对象没有属性 'str'
- c++ - 如何将存档写入在内存中创建的磁盘?
- html - 如何将项目保持在html页面的中心