Updating the index does not respect the get_queryset method
safwanrahman opened this issue · 7 comments
While populating the ES index, it calls `get_queryset` from the DocType. So if I override the `get_queryset` method and filter the queryset to index only specific kinds of objects, it works correctly only when I run it from the management command (`rebuild` or `populate`).
But when new objects are created, it does not respect that queryset. So even though I add a filter in the `get_queryset` method, all objects get indexed.
Below is an example:
# models.py
class Foo(model.Model):
name = models.CharField(max_length=100)
age = models.IntegerField()
# documents.py
@foo_index.doc_type
class FooDocument(DocType):
class Meta:
model = Foo
fields = ('name', 'age')
def get_queryset(self):
queryset = super().get_queryset()
return queryset.filter(age__gt=10)
So when running `./manage.py search_index --rebuild`, it indexes only the objects that have an age greater than 10. But if we add another `Foo` object with `age=9`, it also gets indexed.
Hey, do you have any workaround for that?
///Edit:
I'm using django-elasticsearch-dsl-drf, and overriding the `get_queryset` method in the ViewSet lets me add filters that are always applied.
class UserViewSet(BaseDocumentViewSet):
def get_queryset(self):
queryset = self.search.query("match", is_staff=False)
queryset.model = self.document._doc_type.model
return queryset
@karpie28 in a DocType:
@INDEX.doc_type
class Document(es.DocType):
...
def update(self, thing, refresh=None, action='index', **kwargs):
if isinstance(thing, models.Model) and not thing.is_public:
if action == "index":
action = "delete"
return super(Document, self).update(thing, refresh, action, **kwargs)
def get_queryset(self):
return super(Document, self).get_queryset().exclude(is_public=False)
@karpie28 in a DocType:
@INDEX.doc_type class Document(es.DocType): ... def update(self, thing, refresh=None, action='index', **kwargs): if isinstance(thing, models.Model) and not thing.is_public: if action == "index": action = "delete" return super(Document, self).update(thing, refresh, action, **kwargs) def get_queryset(self): return super(Document, self).get_queryset().exclude(is_public=False)
This may result in 404 when saving a non-public instance that does not exist in the index.
@csdenboer true, instead of:
....
def update(self, thing, refresh=None, action='index', **kwargs):
if isinstance(thing, models.Model) and not thing.is_public:
if action == "index":
action = "delete"
return super(Document, self).update(thing, refresh, action, **kwargs)
try:
....
def update(self, thing, refresh=None, action="index", **kwargs):
if isinstance(thing, models.Model) and action == "index":
if not thing.is_public:
return super(Document, self).update(thing, refresh, action="delete", raise_on_error=False, **kwargs)
return super(Document, self).update(thing, refresh, action, **kwargs)
Yeah or
def update(self, thing, refresh=None, action='index', **kwargs):
if isinstance(thing, models.Model) and not thing.is_public and action == "index":
action = "delete"
kwargs = {**kwargs, 'raise_on_error': False}
return super(Document, self).update(thing, refresh, action, **kwargs)
@csdenboer does that imply `ignore_signals = True`?
The fix in #301 is nice, but it doesn't handle deleting objects that are already indexed but should no longer be after a save.
I had to stick with this:
def update(self, thing, refresh=None, action='index', **kwargs):
if isinstance(thing, models.Model) and action == "index" and not self.get_queryset().filter(pk=thing.pk).exists():
action = "delete"
kwargs = {**kwargs, 'raise_on_error': False}
return super().update(thing, refresh, action, **kwargs)