When the optional ordering = ... attribute of a Django Model's Meta class contains a GenericRelation from the Content Types framework, there is no way to eliminate duplicate items being returned, even when using .distinct() (since .order_by(...) is applied by the ORM only after the SQL SELECT DISTINCT clause is built)

Duplicate items when ordering by Generic Relation in Django

When the optional ordering = ... attribute of a Django Model's Meta class contains a GenericRelation from the Content Types framework, there is no way to eliminate duplicate items being returned, even when using .distinct() (since .order_by(...) is applied by the ORM only after the SQL SELECT DISTINCT clause is built).

This problem is current as of Django 1.2 and has no generic solution as far as I know.

This problem will manifest when two or more different Models instances are attached to instances of the Model with the GenericForeignKey. In the following example, we have Model X which has a GenericForeignKey, and two Models, Y and Z, which have GenericRelations to X and ordering based on these relations.

1
2
3
4
5
from django.db.models import CharField
from django.contrib.contenttypes.models import \
    ContentType
from django.contrib.contenttypes.generic import \
    GenericForeignKey, GenericRelation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class X(Model):
    """
    This Model has a unique name and is attached to
    another Model through a Generic relation.
    """

    # The unique name of this X.
    name = CharField(u'name', unique = True,
        max_length = 128, null = False)

    # The Content Type of the generic relation to the
    # attached content.
    content_type = models.ForeignKey(ContentType,
        null = True, blank = True,
        related_name = 'x_contents')

    # The ID of the attached content.
    content_id = models.PositiveIntegerField(
        null = True, blank = True)

    # The attached content.
    content = GenericForeignKey('content_type',
        'content_id')

    def __unicode__(self):
        """
        The textual representation of this Model is
        its name.
        """
        return self.name

    class Meta:
        # order by X's name
        ordering = ['name', ]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class Y(Model):
    """
    This Model acts as content for an X, and has a
    convenient reverse Generic Relation to X.
    """

    # The unique name of this Y.
    name = CharField(u'name', null = False,
        unique = True, max_length = 128)

    # The X this Y is attached to.
    xs = GenericRelation(X,
        content_type_field = 'content_type',
        object_id_field = 'content_id')

    def __unicode__(self):
        """
        The textual representation of this Model is
        its name.
        """
        return self.name

    class Meta:
        # order by X's name
        ordering = ['xs__name', ]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class Z(Model):
    """
    This Model acts as content for an X, and has a
    convenient reverse Generic Relation to X.
    """

    # The unique name of this Z.
    name = CharField(u'name', null = False,
        unique = True, max_length = 128)

    # The X this Z is attached to.
    xs = GenericRelation(X,
        content_type_field = 'content_type',
        object_id_field = 'content_id')

    def __unicode__(self):
        """
        The textual representation of this Model is
        its name.
        """
        return self.name

    class Meta:
        # order by X's name
        ordering = ['xs__name', ]

The following sequence of statements demonstrates this problem:

x1 = X.objects.create(name = 'first x')
x2 = X.objects.create(name = 'second x')
x3 = X.objects.create(name = 'third x')
x4 = X.objects.create(name = 'fourth x')
y1 = Y.objects.create(name = 'first y')
y2 = Y.objects.create(name = 'second y')
z1 = Z.objects.create(name = 'first z')
z2 = Z.objects.create(name = 'second z')
x1.content = y1
x1.save()
x2.content = z1
x2.save()
x3.content = y2
x3.save()
x4.content = z2
x4.save()
X.objects.all()
[<X: first x>, <X: fourth x>, <X: second x>, <X: third x>]
Y.objects.all()
[<Y: first y>, <Y: first y>, <Y: second y>]

Notice that 'first y' appears twice. The SQL COUNT statement will not have any ordering applied, and so returns the correct value:

Y.objects.count()
2

However, when ordering is applied, the duplicate instance is present:

len(Y.objects.all())
3

This problem will manifest when two or more different Models' instances are attached to instances of the Model with the GenericForeignKey. In the following example, we have Model X which has a GenericForeignKey, and two Models Y and Z which have GenericRelations to X and ordering based on these relations.

1
2
3
4
5
from django.db.models import CharField
from django.contrib.contenttypes.models import \
    ContentType
from django.contrib.contenttypes.generic import \
    GenericForeignKey, GenericRelation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class X(Model):
    """
    This Model has a unique name and is attached to
    another Model through a Generic Relation.
    """

    # The unique name of this X.
    name = CharField(u'name', null = False,
        unique = True, max_length = 128)

    # The Content Type of the generic relation to
    # the attached content.
    content_type = models.ForeignKey(ContentType,
        null = True, blank = True,
        related_name = 'x_contents')

    # The ID of the attached content.
    content_id = models.PositiveIntegerField(
        null = True, blank = True)

    # The attached content.
    content = GenericForeignKey('content_type',
        'content_id')

    def __unicode__(self):
        """
        The textual representation of this Model is
        its name.
        """
        return self.name

    class Meta:
        # order by X's name
        ordering = ['name', ]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class Y(Model):
    """
    This Model acts as content for an X, and has a
    convenient reverse Generic Relation to X.
    """

    # The unique name of this Y.
    name = CharField(u'name', null = False,
        unique = True, max_length = 128)

    # The X this Y is attached to.
    xs = GenericRelation(X,
        content_type_field = 'content_type',
        object_id_field = 'content_id')

    def __unicode__(self):
        """
        The textual representation of this Model is
        its name.
        """
        return self.name

    class Meta:
        # order by X's name
        ordering = ['xs__name', ]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class Z(Model):
    """
    This Model acts as content for an X, and has a
    convenient reverse Generic Relation to X.
    """

    # The unique name of this Z.
    name = CharField(u'name', null = False,
        unique = True, max_length = 128)

    # The X this Z is attached to.
    xs = GenericRelation(X,
        content_type_field = 'content_type',
        object_id_field = 'content_id')

    def __unicode__(self):
        """
        The textual representation of this Model is
        its name.
        """
        return self.name

    class Meta:
        # order by X's name
        ordering = ['xs__name', ]

The following sequence of statements demonstrates this problem:

x1 = X.objects.create(name = 'first x')
x2 = X.objects.create(name = 'second x')
x3 = X.objects.create(name = 'third x')
x4 = X.objects.create(name = 'fourth x')

y1 = Y.objects.create(name = 'first y')
y2 = Y.objects.create(name = 'second y')

z1 = Z.objects.create(name = 'first z')
z2 = Z.objects.create(name = 'second z')

x1.content = y1
x1.save()

x2.content = z1
x2.save()

x3.content = y2
x3.save()

x4.content = z2
x4.save()

X.objects.all()
[<X: first x>, <X: fourth x>, <X: second x>, <X: third x>]

Y.objects.all()
[<Y: first y>, <Y: first y>, <Y: second y>]

Notice that 'first y' appears twice.

Y.objects.count()
2

The SQL COUNT statement will not have any ordering applied, and so returns the correct value.

len(Y.objects.all())
3