#34255: Annotation/group by with an expression on psycopg3
-------------------------------------+-------------------------------------
               Reporter:  Guillaume  |          Owner:  nobody
  Andreu Sabater                     |
                   Type:  Bug        |         Status:  new
              Component:  Database   |        Version:  dev
  layer (models, ORM)                |       Keywords:  orm postgres
               Severity:  Normal     |  psycopg3 annotation groupby
           Triage Stage:             |      Has patch:  0
  Unreviewed                         |
    Needs documentation:  0          |    Needs tests:  0
Patch needs improvement:  0          |  Easy pickings:  0
                  UI/UX:  0          |
-------------------------------------+-------------------------------------
 Given the following code:

 {{{
 import zoneinfo

 from django.db import models
 from django.db.models.functions import ExtractYear

 from django.contrib.postgres.fields import ArrayField

 TZ = zoneinfo.ZoneInfo("Europe/Paris")


 class JsonBuildObject(models.Func):
     function = "jsonb_build_object"
     output_field = models.JSONField()


 class SubqueryArray(models.Subquery):
     template = "ARRAY(%(subquery)s)"
     output_field = ArrayField(base_field=models.JSONField())


 class CurveQuerySet(models.QuerySet["Curve"]):
     """Curve QuerySet."""

     def annotate_loads(self) -> "CurveQuerySet":
         """Annotate baseload by year."""
         baseload_qs = (
             Point.objects.filter(curve=models.OuterRef("pk"))
             .annotate(year=ExtractYear("start_at", tzinfo=TZ))
             .values("year")
             .alias(baseload=models.Avg("value"))
             .annotate(
                 json=JsonBuildObject(
                     models.Value("year"),
                     models.F("year"),
                     models.Value("baseload"),
                     models.F("baseload"),
                 )
             )
             .values("json")
         )

         return self.annotate(_baseloads=SubqueryArray(baseload_qs))


 CurveManager = models.Manager.from_queryset(CurveQuerySet)


 class Curve(models.Model):
     """Curve."""

     objects = CurveManager()


 class Point(models.Model):
     """Curve point."""

     curve = models.ForeignKey(
         Curve,
         on_delete=models.CASCADE,
         related_name="points",
         related_query_name="point",
     )
     start_at = models.DateTimeField()
     value = models.FloatField()

 }}}

 I use the ''annotate_loads'' to compute yearly averages (with
 .values("year") acting as a GROUP BY) and dump the results in a json
 field.

 With psycopg3, from what I've seen, the query params/values are not
 interpolated in the query anymore, but sent alongside the query to the
 server.

 In my case, it looks like this:

 {{{
 SELECT
     "fail_curve"."id",
     ARRAY(
         SELECT
             jsonb_build_object(
                 $1,
                 EXTRACT(
                     YEAR
                     FROM
                         U0."start_at" AT TIME ZONE $2
                 ),
                 $3,
                 AVG(U0."value")
             ) AS "json"
         FROM
             "fail_point" U0
         WHERE
             U0."curve_id" = ("fail_curve"."id")
         GROUP BY
             EXTRACT(
                 YEAR
                 FROM
                     U0."start_at" AT TIME ZONE $4
             )
     ) AS "_baseloads"
 FROM
     "fail_curve"
 WHERE
     "fail_curve"."id" = $5
 }}}

 But postgres doesn't like: django.db.utils.ProgrammingError: column
 "u0.start_at" must appear in the GROUP BY clause or be used in an
 aggregate function

 because

 {{{
 EXTRACT(
     YEAR
     FROM
         U0."start_at" AT TIME ZONE $2
 )
 }}}
 is different from
 {{{
 EXTRACT(
     YEAR
     FROM
         U0."start_at" AT TIME ZONE $4
 )
 }}}

 I tested an updated query using the same placeholder ($4 -> $2) and it
 worked as expected:

 {{{
 PREPARE working (text, text, text, int) AS
     SELECT
         "fail_curve"."id",
         ARRAY(
             SELECT
                 jsonb_build_object(
                     $1,
                     EXTRACT(
                         YEAR
                         FROM
                             U0."start_at" AT TIME ZONE $2
                     ),
                     $3,
                     AVG(U0."value")
                 ) AS "json"
             FROM
                 "fail_point" U0
             WHERE
                 U0."curve_id" = ("fail_curve"."id")
             GROUP BY
                 EXTRACT(
                     YEAR
                     FROM
                         U0."start_at" AT TIME ZONE $2
                 )
         ) AS "_baseloads"
     FROM
         "fail_curve"
     WHERE
         "fail_curve"."id" = $4
     LIMIT
         1;
 EXECUTE working('year', 'Europe/Paris', 'base', 1);
 }}}

 My understanding is as follow:
 * group by is an expression
 * this expression is also used in select
 * they have a different placeholder in the query generated by
 django/psycopg3
 * postgres rejects it

 Let me know if this needs extra details.

-- 
Ticket URL: <https://code.djangoproject.com/ticket/34255>
Django <https://code.djangoproject.com/>
The Web framework for perfectionists with deadlines.

-- 
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web visit 
https://groups.google.com/d/msgid/django-updates/01070185a5f12189-2b0bbb72-8412-4c9c-9c57-1576c1897a64-000000%40eu-central-1.amazonses.com.

Reply via email to