ProgrammingError on Group By over annotated field in Django ORM using mssql-django connector
slegt opened this issue · 4 comments
Title:
ProgrammingError on Group By over annotated field in Django ORM using mssql-django connector
Software versions
- Django: 4.2.7
- mssql-django: 1.3
- python: 3.10.9
- SQL Server: Microsoft SQL Server 2019 (RTM-CU23) (KB5030333) - 15.0.4335.1 (X64)
- OS: Windows 11 22H2
Table schema and Model
class Book(models.Model):
title = models.CharField(max_length=100)
pages = models.IntegerField(default=0)`
Database Connection Settings
mssql = {
'ENGINE': 'mssql',
'NAME': 'TestDB',
'USER': 'sa',
'PASSWORD': 'yourStrong(!)Password',
'HOST': '127.0.0.1',
'PORT': '1433',
# tested with 18 and 17
'OPTIONS': {
'driver': 'ODBC Driver 18 for SQL Server',
'extra_params': "Encrypt=no;TrustServerCertificate=yes;",
}
}
DATABASES = {
'default': mssql
}
Problem description and steps to reproduce
If i use this annotated view
def bug_view(request):
queryset = Book.objects.all()
print(f"query: {queryset.query}")
#annotation
annotated_queryset = queryset.annotate(large=Case(
When(pages__gt=650, then=Value("expensive")),
default=Value("not_expensive"),
output_field=CharField()))
print(f"annotated_query: {annotated_queryset.query}")
grouped_and_annotated_queryset = annotated_queryset.values('large').annotate(sum=models.Sum('pages'))
print(f"grouped_and_annotated_query: {grouped_and_annotated_queryset.query}")
return HttpResponse(grouped_and_annotated_queryset)
where I first annotate a field and then group over it, I get an SQL ProgrammingError (full stack trace below):
django.db.utils.ProgrammingError: ('42000', "[42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Column 'books_book.pages' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause. (8120) (SQLExecDirectW); [42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Statement(s) could not be prepared. (8180)")
The printed queries are
query: SELECT [books_book].[id], [books_book].[title], [books_book].[pages] FROM [books_book]
annotated_query: SELECT [books_book].[id], [books_book].[title], [books_book].[pages], CASE WHEN [books_book].[pages] > 650 THEN expensive ELSE not_expensive END AS [large] FROM [books_book]
grouped_and_annotated_query: SELECT CASE WHEN [books_book].[pages] > 650 THEN expensive ELSE not_expensive END AS [large], SUM([books_book].[pages]) AS [sum] FROM [books_book] GROUP BY CASE WHEN [books_book].[pages] > 650 THEN expensive ELSE not_expensive END
Which strangely seem correct. I managed to execute them via Pycharms Database tool.
Expected behavior and actual behavior
Query should execute normally but raises an error.
Error message/stack trace
Error
Traceback (most recent call last):
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\backends\utils.py", line 89, in _execute
return self.cursor.execute(sql, params)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\mssql\base.py", line 621, in execute
return self.cursor.execute(sql, params)
pyodbc.ProgrammingError: ('42000', "[42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Column 'books_book.pages' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause. (8120) (SQLExecDirectW); [42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Statement(s) could not be prepared. (8180)")
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\slegt\PycharmProjects\djangoProject\books\tests.py", line 9, in test_bug_view
response = client.get("/books/bug")
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\test\client.py", line 927, in get
response = super().get(path, data=data, secure=secure, headers=headers, **extra)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\test\client.py", line 457, in get
return self.generic(
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\test\client.py", line 609, in generic
return self.request(**r)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\test\client.py", line 891, in request
self.check_exception(response)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\test\client.py", line 738, in check_exception
raise exc_value
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\core\handlers\exception.py", line 55, in inner
response = get_response(request)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\core\handlers\base.py", line 197, in _get_response
response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "C:\Users\slegt\PycharmProjects\djangoProject\books\views.py", line 23, in bug_view
return HttpResponse(grouped_and_annotated_queryset)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\http\response.py", line 376, in __init__
self.content = content
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\http\response.py", line 401, in content
content = b"".join(self.make_bytes(chunk) for chunk in value)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\models\query.py", line 398, in __iter__
self._fetch_all()
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\models\query.py", line 1881, in _fetch_all
self._result_cache = list(self._iterable_class(self))
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\models\query.py", line 208, in __iter__
for row in compiler.results_iter(
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\models\sql\compiler.py", line 1513, in results_iter
results = self.execute_sql(
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\models\sql\compiler.py", line 1562, in execute_sql
cursor.execute(sql, params)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\backends\utils.py", line 67, in execute
return self._execute_with_wrappers(
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\backends\utils.py", line 80, in _execute_with_wrappers
return executor(sql, params, many, context)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\backends\utils.py", line 84, in _execute
with self.db.wrap_database_errors:
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\utils.py", line 91, in __exit__
raise dj_exc_value.with_traceback(traceback) from exc_value
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\django\db\backends\utils.py", line 89, in _execute
return self.cursor.execute(sql, params)
File "C:\Users\slegt\PycharmProjects\djangoProject\venv\lib\site-packages\mssql\base.py", line 621, in execute
return self.cursor.execute(sql, params)
django.db.utils.ProgrammingError: ('42000', "[42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Column 'books_book.pages' is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause. (8120) (SQLExecDirectW); [42000] [Microsoft][ODBC Driver 18 for SQL Server][SQL Server]Statement(s) could not be prepared. (8180)")
Any other details that can be helpful
On other native supported databases everything works as intended, so it should be a connector-related issue.
Hi @slegt, thanks for bringing this up. We don't have annotated querysets that are grouped fully implemented yet.
Hi @slegt. I hit the same issue. The query seems correct, and executed correctly when pasted in sql server directly. What I realized is that the problem was with the query having parameters. So, in my case:
SELECT count(1) , case when name = 'john' then 1 else 2 end from TABLE group by case when name = 'john' then 1 else 2 end;
works, however:
SELECT count(1) , case when name = ? then 1 else 2 end from TABLE group by case when name = ? then 1 else 2 end;
with params ("john", "john") fails.
A workaround I found was to use pymmsql to replace the params and execute the query without parameters:
from pymssql._mssql import substitute_params
class CustomCursorWrapper(base.CursorWrapper):
def replace_params(self, sql, params):
# This is copied from the original format_sql
if self.driver_charset and isinstance(sql, str):
sql = smart_str(sql, self.driver_charset)
if params:
sql = substitute_params(sql, tuple(params)).decode('utf8')
return sql
def execute(self, sql, params=None):
self.last_sql = sql
sql = self.replace_params(sql, params)
self.last_params = params
try:
# We don't send parameters, because they are all in the query now.
return self.cursor.execute(sql, tuple())
except pyodbc.Error as e:
self.connection._on_error(e)
raise
I know it is a dirty workaround, but it worked for me.
@dauinsight @mShan0
I have been using this as a workaround:
from pymssql._mssql import substitute_params
def _fix_query(query):
# For Django's inspectdb tests -- a model has a non-ASCII column name.
if not isinstance(query, str):
query = query.encode('utf-8')
# For Django's backends and expressions_regress tests.
query = query.replace('%%', '%')
return query
from django.utils import timezone
def _fix_value(value):
if isinstance(value, datetime.datetime):
if timezone.is_aware(value):
value = timezone.make_naive(value, timezone.utc)
return value
def _fix_params(params):
if params is not None:
params = tuple(_fix_value(value) for value in params)
return params
And this changes to CursorWrapper
def replace_params(self, sql, params):
if self.driver_charset and isinstance(sql, str):
sql = smart_str(sql, self.driver_charset)
if params:
sql = substitute_params(sql, tuple(params)).decode('utf8')
return sql
def execute(self, sql, params=None):
self.last_sql = sql
self.last_params = params
try:
sql = self.format_sql(self.last_sql, params)
params = self.format_params(self.last_params)
return self.cursor.execute(sql, params)
except (Database.DataError, Database.ProgrammingError) as e:
sql = self.replace_params(_fix_query(self.last_sql), _fix_params(self.last_params))
return self.cursor.execute(sql, tuple())
except Database.Error as e:
self.connection._on_error(e)
raise
The idea is that if using pyodbc we cannot execute the query with params, we use pymssql substitute_params to send a query without params. The query without params, works. If SQL Server is not configured to abort a transaction in case of error, this "retry" is usually harmless and works.
Maybe the dirty hack, configurable so the user can decide to use it or not, could be a way to workaround all these issues related to group by?
Hi @slegt, we've implemented this fix in our latest release. Running the query you provided works as expected now.