# package imports
# why all the try excepts?
# because weird stuff happens
# when this code is executed from a jupyter
# notebook vs as a module vs as __main__
from functools import partial
from datetime import date as Date
import asyncio
import typing as T
import sqlite3
import pkg_resources
try:
from graphql_example.logging_utilities import *
except ModuleNotFoundError:
from logging_utilities import *
try:
from graphql_example.on_startup import (
configure_logging,
configure_database,
create_tables,
seed_db
)
except ModuleNotFoundError:
from on_startup import (
configure_logging,
configure_database,
create_tables,
seed_db
)
try:
from graphql_example.on_cleanup import drop_tables, close_db
except ModuleNotFoundError:
from on_cleanup import drop_tables, close_db
try:
from graphql_example.db_queries import fetch_authors, fetch_books
except ModuleNotFoundError:
from db_queries import fetch_authors, fetch_books
from graphql.execution.executors.asyncio import AsyncioExecutor
from aiohttp_graphql import GraphQLView
from aiohttp import web
import markdown
To install, preferably within a virtualenv:
pip3 install graphql-example
To run the server (after pip install):
graphql_example runserver
To work on the project or run its tests:
git clone https://github.com/knowsuchagency/graphql-example
pip3 install .[dev]
fab test
Really Genesis, shortly thereafter followed by SOAP
Let's say we had a simple data model such as the following:
class Author:
first_name: str
last_name: str
age: int
books: T.Optional[T.List['Book']]
class Book:
title: str
author: Author
published: Date
In a RESTful server, the endpoints used to retrieve that data might look something like this:
/rest/author/{id}
/rest/authors?limit=5&otherParam=value
/rest/book/{id}
/rest/books?author="Jim"
Well, let's say on the client-side, we wanted to retrieve a set of authors using a query like
/rest/authors?age=34
We may get a structure like the following
[
{
"first_name": "Amy",
"last_name: "Jones",
"age": 34,
"books": [{...}]
} ...
]
However, maybe we're unconcerned with the books
field and all that extra information seems to be slowing down our page load.
To mitigate this problem, maybe we add another filter to the authors endpoint i.e. /rest/authors?age=34&no_books=true
Great, so now we get something like the following
[
{
"first_name": "Amy",
"last_name: "Jones",
"age": 34,
} ...
]
But...
- What happens if we type ...?no_books=True, ...?no_books=TRUE, or ...?no_books=t?
In general, how do we define and interpret the arguments passed as query params on the server side?
Furthermore, how do we enforce that contract with our clients and give them helpful feedback when mistakes are made?
Also, what happens if we have a similar query to the one above, where we ARE interested in the book field for a given author, but only a subset of that data, such as book.title, excluding all other data in the book field?
Seems hardly worth creating another filter. We'll probably just suck it up and retrieve all the data in the book field and ignore what isn't needed on the client side.
- How do we handle serialization between the client and the server for complex data types?
- How do we ensure that our REST API documentation is up-to-date? How do we ensure it's documented at all?
- How do we communicate to the client what data can be retrieved from the server?
- Standards
- Frameworks
- Buzz
But really, GraphQL is first and foremost a declarative language specification for client-side data fetching.
And, despite the snark, the buzz is important. GraphQL was created and is backed by Facebook, and there is a rapidly growing community and ecosystem of libraries that make GraphQL compelling over other standards like JSON-API, Falcor, or OData.
A GraphQL-compliant server will be able to tell what information can be exchanged with the client and how, in a way that is more expressive and provides more guarantees of correctness than REST.
Still, it's up to the backend engineer to correctly implement the spec, so it's not magic.
On the server-side, having the client describe the specific shape of data it wants allows the server to make smarter decisions about how to serve that data, e.g. re-shaping SQL expressions or batching database queries.
On the client-side, it's great just to be able to introspect what's available and have a well-defined way of communicating with the server. Since the server will communicate what type of data can be sent/received, the client doesn't need to worry that the api documentation isn't up-to-date or doesn't exist.
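For instance, a client that only cares about authors' names and the titles of their books (and nothing else in the books field) can express exactly that shape in one request. The query below is purely illustrative; it targets the schema we'll define later in this article:
{
  authors(age: 34) {
    first_name
    last_name
    books {
      title
    }
  }
}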
We're going to implement a backend server that has a couple of RESTful endpoints and a single GraphQL endpoint to demonstrate our earlier points.
The framework we'll use to build our server will be aiohttp, which works on top of asyncio. This will allow us to write our views or controllers (depending on how one interprets MVC) as co-routines. This means requests can be processed asynchronously without resorting to multiple processes or threads. We can write asynchronous code and leverage the full power of asyncio's event loop. Cool.
aiohttp also allows one to add views to routes using decorators or explicitly i.e.
from aiohttp import web
app = web.Application()
routes = web.RouteTableDef()
@routes.get('/get')
async def index(request):
...
# or
app.router.add_route('GET', '/', index)
# or
app.router.add_get('/', index)
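Note that when using the decorator style, the RouteTableDef still has to be registered on the application for those routes to take effect; something along these lines (not shown in the snippet above) is needed:
app.router.add_routes(routes)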
template = """
# Welcome to the example page
## Rest routes
### Authors
For an individual author:
[/rest/author/{{id}}](/rest/author/1)
To query authors:
[/rest/authors?age={{number}}&limit={{another_number}}](/rest/authors?limit=3)
The following can be passed as query parameters to authors:
limit: The amount of results you wish to be limited to
age: The age of the author, as an integer
first_name: The first name of the author as a string
last_name: The last name of the author as a string
no_books: Removes the {{books}} field from the resulting authors
### Books
For an individual book:
[/rest/book/{{id}}](/rest/book/1)
To query books:
[/rest/books?author_id={{number}}&limit={{another_number}}](/rest/books?limit=3)
The following can be passed as query parameters to books:
limit: The amount of results you wish to be limited to
title: The title of the book
published: The date published in the following format %m/%d/%Y
author_id: The unique ID of the author in the database
## [GraphQL](/graphql)
"""
html = markdown.markdown(template)
#@routes.get('/')
async def index(request):
# this logging sexiness is a talk for another time
# but it's a thin wrapper around eliot.start_action
with log_request(request):
response = web.Response(
text=html,
content_type='text/html'
)
with log_response(response):
return response
These define the logic for the RESTful routes we'll create on our application, i.e.
For a single resource:
/rest/author/{id}
/rest/book/{id}
Or based on url query parameters:
/rest/authors?age=42&no_books=true
/rest/books?author_id=3&limit=5
async def author(request):
"""Return a single author for a given id."""
connection = request.app['connection']
with log_request(request):
try:
# when using co-routines, it's important that each co-routine be non-blocking,
# meaning no individual action takes so long that it prevents the event loop
# from letting other co-routines execute
# since our database query may not immediately return, we run it
# in an "executor" (a thread pool) and await for it to finish
# functools.partial allows us to create a callable that
# bundles necessary positional and keyword arguments with it
# in a way that is pickle-able https://docs.python.org/3/library/pickle.html
# i.e. `print('hello', end=' ') == partial(print, 'hello', end= ' ')()`
db_query = partial(
fetch_authors, connection, id=int(request.match_info['id']))
# the starred target captures any remaining results in a list (see pep-3132)
author, *_ = await request.loop.run_in_executor(None, db_query)
except ValueError:
author = None
if not author:
log_message('Author not found', id=request.match_info['id'])
raise web.HTTPNotFound
response = web.json_response(author)
with log_response(response):
return response
async def book(request):
"""Return a single book for a given id."""
connection = request.app['connection']
with log_request(request):
try:
db_query = partial(
fetch_books, connection, id=int(request.match_info['id']))
book, *_ = await request.loop.run_in_executor(None, db_query)
except ValueError:
book = None
if not book:
log_message('Book not found', id=request.match_info['id'])
raise web.HTTPNotFound
response = web.json_response(book)
with log_response(response):
return response
async def books(request):
"""Return json response of books based on query params."""
connection = request.app['connection']
with log_request(request):
# parse values from query params
title = request.query.get('title')
published = request.query.get('published')
author_id = request.query.get('author_id')
limit = int(request.query.get('limit', 0))
query_db = partial(fetch_books,
request.app['connection'],
title=title,
published=published,
author_id=author_id,
limit=limit)
query_db_task = request.loop.run_in_executor(None, query_db)
books: T.List[dict] = await query_db_task
response = web.json_response(books)
with log_response(response):
return response
async def authors(request):
"""Return json response of authors based on query params."""
connection = request.app['connection']
with log_request(request):
# parse values from query params
first_name = request.query.get('first_name')
last_name = request.query.get('last_name')
age = int(request.query.get('age', 0))
limit = int(request.query.get('limit', 0))
# client may not want/need book information
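# any value starting with 't' or 'T' (e.g. 'true', 'True', 't') is treated as truthy;
# everything else, including a missing param, is treated as falsy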
no_books = str(request.query.get('no_books','')).lower().startswith('t')
query_db = partial(
fetch_authors,
request.app['connection'],
first_name=first_name,
last_name=last_name,
age=age,
limit=limit,
no_books=no_books
)
query_db_task = request.loop.run_in_executor(None, query_db)
authors: T.List[dict] = await query_db_task
response = web.json_response(authors)
with log_response(response):
return response
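Assuming the server described at the end of this article is running locally on port 8080 (and the database has been seeded), these views can be exercised with httpie along these lines; the parameter values are just illustrative:
http :8080/rest/author/1
http :8080/rest/authors age==34 no_books==true
http :8080/rest/books author_id==3 limit==5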
Earlier, we saw an example of how you might implement a REST API where each resource maps to its own URL, i.e.
/rest/author/{id}
/rest/authors? ...
GraphQL is conceptually very different. The idea is that instead of mapping your URLs to your data and implementing queries and mutations through a combination of URL query params and HTTP verbs, your entire API is exposed via a single URL.
Requests to this URL will have a GraphQL query either in the body of the HTTP request or in the query URL parameter. This GraphQL query tells the server what data to fetch or change.
The GraphQL server, in turn, exposes the data as a vertex-edge (node-link) graph whose root node has a special name, query. That name is left out of GraphQL queries since it's implicitly there, in the same way https://google.com implicitly contains the root DNS node (https://google.com.).
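In other words, the two queries below are equivalent; omitting the query keyword is just the standard GraphQL shorthand:
{ authors { id } }
query { authors { id } }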
Each resource we create, such as an author (an object in GraphQL parlance), will have an associated resolver function that exposes that object on the graph.
So concretely, what does this look like in practice?
Well, on the client side hitting http://localhost:8080/rest/authors?limit=3&no_books=true
may return something like
[
{
"id": 1,
"first_name": "Willene",
"last_name": "Whitaker",
"age": 26
},
{
"id": 2,
"first_name": "Cedric",
"last_name": "Williams",
"age": 52
},
{
"id": 3,
"first_name": "Kaila",
"last_name": "Snider",
"age": 33
}
]
If we wanted to get the same information using GraphQL, we'd start by writing our query.
{
authors(limit: 3) {
id
first_name
last_name
age
}
}
In a scenario where we're using the excellent httpie client and we have the above query bound to the query shell variable, we'd retrieve the information like so:
http :8080/graphql "query=$query"
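The response comes back as JSON with the results nested under a top-level data key; the values below are illustrative, mirroring the REST output above:
{
    "data": {
        "authors": [
            {"id": 1, "first_name": "Willene", "last_name": "Whitaker", "age": 26},
            {"id": 2, "first_name": "Cedric", "last_name": "Williams", "age": 52},
            {"id": 3, "first_name": "Kaila", "last_name": "Snider", "age": 33}
        ]
    }
}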
Since we're using graphene, we'll first need to use the library to create a schema describing our data.
This schema will then tell the library how to implement the GraphQL-compliant endpoint view.
import graphene as g
from graphql.execution.executors.asyncio import AsyncioExecutor
class Author(g.ObjectType):
"""This is a human being."""
id = g.Int(description='The primary key in the database')
first_name = g.String()
last_name = g.String()
age = g.Int()
# we can't use g.List(Book)
# directly since it's not
# yet defined
books = g.List(lambda: Book)
class Book(g.ObjectType):
"""A book, written by an author."""
id = g.Int(description='The primary key in the database')
title = g.String(description='The title of the book')
published = g.String(description='The date it was published')
author = g.Field(Author)
Once we've defined our schema, we need to expose that information by adding our objects as fields on the graph and writing resolver functions for those fields that describe how to fetch the actual data we want.
Remember that the root node of our graph is an object with the name query.
async def configure_graphql(app):
"""
Since our resolvers depend on the app's db connection, this
co-routine must execute after that part of the application
is configured
"""
connection = app['connection']
class Query(g.ObjectType):
author = g.Field(Author)
book = g.Field(Book)
authors = g.List(
Author,
# the following will be passed as named
# arguments to the resolver function.
# Don't ask why; it took me forever to
# figure it out. Despite its functionality,
# graphene's documentation leaves a lot to be desired
id=g.Int(),
first_name=g.String(),
last_name=g.String(),
age=g.Int(),
limit=g.Int(
description='The amount of results you wish to be limited to'))
books = g.List(
Book,
id=g.Int(),
title=g.String(),
published=g.String(),
author_id=g.Int(
description='The unique ID of the author in the database'),
limit=g.Int(description='The amount of results you wish to be limited to'))
async def resolve_books(self,
info,
id=None,
title=None,
published=None,
author_id=None,
limit=None):
query_db = partial(
fetch_books,
connection,
id=id,
title=title,
published=published,
author_id=author_id,
limit=limit)
fetched = await app.loop.run_in_executor(None, query_db)
books = []
for book_dict in fetched:
author = Author(
id=book_dict['author']['id'],
first_name=book_dict['author']['first_name'],
last_name=book_dict['author']['last_name'],
age=book_dict['author']['age'])
book = Book(
id=book_dict['id'],
title=book_dict['title'],
published=book_dict['published'],
author=author)
books.append(book)
return books
async def resolve_authors(self,
info,
id=None,
first_name=None,
last_name=None,
age=None,
limit=None):
query_db = partial(
fetch_authors,
connection,
id=id,
first_name=first_name,
last_name=last_name,
age=age,
limit=limit)
fetched = await app.loop.run_in_executor(None, query_db)
authors = []
for author_dict in fetched:
books = [
Book(id=b['id'], title=b['title'], published=b['published'])
for b in author_dict['books']
]
author = Author(
id=author_dict['id'],
first_name=author_dict['first_name'],
last_name=author_dict['last_name'],
age=author_dict['age'],
books=books)
authors.append(author)
return authors
schema = g.Schema(query=Query, auto_camelcase=False)
# create the view
executor = AsyncioExecutor(loop=app.loop)
gql_view = GraphQLView(schema=schema,
executor=executor,
graphiql=True,
enable_async=True
)
# attach the view to the app router
app.router.add_route(
'*',
'/graphql',
gql_view,
)
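With the view attached, the GraphiQL interface served at /graphql lets us explore the schema interactively. As a quick sanity check, a nested query like the one below (illustrative; the actual results depend on the seeded data) exercises the relationship between books and their authors:
{
  books(limit: 2) {
    title
    author {
      first_name
      last_name
    }
  }
}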
def app_factory(*args, db=':memory:', logfile='log.json', **config_params):
# initialize app
app = web.Application()
# set top-level configuration
app['config'] = {k: v
for k, v in config_params.items()
if v is not None}
if db:
app['config']['db'] = db
if logfile:
app['config']['logfile'] = logfile
# startup
app.on_startup.append(configure_logging)
app.on_startup.append(configure_database)
app.on_startup.append(create_tables)
app.on_startup.append(seed_db)
app.on_startup.append(configure_graphql)
# example routes
app.router.add_get('/', index)
# rest routes
app.router.add_get('/rest/author/{id}', author)
app.router.add_get('/rest/authors', authors)
app.router.add_get('/rest/book/{id}', book)
app.router.add_get('/rest/books', books)
# cleanup
app.on_cleanup.append(drop_tables)
app.on_cleanup.append(close_db)
return app
if __name__ == '__main__':
app = app_factory()
web.run_app(app, host='127.0.0.1', port=8080)
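Finally, as a small illustrative extra (not part of the original project), here's a client script that posts a GraphQL query to the running server using aiohttp's ClientSession; it assumes the server above is listening on localhost:8080:
import asyncio
import aiohttp

async def main():
    # request only the fields we care about, nesting books under each author
    query = '{ authors(limit: 2) { first_name last_name books { title } } }'
    async with aiohttp.ClientSession() as session:
        # aiohttp-graphql accepts a JSON body with a "query" key,
        # the same payload httpie sent earlier
        async with session.post('http://localhost:8080/graphql',
                                json={'query': query}) as resp:
            print(await resp.json())

asyncio.get_event_loop().run_until_complete(main())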