Start with the basics. Install GNU Emacs, and pytest and pytest-watch.
We’re ready.
Red
First, a red test:
import pytest
from stuff import Atom, Vector
import uuid
from our_json import dumps
hydrogen = Atom('H', (1.00784, 'Da'), 1, ['proton', 'electron'])
i = Vector(1, 0)
the_id = uuid.UUID('af281d69-6cca-49f5-b119-abccd46bb32c')
@pytest.mark.parametrize('obj, string', [
(hydrogen, '{"__type__": "Atom", "value": {"name": "H", "mass": [1.00784, "Da"], "atomic_number": 1, "constituents": ["proton", "electron"]}}'),
(i, '{"__type__": "Vector", "value": {"x": 1, "y": 0}}' ),
(the_id, '{"__type__": "UUID", "value": "af281d69-6cca-49f5-b119-abccd46bb32c"}'),
])
def test_dumps(obj, string):
    assert dumps(obj) == string

This is the data we will be using.
import dataclasses
@dataclasses.dataclass
class Atom:
    """Sample record used as serialization test data."""

    name: str  # element symbol in the test data, e.g. 'H'
    mass: tuple  # (value, unit) pair in the test data, e.g. (1.00784, 'Da')
    atomic_number: int
    constituents: list[str]  # e.g. ['proton', 'electron']
@dataclasses.dataclass
class Vector:
    x: int
    y: int

And this is the target of our tests, the thing we need to change to have a green light.
import json
import typing
def to_dict(obj: typing.Any) -> dict:
    """Fallback serializer handed to ``json.dumps`` as ``default``.

    No type is supported at this step, so every call raises.

    Raises:
        NotImplementedError: always; the message names the type of ``obj``.
    """
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-1.
Green
To change tests to green, start with the simplest thing, which, admittedly, is often ifs. Look:
import json
import typing
from stuff import Atom, Vector
import dataclasses
import uuid
def to_dict(obj: typing.Any) -> dict:
    """Convert a supported object into a tagged, JSON-encodable dict.

    Returns ``{'__type__': <type name>, 'value': <payload>}`` for ``Atom``,
    ``Vector`` and ``uuid.UUID`` instances.

    Raises:
        NotImplementedError: if ``obj`` is of any other type.
    """
    if isinstance(obj, Atom):
        return {'__type__': 'Atom', 'value': dataclasses.asdict(obj)}
    elif isinstance(obj, Vector):
        return {'__type__': 'Vector', 'value': dataclasses.asdict(obj)}
    elif isinstance(obj, uuid.UUID):
        return {'__type__': 'UUID', 'value': str(obj)}
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-2.
Refactor
So now we have our safety belt to start refactoring.
Start factoring out the repeated code. In this case, the common pattern that all types are using to serialize:
import json
import typing
from stuff import Atom, Vector
import dataclasses
import uuid
def to_dict(obj: typing.Any) -> dict:
    """Convert a supported object into a tagged, JSON-encodable dict.

    Supports ``Atom``, ``Vector`` and ``uuid.UUID`` instances.

    Raises:
        NotImplementedError: if ``obj`` is of any other type.
    """
    def type_value(t, v):
        # The envelope shared by every serialized type.
        return {'__type__': t, 'value': v}

    if isinstance(obj, Atom):
        return type_value('Atom', dataclasses.asdict(obj))
    elif isinstance(obj, Vector):
        return type_value('Vector', dataclasses.asdict(obj))
    elif isinstance(obj, uuid.UUID):
        return type_value('UUID', str(obj))
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-3.
Next, we extract the ifs and put them in a dispatch table.
import json
import typing
from stuff import Atom, Vector
import dataclasses
import uuid
def type_value(t, v):
    """Wrap payload ``v`` in the ``{'__type__', 'value'}`` envelope for type name ``t``."""
    return {'__type__': t, 'value': v}


# Dispatch table: class name -> serializer producing the tagged dict.
registry = {
    Atom.__name__: lambda obj: type_value('Atom', dataclasses.asdict(obj)),
    Vector.__name__: lambda obj: type_value('Vector', dataclasses.asdict(obj)),
    uuid.UUID.__name__: lambda obj: type_value('UUID', str(obj)),
}


def to_dict(obj: typing.Any) -> dict:
    """Serialize ``obj`` with the serializer registered for its class name.

    Raises:
        NotImplementedError: if no serializer is registered for the type.
    """
    method = registry.get(type(obj).__name__, None)
    if method is not None:
        return method(obj)
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-4.
We now decouple the dispatch table (registry) from its actual content and populate it via the register function.
import json
import typing
from stuff import Atom, Vector
import dataclasses
import uuid
def type_value(t, v):
    """Wrap payload ``v`` in the ``{'__type__', 'value'}`` envelope for type name ``t``."""
    return {'__type__': t, 'value': v}


# Dispatch table: class name -> serializer; filled through register().
registry = {
}


def register(type_, fun):
    """Store ``fun`` as the serializer for ``type_``, keyed by its class name."""
    registry[type_.__name__] = fun


register(Atom, lambda obj: type_value('Atom', dataclasses.asdict(obj)))
register(Vector, lambda obj: type_value('Vector', dataclasses.asdict(obj)))
register(uuid.UUID, lambda obj: type_value('UUID', str(obj)))


def to_dict(obj: typing.Any) -> dict:
    """Serialize ``obj`` with the serializer registered for its class name.

    Raises:
        NotImplementedError: if no serializer is registered for the type.
    """
    method = registry.get(type(obj).__name__, None)
    if method is not None:
        return method(obj)
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-5.
Now we can push the type_value formatting helper and hide it inside the register function.
import json
import typing
from stuff import Atom, Vector
import dataclasses
import uuid
# Dispatch table: class name -> serializer; filled through register().
registry = {
}


def register(type_, fun):
    """Register ``fun`` as the payload converter for ``type_``.

    The stored serializer wraps ``fun(obj)`` in the
    ``{'__type__': <class name>, 'value': ...}`` envelope.
    """
    def type_value(t, v):
        return {'__type__': t, 'value': v}

    registry[type_.__name__] = lambda obj: type_value(type_.__name__, fun(obj))


register(Atom, dataclasses.asdict)
register(Vector, dataclasses.asdict)
register(uuid.UUID, str)


def to_dict(obj: typing.Any) -> dict:
    """Serialize ``obj`` with the serializer registered for its class name.

    Raises:
        NotImplementedError: if no serializer is registered for the type.
    """
    method = registry.get(type(obj).__name__, None)
    if method is not None:
        return method(obj)
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-6.
We are ready to free the our_json module from having to know all the types it serializes.
import json
import typing
# Dispatch table: class name -> serializer; filled by client code via register().
registry = {
}


def register(type_, fun):
    """Register ``fun`` as the payload converter for ``type_``.

    The stored serializer wraps ``fun(obj)`` in the
    ``{'__type__': <class name>, 'value': ...}`` envelope. Entries are keyed
    by ``type_.__name__``, so a later registration for a class with the same
    name replaces the earlier one.
    """
    def type_value(t, v):
        return {'__type__': t, 'value': v}

    registry[type_.__name__] = lambda obj: type_value(type_.__name__, fun(obj))


def to_dict(obj: typing.Any) -> dict:
    """Serialize ``obj`` with the serializer registered for its class name.

    Raises:
        NotImplementedError: if no serializer is registered for the type.
    """
    method = registry.get(type(obj).__name__, None)
    if method is not None:
        return method(obj)
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )
def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    return json.dumps(obj, default=default, **kwargs)

We need to change the test a bit, to register the types we want to serialize from the client-side code (the code that calls the dumps function).
import pytest
from stuff import Atom, Vector
import uuid
from our_json import dumps, register
import dataclasses
# The client (here, the test module) registers the serializers it needs.
register(Atom, dataclasses.asdict)
register(Vector, dataclasses.asdict)
register(uuid.UUID, str)

hydrogen = Atom('H', (1.00784, 'Da'), 1, ['proton', 'electron'])
i = Vector(1, 0)
the_id = uuid.UUID('af281d69-6cca-49f5-b119-abccd46bb32c')


@pytest.mark.parametrize('obj, string', [
    (hydrogen, '{"__type__": "Atom", "value": {"name": "H", "mass": [1.00784, "Da"], "atomic_number": 1, "constituents": ["proton", "electron"]}}'),
    (i, '{"__type__": "Vector", "value": {"x": 1, "y": 0}}'),
    (the_id, '{"__type__": "UUID", "value": "af281d69-6cca-49f5-b119-abccd46bb32c"}'),
])
def test_dumps(obj, string):
    """Each sample object serializes to its expected tagged JSON string."""
    assert dumps(obj) == string
See step-7.
At this point the code is OK from an architectural point of view, because the our_json module is decoupled from its clients and its functionality is complete.
Sugar
But we can make it a little bit more palatable adding more sugar.
First we enable the register to curry over its first argument (the type).
import json
import typing
# Dispatch table: class name -> serializer; filled by client code via register().
registry = {
}


def register(type_, fun=None):
    """Register ``fun`` as the payload converter for ``type_``.

    Can be called directly as ``register(T, fun)`` or curried as
    ``register(T)(fun)``, which also makes it usable as a decorator.

    Returns:
        ``fun`` itself, so a decorated name keeps referring to the function
        instead of being rebound to ``None``. When ``fun`` is omitted, a
        one-argument callable that completes the registration.
    """
    if fun is None:
        return lambda f: register(type_, f)

    def register_type(fun):
        def type_value(t, v):
            return {'__type__': t, 'value': v}

        registry[type_.__name__] = lambda obj: type_value(type_.__name__, fun(obj))

    register_type(fun)
    return fun


def to_dict(obj: typing.Any) -> dict:
    """Serialize ``obj`` with the serializer registered for its class name.

    Raises:
        NotImplementedError: if no serializer is registered for the type.
    """
    method = registry.get(type(obj).__name__, None)
    if method is not None:
        return method(obj)
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-8.
This change allows us to change the client code a bit and register the serializers in a curried form, which for now will seem a little bit awkward.
import pytest
from stuff import Atom, Vector
import uuid
from our_json import dumps, register
import dataclasses
# Curried registration: register(T) returns a callable that takes the serializer.
register(Atom)(dataclasses.asdict)
register(Vector)(dataclasses.asdict)
register(uuid.UUID)(str)

hydrogen = Atom('H', (1.00784, 'Da'), 1, ['proton', 'electron'])
i = Vector(1, 0)
the_id = uuid.UUID('af281d69-6cca-49f5-b119-abccd46bb32c')


@pytest.mark.parametrize('obj, string', [
    (hydrogen, '{"__type__": "Atom", "value": {"name": "H", "mass": [1.00784, "Da"], "atomic_number": 1, "constituents": ["proton", "electron"]}}'),
    (i, '{"__type__": "Vector", "value": {"x": 1, "y": 0}}'),
    (the_id, '{"__type__": "UUID", "value": "af281d69-6cca-49f5-b119-abccd46bb32c"}'),
])
def test_dumps(obj, string):
    """Each sample object serializes to its expected tagged JSON string."""
    assert dumps(obj) == string
See step-9.
Next we resolve the awkwardness with a more expressive use of the register function: it can be used as a decorator.
import pytest
from stuff import Atom, Vector
import uuid
from our_json import dumps, register
import dataclasses
# Decorator registration: each function below is the payload converter for
# the type named in its decorator.
@register(Atom)
def _(atom: Atom) -> dict:
    return dataclasses.asdict(atom)


@register(Vector)
def _(v: Vector) -> dict:
    return dataclasses.asdict(v)


@register(uuid.UUID)
def _(u: uuid.UUID) -> str:
    return str(u)


hydrogen = Atom('H', (1.00784, 'Da'), 1, ['proton', 'electron'])
i = Vector(1, 0)
the_id = uuid.UUID('af281d69-6cca-49f5-b119-abccd46bb32c')


@pytest.mark.parametrize('obj, string', [
    (hydrogen, '{"__type__": "Atom", "value": {"name": "H", "mass": [1.00784, "Da"], "atomic_number": 1, "constituents": ["proton", "electron"]}}'),
    (i, '{"__type__": "Vector", "value": {"x": 1, "y": 0}}'),
    (the_id, '{"__type__": "UUID", "value": "af281d69-6cca-49f5-b119-abccd46bb32c"}'),
])
def test_dumps(obj, string):
    """Each sample object serializes to its expected tagged JSON string."""
    assert dumps(obj) == string
See step-10.
And now we’re ready for the final enlightenment 😊: create a dispatch decorator that converts the to_dict function into a generic function that:
- exposes a register function to register its instances
- dispatches to the right implementation by type during runtime
import json
import typing
# Dispatch table: class name -> serializer; filled via register().
registry = {
}


def register(type_, fun=None):
    """Register ``fun`` as the payload converter for ``type_``.

    Can be called directly as ``register(T, fun)`` or curried as
    ``register(T)(fun)``, which also makes it usable as a decorator.

    Returns:
        ``fun`` itself, so a decorated name keeps referring to the function
        instead of being rebound to ``None``. When ``fun`` is omitted, a
        one-argument callable that completes the registration.
    """
    if fun is None:
        return lambda f: register(type_, f)

    def register_type(fun):
        def type_value(t, v):
            return {'__type__': t, 'value': v}

        registry[type_.__name__] = lambda obj: type_value(type_.__name__, fun(obj))

    register_type(fun)
    return fun


def dispatch(fun):
    """Turn ``fun`` into a generic function dispatching on the argument's class name.

    The returned wrapper exposes ``register`` as an attribute. Calls for
    types without a registered implementation fall back to ``fun``, so the
    decorated body acts as the default implementation.
    """
    def wrapper(obj: typing.Any) -> dict:
        method = registry.get(type(obj).__name__, None)
        if method is not None:
            return method(obj)
        # No registered implementation: defer to the decorated function.
        return fun(obj)

    wrapper.register = register
    return wrapper


@dispatch
def to_dict(obj: typing.Any) -> dict:
    """Default implementation, reached only for unregistered types.

    Raises:
        NotImplementedError: if no serializer is registered for the type.
    """
    # This lookup repeats what the dispatch wrapper already did.
    method = registry.get(type(obj).__name__, None)
    if method is not None:
        return method(obj)
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )
def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    return json.dumps(obj, default=default, **kwargs)

We register the instances directly with @to_dict.register:
import pytest
from stuff import Atom, Vector
import uuid
from our_json import dumps, to_dict
import dataclasses
# Implementations are registered on the generic function itself.
@to_dict.register(Atom)
def _(atom: Atom) -> dict:
    return dataclasses.asdict(atom)


@to_dict.register(Vector)
def _(v: Vector) -> dict:
    return dataclasses.asdict(v)


@to_dict.register(uuid.UUID)
def _(u: uuid.UUID) -> str:
    return str(u)


hydrogen = Atom('H', (1.00784, 'Da'), 1, ['proton', 'electron'])
i = Vector(1, 0)
the_id = uuid.UUID('af281d69-6cca-49f5-b119-abccd46bb32c')


@pytest.mark.parametrize('obj, string', [
    (hydrogen, '{"__type__": "Atom", "value": {"name": "H", "mass": [1.00784, "Da"], "atomic_number": 1, "constituents": ["proton", "electron"]}}'),
    (i, '{"__type__": "Vector", "value": {"x": 1, "y": 0}}'),
    (the_id, '{"__type__": "UUID", "value": "af281d69-6cca-49f5-b119-abccd46bb32c"}'),
])
def test_dumps(obj, string):
    """Each sample object serializes to its expected tagged JSON string."""
    assert dumps(obj) == string
See step-11.
So now we can clean up the module and hide the registry and the register function inside the dispatch decorator.
import json
import typing
def dispatch(fun):
    """Turn ``fun`` into a generic function dispatching on the argument's class name.

    Each decorated function gets its own private registry. The returned
    wrapper exposes ``register`` as an attribute; calls for types without a
    registered implementation fall back to ``fun``, so the decorated body
    acts as the default implementation.
    """
    # Class name -> serializer; private to this generic function.
    registry = {
    }

    def register(type_, fun=None):
        """Register ``fun`` as the payload converter for ``type_``.

        Usable as ``register(T, fun)``, ``register(T)(fun)`` or as a
        decorator. Returns ``fun`` so a decorated name is not rebound to
        ``None``.
        """
        if fun is None:
            return lambda f: register(type_, f)

        def register_type(fun):
            def type_value(t, v):
                return {'__type__': t, 'value': v}

            registry[type_.__name__] = lambda obj: type_value(type_.__name__, fun(obj))

        register_type(fun)
        return fun

    def wrapper(obj: typing.Any) -> dict:
        method = registry.get(type(obj).__name__, None)
        if method is not None:
            return method(obj)
        # No registered implementation: defer to the decorated function.
        return fun(obj)

    wrapper.register = register
    return wrapper


@dispatch
def to_dict(obj: typing.Any) -> dict:
    """Default implementation, reached only for unregistered types.

    Raises:
        NotImplementedError: always; the message names the type of ``obj``.
    """
    raise NotImplementedError(
        "to_dict not implemented for type: {name}".format(
            name=type(obj).__name__,
        )
    )


def dumps(obj: typing.Any, default=to_dict, **kwargs) -> str:
    """Serialize ``obj`` to a JSON string.

    ``default`` is passed to ``json.dumps``, which calls it for objects it
    cannot encode natively. Extra keyword arguments are forwarded unchanged.
    """
    return json.dumps(obj, default=default, **kwargs)
See step-12.
At this point, the dispatch function can be removed from the our_json module, because it's totally generic. But the exercise is complete, and instead of doing that we can use the functools.singledispatch decorator to achieve the same thing.
Also, functools.singledispatch has a lot more sugar and niceties, like inferring the type to register from the function signature. Take a look at it.