2. Introducing valid_model
It includes:
- base class - Object
- basic descriptors - Integer, Float, DateTime, String, ...
- nesting descriptors - Dict, List, Set, EmbeddedObject
3. Most similar libraries are tightly integrated with a persistence layer:
SQLAlchemy, Django ORM, mongokit, etc.
Or are targeted at web forms:
Formencode, colander, deform
So the goal was to build a highly flexible, unopinionated data-modeling
library.
4. Some Use Cases
● Database data model
● Form validation
● Test fixtures
● API request/response objects
● Scrubbing and normalizing data
● Data migration
5.
car = {
'make': None,
'model': None,
'doors': None,
'horsepower': None,
}
class Car(object):
    """Plain-Python car record; every field is optional and defaults to None."""

    def __init__(self, make=None, model=None, doors=None, horsepower=None):
        # Values are stored exactly as passed in: nothing here checks their
        # type or range.
        self.make, self.model = make, model
        self.doors, self.horsepower = doors, horsepower
It is valid Python to add new instance attributes arbitrarily in other methods, which can lead to
headaches (and pylint complaints).
6. At least I know the fields ahead of time, but what data types are these attributes?
def horse_check(value):
    """Validate a horsepower figure.

    Returns True for an acceptable value.

    Raises:
        ValidationError: if the value is exactly 1, or is zero or negative.
    """
    if value == 1:
        raise ValidationError('Is this powered by an actual horse?')
    if value <= 0:
        raise ValidationError('Phantom horses?')
    return True
class Car(Object):
    """Car schema declared with valid_model field descriptors."""
    # The make is the only field declared non-nullable.
    make = String(nullable=False)
    model = String()
    # Inline validator: accepts a value of at most five.
    doors = Integer(validator=lambda x: x <= 5)
    # Validation is delegated to the horse_check function.
    horsepower = Integer(validator=horse_check)
7. Nested Schemas are Easy
class Person(Object):
    """Schema for a person; only the name is declared non-nullable."""
    name = String(nullable=False)
    homepage = String()
class BlogPost(Object):
    """Blog post schema with an embedded author and list-valued fields."""
    # The mutator title-cases whatever value is assigned to the title.
    title = String(nullable=False, mutator=lambda x: x.title())
    # The default is a callable, so each post gets its own UTC timestamp.
    updated = DateTime(nullable=False, default=datetime.utcnow)
    published = DateTime()
    author = EmbeddedObject(Person)
    contributors = List(value=EmbeddedObject(Person), nullable=False)
    tags = List(value=String(nullable=False), nullable=False)

    def validate(self):
        """Run the base-class validation, then check the two timestamps.

        Raises:
            ValidationError: if ``published`` is set and is later than
                ``updated``.
        """
        super(BlogPost, self).validate()
        if self.published is not None and self.published > self.updated:
            # Adjacent literals are concatenated by the parser, so the message
            # stays one string without a literal running across two lines.
            raise ValidationError(
                'a post cannot be published at a later date '
                'than it was updated')
post = BlogPost(title='example post', author={'name': 'Josh'}, tags=['tag1', 'tag2'])
>>> print post
{'updated': datetime.datetime(2014, 10, 7, 13, 43, 1, 960174),
'author': {'homepage': None, 'name': u'Josh'},
'contributors': [], 'title': u'Example Post', 'tags': [u'tag1', u'tag2'], 'published': None}
8. valid_model also provides something closer to strict typing
class Car(Object):
    """Car schema whose fields are type-checked and validated on assignment."""
    # Declared non-nullable.
    make = String(nullable=False)
    model = String()
    # The lambda validator passes for values up to and including five.
    doors = Integer(validator=lambda x: x <= 5)
    # horse_check is used as the validator for this field.
    horsepower = Integer(validator=horse_check)
>>> Car(doors='five')
valid_model.exc.ValidationError: 'five' is not an int
>>> Car(doors=10)
valid_model.exc.ValidationError: doors
>>> Car(horsepower=1)
valid_model.exc.ValidationError: Is this powered by an actual horse?
>>> Car(make=None)
valid_model.exc.ValidationError: make is not nullable
9. Normalize your data when it gets set
class HTTPAccessLog(Object):
    """Access-log entry whose status text must begin with its numeric code."""
    code = Integer(nullable=False)
    # The mutator upper-cases the status text when it is assigned.
    status = String(nullable=False, mutator=lambda x: x.upper())
    timestamp = DateTime(default=datetime.utcnow)

    def validate(self):
        """Run the base-class validation, then cross-check code and status.

        Raises:
            ValidationError: if ``status`` does not start with the text form
                of ``code``.
        """
        super(HTTPAccessLog, self).validate()
        code_prefix = unicode(self.code)
        if self.status.startswith(code_prefix):
            return
        raise ValidationError('code and status do not match')
>>> ping = HTTPAccessLog()
>>> ping.code = 404
>>> ping.status = '404 not found'
>>> print ping
{'status': u'404 NOT FOUND', 'timestamp': datetime.datetime(2014, 10, 7, 13, 36, 15, 217678),
'code': 404}
10. Descriptors Tangent
Python descriptors are fancy attributes.
class SomeDescriptor(object):
    """Skeleton showing the three hooks of the descriptor protocol.

    The method bodies are placeholders; a real descriptor would read, store
    and remove a per-instance value in them.
    """

    def __get__(self, instance, klass=None):
        # Invoked when the attribute is read.
        pass

    def __set__(self, instance, value):
        # Invoked on assignment: obj.attr = value.
        pass

    def __delete__(self, instance):
        # Invoked on ``del obj.attr``. The hook is spelled __delete__;
        # __del__ is the unrelated object finalizer and is never called with
        # an instance argument.
        pass
class Foo(object):
    """Example owner class: attribute ``b`` is a SomeDescriptor instance."""
    b = SomeDescriptor()
11. @property Descriptors
@property is the most common descriptor.
class Foo(object):
    """Demonstrates three common uses of the built-in ``property`` descriptor."""

    # Cache slot for ``expensive_func``. Defined at class level so the first
    # read sees None instead of raising AttributeError.
    _result = None

    @property
    def a(self):
        """Read/write attribute backed by ``self._a``."""
        return self._a

    @a.setter
    def a(self, value):
        self._a = value

    # Make an attribute readonly by not defining the setter.
    @property
    def readonly(self):
        """Expose ``self._private_var``; assigning to it raises AttributeError."""
        return self._private_var

    # Lazily initialize or cache expensive calculations
    @property
    def expensive_func(self):
        """Compute the value on first access and reuse it afterwards."""
        if self._result is None:
            # Inside the method body this name resolves to the module-level
            # expensive_func callable, not to this property.
            self._result = expensive_func()
        return self._result
12. Customizing Descriptors is Easy
Extending existing descriptors works like subclassing anything else in Python.
class SuperDateTime(DateTime):
    """DateTime field that also accepts strings and numeric timestamps."""

    def __set__(self, instance, value):
        """Coerce the value if needed, then hand it to the DateTime setter."""
        if isinstance(value, basestring):
            # NOTE(review): python-dateutil exposes this as
            # dateutil.parser.parse; confirm what ``dateutils`` refers to here.
            coerced = dateutils.parse(value)
        elif isinstance(value, (int, float)):
            # Numbers are passed to utcfromtimestamp, i.e. read as a POSIX
            # timestamp.
            coerced = datetime.utcfromtimestamp(value)
        else:
            coerced = value
        super(SuperDateTime, self).__set__(instance, coerced)
class Decimal(Generic):
    """Field descriptor that only accepts ``decimal.Decimal`` values."""

    def __set__(self, instance, value):
        """Type-check the value, then hand it to the Generic setter.

        Raises:
            ValidationError: if the value is not a ``decimal.Decimal``; the
                message is built from ``self.name``.
        """
        # NOTE(review): None also fails this check, so it is rejected before
        # the base class sees it -- confirm that is intended for nullable
        # fields.
        is_decimal = isinstance(value, decimal.Decimal)
        if not is_decimal:
            raise ValidationError('{} is not a decimal'.format(self.name))
        super(Decimal, self).__set__(instance, value)
13. Simple wrappers for persistence
An example of using MongoDB with Redis as a cache
class PersistBlogPost(object):
    """Store BlogPost objects in MongoDB, using Redis as a cache for lookups."""

    def __init__(self, mongo_collection, redis_conn):
        """Keep the two handles that insert() and find() operate on."""
        self.mongo_collection = mongo_collection
        self.redis_conn = redis_conn

    def insert(self, post):
        """Insert the post's ``__json__()`` representation into the collection."""
        self.mongo_collection.insert(post.__json__())

    def find(self, title):
        """Look a post up by title, trying the cache before the collection.

        Returns the unpickled cache entry on a hit. On a miss, returns a
        BlogPost built from the stored document (and caches it), or the falsy
        ``find_one`` result when no document matches.
        """
        cached = self.redis_conn.get(title)
        if cached:
            # NOTE: pickle.loads can execute arbitrary code; this is only safe
            # while nothing untrusted can write to the Redis instance.
            return pickle.loads(cached)

        post = self.mongo_collection.find_one({'title': title})
        if post:
            # NOTE(review): the stored document may carry keys BlogPost does
            # not declare (e.g. ``_id``) -- confirm the constructor accepts
            # them.
            post = BlogPost(**post)
            self.redis_conn.set(title, pickle.dumps(post))
        return post