Refactor code structure for improved readability and maintainability

This commit is contained in:
claudi 2026-04-07 09:10:53 +02:00
parent 389d72a136
commit aa4c067ea8
1685 changed files with 393439 additions and 71932 deletions

View file

@ -0,0 +1,158 @@
import json
from warnings import warn
from .node import SchemaNode
from .strategies import BASIC_SCHEMA_STRATEGIES
class _MetaSchemaBuilder(type):
    """
    Metaclass that assembles the strategy list of a builder class.

    When a class body declares ``EXTRA_STRATEGIES``, those strategies are
    placed ahead of every ``STRATEGIES`` entry found on the direct base
    classes, duplicates are dropped (the first occurrence wins), and a
    dedicated ``SchemaNode`` subclass carrying the combined tuple is
    stored on the class as ``NODE_CLASS``.
    """

    def __init__(cls, name, bases, attrs):
        super().__init__(name, bases, attrs)

        # nothing to assemble unless the class body itself declares extras
        if 'EXTRA_STRATEGIES' not in attrs:
            return

        # custom strategies come first, followed by the inherited ones
        candidates = list(attrs['EXTRA_STRATEGIES'])
        for base in bases:
            candidates.extend(getattr(base, 'STRATEGIES', ()))

        # drop duplicates while keeping the first occurrence of each
        ordered = []
        for candidate in candidates:
            if candidate not in ordered:
                ordered.append(candidate)
        cls.STRATEGIES = tuple(ordered)

        # create a version of SchemaNode loaded with the merged strategies
        node_name = '%sSchemaNode' % name
        cls.NODE_CLASS = type(node_name, (SchemaNode,),
                              {'STRATEGIES': cls.STRATEGIES})
class SchemaBuilder(metaclass=_MetaSchemaBuilder):
    """
    ``SchemaBuilder`` is the basic schema generator class.

    Instances accumulate any mix of existing schemas and sample objects
    and can then be serialized with :meth:`to_schema` or :meth:`to_json`.
    """
    DEFAULT_URI = 'http://json-schema.org/schema#'
    NULL_URI = 'NULL'
    NODE_CLASS = SchemaNode
    STRATEGIES = BASIC_SCHEMA_STRATEGIES

    def __init__(self, schema_uri='DEFAULT'):
        """
        :param schema_uri: value of the ``$schema`` keyword. If not
          given, it will use the value of the first available
          ``$schema`` keyword on an added schema or else the default:
          ``'http://json-schema.org/schema#'``. A value of ``False`` or
          ``None`` will direct GenSON to leave out the ``"$schema"``
          keyword.
        :raises TypeError: if ``NODE_CLASS`` is not a subclass of
          ``SchemaNode``
        """
        if schema_uri is None or schema_uri is False:
            # explicit opt-out: remember the sentinel that suppresses $schema
            resolved_uri = self.NULL_URI
        elif schema_uri == 'DEFAULT':
            # undecided: may still be filled in by an added schema
            resolved_uri = None
        else:
            resolved_uri = schema_uri
        self.schema_uri = resolved_uri

        if not issubclass(self.NODE_CLASS, SchemaNode):
            raise TypeError("NODE_CLASS %r is not a subclass of SchemaNode"
                            % self.NODE_CLASS)
        self._root_node = self.NODE_CLASS()

    def add_schema(self, schema):
        """
        Merge in a JSON schema. This can be a ``dict`` or another
        ``SchemaBuilder``

        :param schema: a JSON Schema

        .. note::
            There is no schema validation. If you pass in a bad schema,
            you might get back a bad schema.
        """
        if isinstance(schema, SchemaBuilder):
            source_uri = schema.schema_uri
            schema = schema.to_schema()
            if source_uri is None:
                # the other builder never settled on a URI, so the value it
                # serialized is only its default and must not be adopted
                del schema['$schema']
        elif isinstance(schema, SchemaNode):
            schema = schema.to_schema()

        if '$schema' in schema:
            # the first URI seen wins; later ones are ignored
            if not self.schema_uri:
                self.schema_uri = schema['$schema']
            else:
                self.schema_uri = self.schema_uri
            # copy before stripping so the caller's mapping is left intact
            schema = dict(schema)
            schema.pop('$schema')

        self._root_node.add_schema(schema)

    def add_object(self, obj):
        """
        Modify the schema to accommodate an object.

        :param obj: any object or scalar that can be serialized in JSON
        """
        self._root_node.add_object(obj)

    def to_schema(self):
        """
        Generate a schema based on previous inputs.

        :rtype: ``dict``
        """
        return {**self._base_schema(), **self._root_node.to_schema()}

    def to_json(self, *args, **kwargs):
        """
        Generate a schema and convert it directly to serialized JSON.

        Positional and keyword arguments are forwarded to ``json.dumps``.

        :rtype: ``str``
        """
        generated = self.to_schema()
        return json.dumps(generated, *args, **kwargs)

    def __len__(self):
        """
        Number of ``SchemaStrategy``s at the top level. This is used
        mostly to check for emptiness.
        """
        return len(self._root_node)

    def __eq__(self, other):
        """
        Check for equality with another ``SchemaBuilder`` object.

        :param other: another ``SchemaBuilder`` object. Other types are
          accepted, but will always return ``False``
        """
        if self is other:
            return True
        if isinstance(other, self.__class__):
            # use _base_schema to get proper comparison for $schema keyword
            return (self._base_schema() == other._base_schema()
                    and self._root_node == other._root_node)
        return False

    def _base_schema(self):
        """
        Build the part of the output that carries the ``$schema`` keyword.

        :rtype: ``dict`` (empty when the keyword is suppressed)
        """
        if self.schema_uri == self.NULL_URI:
            return {}
        return {'$schema': self.schema_uri or self.DEFAULT_URI}
class Schema(SchemaBuilder):
    """
    Deprecated predecessor of ``SchemaBuilder``, kept for backward
    compatibility.

    Instances are initialised with ``SchemaBuilder.NULL_URI`` as the
    schema URI, and creating one emits a ``PendingDeprecationWarning``.
    """

    def __init__(self):
        # NOTE: the trailing space after "genson.SchemaBuilder" keeps the
        # adjacent literals from running together in the emitted message.
        warn('genson.Schema is deprecated in v1.0, and it may be '
             'removed in future versions. Use genson.SchemaBuilder '
             'instead.',
             PendingDeprecationWarning)
        super().__init__(schema_uri=SchemaBuilder.NULL_URI)

    def to_dict(self, recurse='DEPRECATED'):
        """
        Deprecated alias of :meth:`to_schema`.

        :param recurse: ignored; passing any value other than the default
          additionally emits a ``DeprecationWarning``
        :rtype: ``dict``
        """
        warn('#to_dict is deprecated in v1.0, and it may be removed in '
             'future versions. Use #to_schema instead.',
             PendingDeprecationWarning)
        if recurse != 'DEPRECATED':
            warn('the `recurse` option for #to_dict does nothing in v1.0',
                 DeprecationWarning)
        return self.to_schema()

View file

@ -0,0 +1,140 @@
from .strategies import BASIC_SCHEMA_STRATEGIES, Typeless
class SchemaGenerationError(RuntimeError):
    """Raised when no schema strategy matches a given schema or object."""
class SchemaNode:
    """
    A single node of the generated schema.

    A node keeps a list of active strategy instances and hands every
    schema or object it receives to the strategy that matches it. Nodes
    can be loaded up with existing schemas and objects before being
    serialized.
    """
    STRATEGIES = BASIC_SCHEMA_STRATEGIES

    def __init__(self):
        self._active_strategies = []

    def add_schema(self, schema):
        """
        Merge in an existing schema.

        :param schema: a JSON Schema ``dict`` or another ``SchemaNode``
        :returns: ``self``, to allow method chaining
        """
        # serialize instances of SchemaNode before parsing
        if isinstance(schema, SchemaNode):
            schema = schema.to_schema()

        # every sub-schema is delegated to the strategy that matches it
        for piece in self._get_subschemas(schema):
            self._get_strategy_for_schema(piece).add_schema(piece)
        return self

    def add_object(self, obj):
        """
        Modify the schema to accommodate an object.

        :param obj: the value to use in generating the schema
        :returns: ``self``, to allow method chaining
        """
        self._get_strategy_for_object(obj).add_object(obj)
        return self

    def to_schema(self):
        """
        Convert the current schema to a ``dict``.

        Strategies whose output consists of nothing but a ``type`` keyword
        are collapsed into one combined ``type`` entry; if more than one
        schema remains after that, they are wrapped in ``anyOf``.
        """
        bare_types = set()
        detailed = []
        for strategy in self._active_strategies:
            fragment = strategy.to_schema()
            if len(fragment) == 1 and 'type' in fragment:
                bare_types.add(fragment['type'])
            else:
                detailed.append(fragment)

        if bare_types:
            if len(bare_types) == 1:
                # a lone type is emitted as a scalar rather than a list
                (type_value,) = bare_types
            else:
                type_value = sorted(bare_types)
            detailed.insert(0, {'type': type_value})

        if not detailed:
            return {}
        if len(detailed) == 1:
            return detailed[0]
        return {'anyOf': detailed}

    def __len__(self):
        """Number of active strategies held by this node."""
        return len(self._active_strategies)

    def __eq__(self, other):
        """ Required for SchemaBuilder.__eq__ to work properly """
        return (isinstance(other, self.__class__)
                and self.__dict__ == other.__dict__)

    # private methods

    def _get_subschemas(self, schema):
        """
        Split a schema into the list of single-type schemas it stands for:
        ``anyOf`` members are flattened recursively and a list-valued
        ``type`` is expanded into one schema per listed type.
        """
        if 'anyOf' in schema:
            flattened = []
            for alternative in schema['anyOf']:
                flattened.extend(self._get_subschemas(alternative))
            return flattened

        if isinstance(schema.get('type'), list):
            # every other keyword is repeated on each per-type schema
            remainder = dict(schema)
            remainder.pop('type')
            return [dict(type=single, **remainder)
                    for single in schema['type']]

        return [schema]

    def _get_strategy_for_schema(self, schema):
        return self._get_strategy_for_('schema', schema)

    def _get_strategy_for_object(self, obj):
        return self._get_strategy_for_('object', obj)

    def _get_strategy_for_(self, kind, schema_or_obj):
        """
        Find, or activate, the strategy responsible for ``schema_or_obj``.

        :param kind: ``'schema'`` or ``'object'``; selects the
          ``match_<kind>`` method used for matching
        :raises SchemaGenerationError: if nothing matches
        """
        matcher_name = 'match_' + kind

        # check existing types
        for current in self._active_strategies:
            if getattr(current, matcher_name)(schema_or_obj):
                return current

        # check all potential types; the first match gets activated
        for strategy_class in self.STRATEGIES:
            if not getattr(strategy_class, matcher_name)(schema_or_obj):
                continue
            activated = strategy_class(self.__class__)
            # incorporate typeless strategy if it exists
            if self._active_strategies and \
                    isinstance(self._active_strategies[-1], Typeless):
                pending = self._active_strategies.pop()
                activated.add_schema(pending.to_schema())
            self._active_strategies.append(activated)
            return activated

        # no match found, if typeless add to first strategy
        if kind == 'schema' and Typeless.match_schema(schema_or_obj):
            if not self._active_strategies:
                self._active_strategies.append(Typeless(self.__class__))
            return self._active_strategies[0]

        # no match found, raise an error
        raise SchemaGenerationError(
            'Could not find matching schema type for {0}: {1!r}'.format(
                kind, schema_or_obj))

View file

@ -0,0 +1,37 @@
from .base import (
SchemaStrategy,
TypedSchemaStrategy
)
from .scalar import (
Typeless,
Null,
Boolean,
Number,
String
)
from .array import List, Tuple
from .object import Object
# Default strategy classes, in the order they are listed here. List and
# Tuple inherit the same object matcher from BaseArray, so their relative
# order matters to any consumer that picks the first matching strategy.
# Typeless is intentionally not part of this tuple.
BASIC_SCHEMA_STRATEGIES = (
Null,
Boolean,
Number,
String,
List,
Tuple,
Object
)
# Names exported by ``from <this package> import *``.
__all__ = (
'SchemaStrategy',
'TypedSchemaStrategy',
'Null',
'Boolean',
'Number',
'String',
'List',
'Tuple',
'Object',
'Typeless',
'BASIC_SCHEMA_STRATEGIES'
)

View file

@ -0,0 +1,79 @@
from .base import SchemaStrategy
class BaseArray(SchemaStrategy):
    """
    Abstract array schema strategy.

    Holds what the array strategies have in common. Subclasses are
    expected to provide the ``_items`` attribute and an
    ``items_to_schema`` method, both of which ``to_schema`` relies on.
    """
    KEYWORDS = ('type', 'items')

    @staticmethod
    def match_object(obj):
        """Return ``True`` when ``obj`` is a ``list``."""
        return isinstance(obj, list)

    def to_schema(self):
        """Serialize to a ``dict`` with ``type`` set to ``'array'``."""
        result = super().to_schema()
        result['type'] = 'array'
        # ``items`` is only written when ``_items`` is truthy
        if self._items:
            result['items'] = self.items_to_schema()
        return result
class List(BaseArray):
    """
    Strategy for list-style array schemas, where a single ``items``
    schema describes every element. This is the default strategy for
    arrays.
    """

    @staticmethod
    def match_schema(schema):
        """
        Match array schemas whose ``items`` keyword is a ``dict`` or is
        missing altogether.
        """
        is_array = schema.get('type') == 'array'
        return is_array and isinstance(schema.get('items', {}), dict)

    def __init__(self, node_class):
        super().__init__(node_class)
        # one shared node describes all elements
        self._items = node_class()

    def add_schema(self, schema):
        """Merge ``schema``, folding its ``items`` into the shared node."""
        super().add_schema(schema)
        if 'items' in schema:
            self._items.add_schema(schema['items'])

    def add_object(self, obj):
        """Feed every element of ``obj`` to the shared item node."""
        for element in obj:
            self._items.add_object(element)

    def items_to_schema(self):
        """Return the schema of the shared item node."""
        return self._items.to_schema()
class Tuple(BaseArray):
    """
    Strategy for tuple-style array schemas, where ``items`` is a list
    holding one schema per position. These will always have an items key
    to preserve the fact that it's a tuple.
    """

    @staticmethod
    def match_schema(schema):
        """Match array schemas whose ``items`` keyword is a ``list``."""
        is_array = schema.get('type') == 'array'
        return is_array and isinstance(schema.get('items'), list)

    def __init__(self, node_class):
        super().__init__(node_class)
        # one node per position; starts out with a single position
        self._items = [node_class()]

    def add_schema(self, schema):
        """Merge ``schema``, applying its ``items`` position by position."""
        super().add_schema(schema)
        if 'items' in schema:
            self._add(schema['items'], 'add_schema')

    def add_object(self, obj):
        """Feed each element of ``obj`` to the node at the same position."""
        self._add(obj, 'add_object')

    def _add(self, items, func):
        """
        Call the node method named ``func`` on each positional node with
        the corresponding entry of ``items``, creating nodes for
        positions that have not been seen before.
        """
        shortfall = len(items) - len(self._items)
        # a non-positive shortfall yields an empty range, so nothing is added
        self._items.extend(self.node_class() for _ in range(shortfall))
        for node, entry in zip(self._items, items):
            getattr(node, func)(entry)

    def items_to_schema(self):
        """Return the list of per-position schemas."""
        return [node.to_schema() for node in self._items]

View file

@ -0,0 +1,78 @@
from copy import copy
from warnings import warn
class SchemaStrategy:
    """
    Base schema strategy. It defines the interface shared by all
    subclasses:

    * match_schema
    * match_object
    * __init__
    * add_schema
    * add_object
    * to_schema
    * __eq__

    Keywords listed in ``KEYWORDS`` are the ones a strategy handles
    itself; every other keyword of an added schema is stored verbatim
    and written back out by ``to_schema``.
    """
    KEYWORDS = ('type',)

    @classmethod
    def match_schema(cls, schema):
        """Not implemented here; always raises ``NotImplementedError``."""
        raise NotImplementedError("'match_schema' not implemented")

    @classmethod
    def match_object(cls, obj):
        """Not implemented here; always raises ``NotImplementedError``."""
        raise NotImplementedError("'match_object' not implemented")

    def __init__(self, node_class):
        self.node_class = node_class
        self._extra_keywords = {}

    def add_schema(self, schema):
        """Record the keywords of ``schema`` that are not in ``KEYWORDS``."""
        self._add_extra_keywords(schema)

    def _add_extra_keywords(self, schema):
        """
        Store each unhandled keyword the first time it is seen. A later,
        different value for the same keyword only triggers a warning; the
        stored value is kept.
        """
        stored = self._extra_keywords
        for keyword, value in schema.items():
            if keyword in self.KEYWORDS:
                continue
            if keyword not in stored:
                stored[keyword] = value
                continue
            kept = stored[keyword]
            if kept != value:
                warn(('Schema incompatible. Keyword {0!r} has conflicting '
                      'values ({1!r} vs. {2!r}). Using {1!r}').format(
                          keyword, kept, value))

    def add_object(self, obj):
        """Do nothing; the base strategy learns nothing from objects."""
        pass

    def to_schema(self):
        """Return a shallow copy of the stored extra keywords."""
        return copy(self._extra_keywords)

    def __eq__(self, other):
        """ Required for SchemaBuilder.__eq__ to work properly """
        return (isinstance(other, self.__class__)
                and self.__dict__ == other.__dict__)
class TypedSchemaStrategy(SchemaStrategy):
    """
    Base schema strategy class for scalar types. Subclasses define
    these two class constants:

    * `JS_TYPE`: a valid value of the `type` keyword
    * `PYTHON_TYPE`: Python type objects - can be a tuple of types
    """

    @classmethod
    def match_schema(cls, schema):
        """Match schemas whose ``type`` equals ``JS_TYPE``."""
        declared_type = schema.get('type')
        return declared_type == cls.JS_TYPE

    @classmethod
    def match_object(cls, obj):
        """Match objects that are instances of ``PYTHON_TYPE``."""
        return isinstance(obj, cls.PYTHON_TYPE)

    def to_schema(self):
        """Serialize to a ``dict`` with ``type`` set to ``JS_TYPE``."""
        result = super().to_schema()
        result['type'] = self.JS_TYPE
        return result

View file

@ -0,0 +1,97 @@
from collections import defaultdict
from re import search
from .base import SchemaStrategy
class Object(SchemaStrategy):
    """
    Object schema strategy.

    Keeps one schema node per property name and one per
    ``patternProperties`` pattern, together with the set of names that
    every input so far has agreed on as ``required``.
    """
    KEYWORDS = ('type', 'properties', 'patternProperties', 'required')

    @staticmethod
    def match_schema(schema):
        """Match schemas whose ``type`` is ``'object'``."""
        return schema.get('type') == 'object'

    @staticmethod
    def match_object(obj):
        """Match objects that are ``dict`` instances."""
        return isinstance(obj, dict)

    def __init__(self, node_class):
        super().__init__(node_class)
        # looking up an unknown key creates a fresh node for it
        self._properties = defaultdict(node_class)
        self._pattern_properties = defaultdict(node_class)
        # stays None until the first input contributes a set of names
        self._required = None
        # set once a schema explicitly carries an empty ``required`` list
        self._include_empty_required = False

    def add_schema(self, schema):
        """
        Merge an object schema.

        ``properties`` and ``patternProperties`` entries are merged into
        the matching nodes (an entry whose subschema is ``None`` still
        registers the key), and ``required`` is intersected with the
        names collected so far.

        :param schema: a JSON Schema ``dict``
        """
        super().add_schema(schema)

        if 'properties' in schema:
            for prop, subschema in schema['properties'].items():
                subnode = self._properties[prop]
                if subschema is not None:
                    subnode.add_schema(subschema)

        if 'patternProperties' in schema:
            for pattern, subschema in schema['patternProperties'].items():
                subnode = self._pattern_properties[pattern]
                if subschema is not None:
                    subnode.add_schema(subschema)

        if 'required' in schema:
            required = set(schema['required'])
            if not required:
                self._include_empty_required = True
            if self._required is None:
                self._required = required
            else:
                self._required &= required

    def add_object(self, obj):
        """
        Modify the schema to accommodate ``obj``.

        A property that is not yet known by name and matches a known
        pattern is merged into that pattern's node; every other property
        is merged into its own node and counted towards ``required``.

        :param obj: a ``dict``
        """
        properties = set()
        for prop, subobj in obj.items():
            pattern = None

            # patterns are only consulted for names not already tracked
            if prop not in self._properties:
                pattern = self._matching_pattern(prop)

            if pattern is not None:
                self._pattern_properties[pattern].add_object(subobj)
            else:
                properties.add(prop)
                self._properties[prop].add_object(subobj)

        if self._required is None:
            self._required = properties
        else:
            self._required &= properties

    def _matching_pattern(self, prop):
        """
        Return the first known pattern that ``re.search`` finds in
        ``prop``, or ``None`` when no pattern matches.
        """
        for pattern in self._pattern_properties:
            if search(pattern, prop):
                return pattern
        return None

    # NOTE: a former ``_add`` helper was dropped here. It read
    # ``self._items`` and ``self._schema_node_class``, attributes this
    # class never sets, so it could only ever raise AttributeError.

    def to_schema(self):
        """
        Serialize to a ``dict`` with ``type`` set to ``'object'``.

        ``properties`` and ``patternProperties`` are included when
        non-empty; ``required`` is included (sorted) when it is non-empty
        or when an empty ``required`` list was explicitly added.
        """
        schema = super().to_schema()
        schema['type'] = 'object'
        if self._properties:
            schema['properties'] = self._properties_to_schema(
                self._properties)
        if self._pattern_properties:
            schema['patternProperties'] = self._properties_to_schema(
                self._pattern_properties)
        if self._required or self._include_empty_required:
            schema['required'] = sorted(self._required)
        return schema

    def _properties_to_schema(self, properties):
        """Serialize a mapping of key -> node into key -> schema."""
        return {prop: schema_node.to_schema()
                for prop, schema_node in properties.items()}

View file

@ -0,0 +1,78 @@
from .base import SchemaStrategy, TypedSchemaStrategy
class Typeless(SchemaStrategy):
    """
    Schema strategy for schemas with no type. This is only used when
    there is no other active strategy, and it will be merged into the
    first typed strategy that gets added.
    """

    @classmethod
    def match_schema(cls, schema):
        """Match any schema that lacks a ``type`` keyword."""
        has_type = 'type' in schema
        return not has_type

    @classmethod
    def match_object(cls, obj):
        """Never match an object."""
        return False
class Null(TypedSchemaStrategy):
    """
    Strategy for ``null`` schemas; matches the Python value ``None``.
    """
    JS_TYPE = 'null'
    PYTHON_TYPE = type(None)
class Boolean(TypedSchemaStrategy):
    """
    Strategy for ``boolean`` schemas; matches Python ``bool`` values.
    """
    JS_TYPE = 'boolean'
    PYTHON_TYPE = bool
class String(TypedSchemaStrategy):
    """
    Strategy for ``string`` schemas; matches Python ``str`` values.
    """
    JS_TYPE = 'string'
    PYTHON_TYPE = str
class Number(SchemaStrategy):
    """
    Strategy for integer and number schemas. It starts out as
    `integer` and switches to `number` as soon as a float object or a
    number schema is added.
    """
    JS_TYPES = ('integer', 'number')
    PYTHON_TYPES = (int, float)

    @classmethod
    def match_schema(cls, schema):
        """Match schemas whose ``type`` is one of ``JS_TYPES``."""
        declared_type = schema.get('type')
        return declared_type in cls.JS_TYPES

    @classmethod
    def match_object(cls, obj):
        """Match objects whose exact type is ``int`` or ``float``."""
        # cannot use isinstance() because boolean is a subtype of int
        exact_type = type(obj)
        return exact_type in cls.PYTHON_TYPES

    def __init__(self, node_class):
        super().__init__(node_class)
        self._type = 'integer'

    def add_schema(self, schema):
        """Merge ``schema``; a ``number`` schema widens the type."""
        super().add_schema(schema)
        if schema.get('type') == 'number':
            self._type = 'number'

    def add_object(self, obj):
        """Widen the type to ``number`` when ``obj`` is a float."""
        if isinstance(obj, float):
            self._type = 'number'

    def to_schema(self):
        """Serialize to a ``dict`` carrying the current ``type``."""
        result = super().to_schema()
        result['type'] = self._type
        return result