Refactor code structure for improved readability and maintainability

This commit is contained in:
claudi 2026-04-07 09:10:53 +02:00
parent 389d72a136
commit aa4c067ea8
1685 changed files with 393439 additions and 71932 deletions

View file

@ -0,0 +1,12 @@
from .schema.builder import SchemaBuilder, Schema
from .schema.node import SchemaNode, SchemaGenerationError
from .schema.strategies.base import SchemaStrategy, TypedSchemaStrategy
# Package version string.
__version__ = '1.3.0'
# Public names exported by a star-import of this package; every entry
# is one of the names imported at the top of this module.
__all__ = [
    'SchemaBuilder',
    'SchemaNode',
    'SchemaGenerationError',
    'Schema',
    'SchemaStrategy',
    'TypedSchemaStrategy']

View file

@ -0,0 +1,155 @@
import argparse
import sys
import re
import json
from . import SchemaBuilder, __version__
class CLI:
    """
    Command-line front end: parse the arguments, feed every schema and
    object file into a ``SchemaBuilder`` and print the merged schema.
    """

    def __init__(self, prog=None):
        """
        Build the argument parser, parse the command line and create
        the builder.

        :param prog: program name passed to ``argparse.ArgumentParser``
          (used in usage/help/version output).
        """
        self._make_parser(prog)
        self._prepare_args()
        self.builder = SchemaBuilder(schema_uri=self.args.schema_uri)

    def run(self):
        """
        Merge all inputs and print the resulting schema. Exits through
        the parser's error handler when there is nothing to process.
        """
        if not self.args.schema and not self.args.object:
            self.fail('nothing to do - no schemas or objects given')
        self.add_schemas()
        self.add_objects()
        self.print_output()

    def add_schemas(self):
        """
        Merge every file given with ``-s/--schema`` into the builder.
        """
        for fp in self.args.schema:
            # ``with`` closes the file even when parsing/merging fails
            with fp:
                self._call_with_json_from_fp(self.builder.add_schema, fp)

    def add_objects(self):
        """
        Merge every positional object file (or stdin) into the builder.
        """
        for fp in self.args.object:
            # ``with`` closes the file even when parsing/merging fails
            with fp:
                self._call_with_json_from_fp(self.builder.add_object, fp)

    def print_output(self):
        """
        Write the generated schema to stdout as JSON, honouring the
        ``--indent`` option.
        """
        print(self.builder.to_json(indent=self.args.indent))

    def fail(self, message):
        """
        Report ``message`` through ``ArgumentParser.error`` (which
        prints usage and exits).
        """
        self.parser.error(message)

    def _make_parser(self, prog=None):
        """
        Create ``self.parser`` with all supported options.

        :param prog: program name forwarded to ``ArgumentParser``.
        """
        # the encoding has to be known before the FileType arguments
        # are declared, hence the separate pre-parse in _get_encoding()
        file_type = argparse.FileType('r', encoding=self._get_encoding())

        self.parser = argparse.ArgumentParser(
            add_help=False,
            prog=prog,
            description="""Generate one, unified JSON Schema from one or more
            JSON objects and/or JSON Schemas. Compatible with JSON-Schema Draft
            4 and above.""")

        # help is declared manually (add_help=False above) so that its
        # text matches the wording of the other options
        self.parser.add_argument(
            '-h', '--help', action='help', default=argparse.SUPPRESS,
            help='Show this help message and exit.')
        self.parser.add_argument(
            '--version', action='version', default=argparse.SUPPRESS,
            version='%(prog)s {}'.format(__version__),
            help='Show version number and exit.')
        self.parser.add_argument(
            '-d', '--delimiter', metavar='DELIM',
            help="""Set a delimiter. Use this option if the input files
            contain multiple JSON objects/schemas. You can pass any string. A
            few cases ('newline', 'tab', 'space') will get converted to a
            whitespace character. If this option is omitted, the parser will
            try to auto-detect boundaries.""")
        self.parser.add_argument(
            '-e', '--encoding', type=str, metavar='ENCODING',
            help="""Use ENCODING instead of the default system encoding
            when reading files. ENCODING must be a valid codec name or
            alias.""")
        self.parser.add_argument(
            '-i', '--indent', type=int, metavar='SPACES',
            help="""Pretty-print the output, indenting SPACES spaces.""")
        self.parser.add_argument(
            '-s', '--schema', action='append', default=[], type=file_type,
            help="""File containing a JSON Schema (can be specified multiple
            times to merge schemas).""")
        self.parser.add_argument(
            '-$', '--schema-uri', metavar='SCHEMA_URI', dest='schema_uri',
            default=SchemaBuilder.DEFAULT_URI,
            help="""The value of the '$schema' keyword (defaults to {default!r}
            or can be specified in a schema with the -s option). If {null!r} is
            passed, the "$schema" keyword will not be included in the
            result.""".format(default=SchemaBuilder.DEFAULT_URI,
                              null=SchemaBuilder.NULL_URI))
        self.parser.add_argument(
            'object', nargs=argparse.REMAINDER, type=file_type,
            help="""Files containing JSON objects (defaults to stdin if no
            arguments are passed).""")

    def _get_encoding(self):
        """
        use separate arg parser to grab encoding argument before
        defining FileType args

        :returns: the value of ``-e/--encoding`` or ``None`` if absent
        """
        parser = argparse.ArgumentParser(add_help=False)
        parser.add_argument('-e', '--encoding', type=str)
        args, _ = parser.parse_known_args()
        return args.encoding

    def _prepare_args(self):
        """
        Parse the command line into ``self.args`` and normalize it.
        """
        self.args = self.parser.parse_args()
        self._prepare_delimiter()

        # fall back to stdin when no object files were given and stdin
        # is not an interactive terminal (i.e. something is piped in)
        if not self.args.object and not sys.stdin.isatty():
            self.args.object.append(sys.stdin)

    def _prepare_delimiter(self):
        """
        manage special conversions for difficult bash characters
        """
        if self.args.delimiter == 'newline':
            self.args.delimiter = '\n'
        elif self.args.delimiter == 'tab':
            self.args.delimiter = '\t'
        elif self.args.delimiter == 'space':
            self.args.delimiter = ' '

    def _call_with_json_from_fp(self, method, fp):
        """
        Read ``fp``, split its content into JSON documents and call
        ``method`` once with each decoded document.

        :param method: callable taking one decoded JSON value
        :param fp: readable text file object (its ``name`` is used in
          error messages)
        """
        for json_string in self._get_json_strings(fp.read().strip()):
            try:
                json_obj = json.loads(json_string)
            except json.JSONDecodeError as err:
                # fail() exits via the parser, so json_obj is always
                # bound when the next statement runs
                self.fail('invalid JSON in {}: {}'.format(fp.name, err))
            method(json_obj)

    def _get_json_strings(self, raw_text):
        """
        Split ``raw_text`` into individual JSON document strings, using
        the configured delimiter or auto-detection when none is set.

        :returns: list of non-empty, stripped strings
        """
        if self.args.delimiter is None or self.args.delimiter == '':
            json_strings = self._detect_json_strings(raw_text)
        else:
            json_strings = raw_text.split(self.args.delimiter)

        # sanitize data before returning
        return [string.strip() for string in json_strings if string.strip()]

    @staticmethod
    def _detect_json_strings(raw_text):
        """
        Find the boundaries between concatenated JSON documents by
        decoding them one after another with ``JSONDecoder.raw_decode``.

        Unlike a textual split on ``}{``, this is not fooled by that
        character sequence occurring inside a string value. When a
        document cannot be decoded, the remaining text is returned as
        one final chunk so that the caller reports the JSON error.

        :param raw_text: text holding zero or more JSON documents
        :returns: list of document strings; ``[raw_text]`` when
          ``raw_text`` is empty or whitespace only
        """
        decoder = json.JSONDecoder()
        json_strings = []
        position = 0
        length = len(raw_text)

        while position < length:
            # raw_decode() does not skip leading whitespace itself
            if raw_text[position].isspace():
                position += 1
                continue
            try:
                _, stop = decoder.raw_decode(raw_text, position)
            except json.JSONDecodeError:
                json_strings.append(raw_text[position:])
                break
            json_strings.append(raw_text[position:stop])
            position = stop

        return json_strings or [raw_text]
def main():
    """
    Entry point: build a ``CLI`` with the default program name and
    run it.
    """
    cli = CLI()
    cli.run()
# Allow running this module directly as a script; 'genson' is passed
# to CLI as the program name for the argument parser.
if __name__ == "__main__":
    CLI('genson').run()

View file

@ -0,0 +1,158 @@
import json
from warnings import warn
from .node import SchemaNode
from .strategies import BASIC_SCHEMA_STRATEGIES
class _MetaSchemaBuilder(type):
    """
    Metaclass that assembles ``STRATEGIES`` and a matching
    ``NODE_CLASS`` for every builder class it creates.
    """

    def __init__(cls, name, bases, attrs):
        """
        When the class body declares ``EXTRA_STRATEGIES``, prepend them
        to the strategies found on the base classes (dropping
        duplicates, first occurrence wins), then derive a ``SchemaNode``
        subclass carrying the resulting strategy tuple.
        """
        super().__init__(name, bases, attrs)

        if 'EXTRA_STRATEGIES' in attrs:
            candidates = list(attrs['EXTRA_STRATEGIES'])
            # strategies inherited from base classes come after the extras
            for base in bases:
                candidates.extend(getattr(base, 'STRATEGIES', []))

            deduplicated = []
            for candidate in candidates:
                if candidate not in deduplicated:
                    deduplicated.append(candidate)

            cls.STRATEGIES = tuple(deduplicated)

        # node class specialised with this builder's strategies
        node_attrs = {'STRATEGIES': cls.STRATEGIES}
        cls.NODE_CLASS = type('%sSchemaNode' % name, (SchemaNode,), node_attrs)
class SchemaBuilder(metaclass=_MetaSchemaBuilder):
    """
    Top-level schema generator. Feed it any mix of existing schemas
    and sample objects, then serialize the merged result with
    ``to_schema`` or ``to_json``.
    """
    DEFAULT_URI = 'http://json-schema.org/schema#'
    NULL_URI = 'NULL'
    NODE_CLASS = SchemaNode
    STRATEGIES = BASIC_SCHEMA_STRATEGIES

    def __init__(self, schema_uri='DEFAULT'):
        """
        :param schema_uri: value for the ``$schema`` keyword. Left at
          its default, the first ``$schema`` found on an added schema
          is used, falling back to ``DEFAULT_URI``. ``None`` or
          ``False`` suppresses the ``"$schema"`` keyword entirely.
        :raises TypeError: if ``NODE_CLASS`` does not derive from
          ``SchemaNode``
        """
        if schema_uri is None or schema_uri is False:
            resolved_uri = self.NULL_URI
        elif schema_uri == 'DEFAULT':
            # None means "not decided yet" - see add_schema/_base_schema
            resolved_uri = None
        else:
            resolved_uri = schema_uri
        self.schema_uri = resolved_uri

        if not issubclass(self.NODE_CLASS, SchemaNode):
            raise TypeError("NODE_CLASS %r is not a subclass of SchemaNode"
                            % self.NODE_CLASS)
        self._root_node = self.NODE_CLASS()

    def add_schema(self, schema):
        """
        Merge a JSON Schema into this builder.

        :param schema: a ``dict``, a ``SchemaNode`` or another
          ``SchemaBuilder``

        .. note::
            The schema is not validated; a bad schema in may mean a
            bad schema out.
        """
        if isinstance(schema, SchemaBuilder):
            source_uri = schema.schema_uri
            schema = schema.to_schema()
            # an undecided builder only carries the default URI;
            # don't let that count as an explicit choice
            if source_uri is None:
                del schema['$schema']
        elif isinstance(schema, SchemaNode):
            schema = schema.to_schema()

        if '$schema' in schema:
            if not self.schema_uri:
                self.schema_uri = schema['$schema']
            # work on a copy so the caller's schema keeps its keyword
            schema = dict(schema)
            schema.pop('$schema')
        self._root_node.add_schema(schema)

    def add_object(self, obj):
        """
        Widen the schema so that it accommodates ``obj``.

        :param obj: any object or scalar that can be serialized in JSON
        """
        self._root_node.add_object(obj)

    def to_schema(self):
        """
        Build the schema from everything added so far.

        :rtype: ``dict``
        """
        return {**self._base_schema(), **self._root_node.to_schema()}

    def to_json(self, *args, **kwargs):
        """
        Build the schema and serialize it with ``json.dumps``; extra
        arguments are forwarded to ``json.dumps``.

        :rtype: ``str``
        """
        generated = self.to_schema()
        return json.dumps(generated, *args, **kwargs)

    def __len__(self):
        """
        Length of the root node (its number of top-level strategies);
        mainly useful as an emptiness check.
        """
        return len(self._root_node)

    def __eq__(self, other):
        """
        Compare with another ``SchemaBuilder``.

        :param other: object to compare; anything that is not an
          instance of this builder's class compares unequal
        """
        if other is self:
            return True
        if isinstance(other, self.__class__):
            # _base_schema() normalizes the $schema keyword for comparison
            same_base = self._base_schema() == other._base_schema()
            return same_base and self._root_node == other._root_node
        return False

    def _base_schema(self):
        """
        Return the part of the output that holds the ``$schema``
        keyword (empty when the keyword is suppressed).
        """
        if self.schema_uri == self.NULL_URI:
            return {}
        return {'$schema': self.schema_uri or self.DEFAULT_URI}
class Schema(SchemaBuilder):
    """
    Deprecated alias of ``SchemaBuilder`` that never emits the
    ``$schema`` keyword. Instantiating it issues a
    ``PendingDeprecationWarning``.
    """

    def __init__(self):
        # adjacent literals are concatenated verbatim, so the space
        # before 'instead.' has to be part of one of them
        warn('genson.Schema is deprecated in v1.0, and it may be '
             'removed in future versions. Use genson.SchemaBuilder '
             'instead.',
             PendingDeprecationWarning)
        super().__init__(schema_uri=SchemaBuilder.NULL_URI)

    def to_dict(self, recurse='DEPRECATED'):
        """
        Deprecated spelling of ``to_schema``.

        :param recurse: ignored; passing any value triggers an
          additional ``DeprecationWarning``
        :rtype: ``dict``
        """
        warn('#to_dict is deprecated in v1.0, and it may be removed in '
             'future versions. Use #to_schema instead.',
             PendingDeprecationWarning)
        if recurse != 'DEPRECATED':
            warn('the `recurse` option for #to_dict does nothing in v1.0',
                 DeprecationWarning)
        return self.to_schema()

View file

@ -0,0 +1,140 @@
from .strategies import BASIC_SCHEMA_STRATEGIES, Typeless
class SchemaGenerationError(RuntimeError):
    """
    Raised when no schema strategy matches a schema or object.
    """
class SchemaNode:
    """
    One node of the schema under construction. A node collects
    schemas and objects through a list of active strategies and can
    be serialized at any time.
    """
    STRATEGIES = BASIC_SCHEMA_STRATEGIES

    def __init__(self):
        self._active_strategies = []

    def add_schema(self, schema):
        """
        Merge an existing schema into this node.

        arguments:
        * `schema` (required - `dict` or `SchemaNode`):
          the JSON Schema to merge.

        Returns the node itself so calls can be chained.
        """
        # a SchemaNode is serialized first and then handled like a dict
        if isinstance(schema, SchemaNode):
            schema = schema.to_schema()

        for part in self._get_subschemas(schema):
            # each part is handled by the strategy that matches it
            self._get_strategy_for_schema(part).add_schema(part)

        return self

    def add_object(self, obj):
        """
        Widen this node so that it accommodates `obj`.

        arguments:
        * `obj` (required):
          a JSON value to use in generating the schema.

        Returns the node itself so calls can be chained.
        """
        self._get_strategy_for_object(obj).add_object(obj)
        return self

    def to_schema(self):
        """
        Serialize this node to a `dict`.
        """
        bare_types = set()
        other_schemas = []
        for strategy in self._active_strategies:
            partial = strategy.to_schema()
            # schemas consisting of nothing but a type get folded
            # into a single {'type': ...} entry below
            if len(partial) == 1 and 'type' in partial:
                bare_types.add(partial['type'])
            else:
                other_schemas.append(partial)

        if bare_types:
            if len(bare_types) == 1:
                type_value = bare_types.pop()
            else:
                type_value = sorted(bare_types)
            other_schemas.insert(0, {'type': type_value})

        if not other_schemas:
            return {}
        if len(other_schemas) == 1:
            return other_schemas[0]
        return {'anyOf': other_schemas}

    def __len__(self):
        return len(self._active_strategies)

    def __eq__(self, other):
        """ Required for SchemaBuilder.__eq__ to work properly """
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__

    # private methods

    def _get_subschemas(self, schema):
        """
        Flatten `anyOf` lists and list-valued `type` keywords into a
        list of schemas that each describe a single alternative.
        """
        if 'anyOf' in schema:
            flattened = []
            for alternative in schema['anyOf']:
                flattened.extend(self._get_subschemas(alternative))
            return flattened

        if isinstance(schema.get('type'), list):
            shared = dict(schema)
            shared.pop('type')
            return [dict(type=single_type, **shared)
                    for single_type in schema['type']]

        return [schema]

    def _get_strategy_for_schema(self, schema):
        return self._get_strategy_for_('schema', schema)

    def _get_strategy_for_object(self, obj):
        return self._get_strategy_for_('object', obj)

    def _get_strategy_for_(self, kind, schema_or_obj):
        """
        Return the strategy responsible for `schema_or_obj`, activating
        a new one when needed. `kind` is 'schema' or 'object' and picks
        the `match_*` method to consult.
        """
        matcher_name = 'match_' + kind
        active = self._active_strategies

        # strategies that are already active take precedence
        for existing in active:
            if getattr(existing, matcher_name)(schema_or_obj):
                return existing

        # otherwise activate the first available strategy that matches
        for strategy_class in self.STRATEGIES:
            if not getattr(strategy_class, matcher_name)(schema_or_obj):
                continue
            created = strategy_class(self.__class__)

            # a trailing typeless strategy is absorbed by the new one
            if active and isinstance(active[-1], Typeless):
                created.add_schema(active.pop().to_schema())

            active.append(created)
            return created

        # a schema without type goes to the first active strategy,
        # which is a fresh Typeless one if nothing is active yet
        if kind == 'schema' and Typeless.match_schema(schema_or_obj):
            if not active:
                active.append(Typeless(self.__class__))
            return active[0]

        # nothing can handle this input
        raise SchemaGenerationError(
            'Could not find matching schema type for {0}: {1!r}'.format(
                kind, schema_or_obj))

View file

@ -0,0 +1,37 @@
from .base import (
SchemaStrategy,
TypedSchemaStrategy
)
from .scalar import (
Typeless,
Null,
Boolean,
Number,
String
)
from .array import List, Tuple
from .object import Object
# Default strategy classes, listed in the order in which they are
# meant to be tried. Typeless is imported above but deliberately not
# part of this tuple.
BASIC_SCHEMA_STRATEGIES = (
    Null,
    Boolean,
    Number,
    String,
    List,
    Tuple,
    Object
)
# Public names exported by a star-import of this package.
__all__ = (
    'SchemaStrategy',
    'TypedSchemaStrategy',
    'Null',
    'Boolean',
    'Number',
    'String',
    'List',
    'Tuple',
    'Object',
    'Typeless',
    'BASIC_SCHEMA_STRATEGIES'
)

View file

@ -0,0 +1,79 @@
from .base import SchemaStrategy
class BaseArray(SchemaStrategy):
    """
    Common base for the array strategies. Subclasses provide
    ``_items`` and ``items_to_schema``.
    """
    KEYWORDS = ('type', 'items')

    @staticmethod
    def match_object(obj):
        """Any Python ``list`` is treated as an array."""
        return isinstance(obj, list)

    def to_schema(self):
        """
        Return the extra keywords plus ``type`` and, when ``_items`` is
        truthy, the ``items`` keyword.
        """
        result = super().to_schema()
        result['type'] = 'array'
        if not self._items:
            return result
        result['items'] = self.items_to_schema()
        return result
class List(BaseArray):
    """
    List-style arrays: one shared schema node describes every item.
    This is the default strategy for arrays.
    """

    @staticmethod
    def match_schema(schema):
        """
        Match array schemas whose ``items`` is a ``dict`` (a missing
        ``items`` counts as an empty dict).
        """
        if schema.get('type') != 'array':
            return False
        return isinstance(schema.get('items', {}), dict)

    def __init__(self, node_class):
        super().__init__(node_class)
        # single node shared by all items
        self._items = node_class()

    def add_schema(self, schema):
        """Record extra keywords and merge ``items`` if present."""
        super().add_schema(schema)
        if 'items' not in schema:
            return
        self._items.add_schema(schema['items'])

    def add_object(self, obj):
        """Merge every element of ``obj`` into the shared item node."""
        item_node = self._items
        for element in obj:
            item_node.add_object(element)

    def items_to_schema(self):
        """Serialize the shared item node."""
        return self._items.to_schema()
class Tuple(BaseArray):
    """
    Tuple-style arrays: each position has its own schema node. The
    ``items`` keyword is always emitted so the tuple shape survives.
    """

    @staticmethod
    def match_schema(schema):
        """Match array schemas whose ``items`` is a ``list``."""
        if schema.get('type') != 'array':
            return False
        return isinstance(schema.get('items'), list)

    def __init__(self, node_class):
        super().__init__(node_class)
        # starts with one positional node, so the list is never empty
        self._items = [node_class()]

    def add_schema(self, schema):
        """Record extra keywords and merge positional ``items``."""
        super().add_schema(schema)
        if 'items' not in schema:
            return
        self._add(schema['items'], 'add_schema')

    def add_object(self, obj):
        """Merge each element of ``obj`` into the node at its position."""
        self._add(obj, 'add_object')

    def _add(self, items, func):
        """
        Grow the positional nodes to at least ``len(items)`` and call
        the node method named ``func`` pairwise with each item.
        """
        missing = len(items) - len(self._items)
        for _ in range(missing):
            self._items.append(self.node_class())
        for node, item in zip(self._items, items):
            getattr(node, func)(item)

    def items_to_schema(self):
        """Serialize every positional node, in order."""
        return [node.to_schema() for node in self._items]

View file

@ -0,0 +1,78 @@
from copy import copy
from warnings import warn
class SchemaStrategy:
    """
    Root of all schema strategies. It defines the interface every
    strategy offers:

    * match_schema
    * match_object
    * __init__
    * add_schema
    * add_object
    * to_schema
    * __eq__
    """
    KEYWORDS = ('type',)

    @classmethod
    def match_schema(cls, schema):
        """Tell whether this strategy handles ``schema`` (abstract)."""
        raise NotImplementedError("'match_schema' not implemented")

    @classmethod
    def match_object(cls, obj):
        """Tell whether this strategy handles ``obj`` (abstract)."""
        raise NotImplementedError("'match_object' not implemented")

    def __init__(self, node_class):
        self.node_class = node_class
        # keywords outside KEYWORDS, passed through to the output
        self._extra_keywords = {}

    def add_schema(self, schema):
        """Remember the keywords of ``schema`` not listed in KEYWORDS."""
        self._add_extra_keywords(schema)

    def _add_extra_keywords(self, schema):
        """
        Store unknown keywords; the first value seen for a keyword
        wins, and a later conflicting value only triggers a warning.
        """
        stored = self._extra_keywords
        for keyword, value in schema.items():
            if keyword in self.KEYWORDS:
                continue
            if keyword not in stored:
                stored[keyword] = value
                continue
            previous = stored[keyword]
            if previous != value:
                warn(('Schema incompatible. Keyword {0!r} has conflicting '
                      'values ({1!r} vs. {2!r}). Using {1!r}').format(
                          keyword, previous, value))

    def add_object(self, obj):
        """Objects carry no extra keywords; nothing to do here."""
        pass

    def to_schema(self):
        """Return a shallow copy of the stored extra keywords."""
        return copy(self._extra_keywords)

    def __eq__(self, other):
        """ Required for SchemaBuilder.__eq__ to work properly """
        if not isinstance(other, self.__class__):
            return False
        return self.__dict__ == other.__dict__
class TypedSchemaStrategy(SchemaStrategy):
    """
    Base for strategies tied to a single scalar type. A subclass
    supplies two class constants:

    * `JS_TYPE`: a valid value of the `type` keyword
    * `PYTHON_TYPE`: Python type objects - can be a tuple of types
    """

    @classmethod
    def match_schema(cls, schema):
        """Match schemas whose ``type`` equals ``JS_TYPE``."""
        declared_type = schema.get('type')
        return declared_type == cls.JS_TYPE

    @classmethod
    def match_object(cls, obj):
        """Match objects that are instances of ``PYTHON_TYPE``."""
        return isinstance(obj, cls.PYTHON_TYPE)

    def to_schema(self):
        """Return the extra keywords plus the fixed ``type``."""
        result = super().to_schema()
        result['type'] = self.JS_TYPE
        return result

View file

@ -0,0 +1,97 @@
from collections import defaultdict
from re import search
from .base import SchemaStrategy
class Object(SchemaStrategy):
    """
    object schema strategy

    Tracks one schema node per property and per pattern property, and
    the set of properties that were present in every input.
    """
    KEYWORDS = ('type', 'properties', 'patternProperties', 'required')

    @staticmethod
    def match_schema(schema):
        """Match schemas whose ``type`` is ``'object'``."""
        return schema.get('type') == 'object'

    @staticmethod
    def match_object(obj):
        """Match Python ``dict`` instances."""
        return isinstance(obj, dict)

    def __init__(self, node_class):
        super().__init__(node_class)
        # nodes are created on first access of a property/pattern
        self._properties = defaultdict(node_class)
        self._pattern_properties = defaultdict(node_class)
        # None until the first schema with 'required' or first object
        self._required = None
        # set once a schema explicitly declared an empty 'required'
        self._include_empty_required = False

    def add_schema(self, schema):
        """
        Merge ``properties``, ``patternProperties`` and ``required``
        from ``schema``; all other keywords are stored as extras.
        """
        super().add_schema(schema)
        if 'properties' in schema:
            for prop, subschema in schema['properties'].items():
                # accessing the key registers the property even when
                # its subschema is None
                subnode = self._properties[prop]
                if subschema is not None:
                    subnode.add_schema(subschema)
        if 'patternProperties' in schema:
            for pattern, subschema in schema['patternProperties'].items():
                subnode = self._pattern_properties[pattern]
                if subschema is not None:
                    subnode.add_schema(subschema)
        if 'required' in schema:
            required = set(schema['required'])
            if not required:
                self._include_empty_required = True
            if self._required is None:
                self._required = required
            else:
                # only properties required by every input stay required
                self._required &= required

    def add_object(self, obj):
        """
        Merge the values of ``obj`` into the property nodes and narrow
        ``required`` to the properties present in every input.
        """
        properties = set()
        for prop, subobj in obj.items():
            pattern = None

            # known properties take precedence over pattern matches
            if prop not in self._properties:
                pattern = self._matching_pattern(prop)

            if pattern is not None:
                self._pattern_properties[pattern].add_object(subobj)
            else:
                properties.add(prop)
                self._properties[prop].add_object(subobj)

        if self._required is None:
            self._required = properties
        else:
            self._required &= properties

    def _matching_pattern(self, prop):
        """
        Return the first registered pattern that ``re.search`` finds
        in ``prop``, or ``None`` when no pattern matches.
        """
        for pattern in self._pattern_properties:
            if search(pattern, prop):
                return pattern
        return None

    def to_schema(self):
        """
        Return the extra keywords plus ``type`` and whichever of
        ``properties``, ``patternProperties`` and ``required`` apply.
        """
        schema = super().to_schema()
        schema['type'] = 'object'
        if self._properties:
            schema['properties'] = self._properties_to_schema(
                self._properties)
        if self._pattern_properties:
            schema['patternProperties'] = self._properties_to_schema(
                self._pattern_properties)
        if self._required or self._include_empty_required:
            schema['required'] = sorted(self._required)
        return schema

    def _properties_to_schema(self, properties):
        """Serialize a name -> node mapping into name -> schema."""
        return {prop: schema_node.to_schema()
                for prop, schema_node in properties.items()}

View file

@ -0,0 +1,78 @@
from .base import SchemaStrategy, TypedSchemaStrategy
class Typeless(SchemaStrategy):
    """
    Strategy for schemas that carry no ``type`` keyword. It only
    serves as a placeholder while no typed strategy is active and gets
    merged into the first typed strategy that is added.
    """

    @classmethod
    def match_schema(cls, schema):
        """Match any schema without a ``type`` key."""
        has_type = 'type' in schema
        return not has_type

    @classmethod
    def match_object(cls, obj):
        """Objects always have a type, so this never matches."""
        return False
class Null(TypedSchemaStrategy):
    """
    Strategy for the JSON ``null`` type (Python ``None``).
    """
    JS_TYPE = 'null'
    PYTHON_TYPE = type(None)
class Boolean(TypedSchemaStrategy):
    """
    Strategy for the JSON ``boolean`` type (Python ``bool``).
    """
    JS_TYPE = 'boolean'
    PYTHON_TYPE = bool
class String(TypedSchemaStrategy):
    """
    Strategy for the JSON ``string`` type (Python ``str``).
    """
    JS_TYPE = 'string'
    PYTHON_TYPE = str
class Number(SchemaStrategy):
    """
    Strategy covering both `integer` and `number`. It starts out as
    `integer` and is promoted to `number` as soon as a float object or
    a `number` schema is added.
    """
    JS_TYPES = ('integer', 'number')
    PYTHON_TYPES = (int, float)

    @classmethod
    def match_schema(cls, schema):
        """Match schemas typed `integer` or `number`."""
        declared_type = schema.get('type')
        return declared_type in cls.JS_TYPES

    @classmethod
    def match_object(cls, obj):
        """Match exact ``int`` and ``float`` instances."""
        # exact type check on purpose: bool is a subclass of int and
        # must not be treated as a number
        return type(obj) in cls.PYTHON_TYPES

    def __init__(self, node_class):
        super().__init__(node_class)
        self._type = 'integer'

    def add_schema(self, schema):
        """Record extra keywords; a `number` schema promotes the type."""
        super().add_schema(schema)
        if schema.get('type') != 'number':
            return
        self._type = 'number'

    def add_object(self, obj):
        """A float promotes the type to `number`."""
        if not isinstance(obj, float):
            return
        self._type = 'number'

    def to_schema(self):
        """Return the extra keywords plus the current numeric type."""
        result = super().to_schema()
        result['type'] = self._type
        return result