Source code for nti.externalization.representation

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
External representation support.

The provided implementations of
`~nti.externalization.interfaces.IExternalObjectIO` live here. We
provide and register two, one for `JSON <.EXT_REPR_JSON>` and one for
`YAML <.EXT_REPR_YAML>`.
"""
import decimal
from typing import cast

try:
    from persistent import Persistent
except ModuleNotFoundError:
    class Persistent: # type:ignore[no-redef]
        """Mock"""
    class POSError(Exception): # type:ignore[no-redef]
        """Mock"""
else:
    from ZODB.POSException import POSError # type:ignore[no-redef]
try:
    import orjson
    _HAS_ORJSON = True
except ModuleNotFoundError:
    import json
    _HAS_ORJSON = False

import yaml
from zope import component
from zope import interface

from ._base_interfaces import NotGiven as _NotGiven
from .externalization import toExternalObject
from .interfaces import EXT_REPR_JSON
from .interfaces import EXT_REPR_YAML
from .interfaces import IExternalObjectIO
from .interfaces import IExternalObjectRepresenter

__all__ = [
    'to_external_representation',
    'to_json_representation',
    'to_json_representation_fast',
    'to_json_representation_sorted',
    'WithRepr',
    'JsonRepresenter',
    'OrJsonRepresenter',
    'StdJsonRepresenter',
    'YamlRepresenter',
]

# Driver functions

def _to_external_representation(obj, io, name=_NotGiven,
                                **repr_kwargs) -> str|bytes:
    """
    Externalize *obj* and hand the result to *io* for serialization.

    :param obj: The object to externalize.
    :param io: An object providing a ``dump(ext, **kwargs)`` callable.
    :param name: Forwarded to ``toExternalObject`` as its *name* argument.
    :param repr_kwargs: Forwarded unchanged to ``io.dump``.
    :return: Whatever ``io.dump`` returns.
    """
    externalized = toExternalObject(obj, name=name)
    return io.dump(externalized, **repr_kwargs)

def to_external_representation(obj, ext_format=EXT_REPR_JSON,
                               name=_NotGiven, **repr_kwargs) -> str|bytes:
    """
    to_external_representation(obj, ext_format='json', name=NotGiven, **repr_kwargs) -> str|bytes

    Transform *obj* into its external (string) representation and
    return it.

    The object is externalized with
    :func:`nti.externalization.to_external_object`, which receives
    the *name*.

    :param str ext_format: One of `.EXT_REPR_JSON` or `.EXT_REPR_YAML`,
        or the name of some other utility that implements
        `~nti.externalization.interfaces.IExternalObjectRepresenter`

    The *repr_kwargs* are passed to the dump method of the representer.

    .. versionchanged:: 3.0.0
       Added *repr_kwargs*
    .. versionchanged:: 3.1.0
       Removed the deprecated 'registry' param
    """
    # Doing this in a single step during externalization itself would
    # be appealing, but it would traverse parts of the data structure
    # more often than needed. This way the whole structure is walked
    # exactly twice.
    representer = component.getUtility(IExternalObjectRepresenter,
                                       name=ext_format)
    return _to_external_representation(obj, representer, name, **repr_kwargs)
def to_json_representation(obj) -> str:
    """
    Convenience wrapper: call :func:`to_external_representation`
    with `.EXT_REPR_JSON` and no extra representer arguments.
    """
    json_text = to_external_representation(obj, EXT_REPR_JSON)
    return cast(str, json_text)
def to_json_representation_fast(obj) -> bytes:
    """
    Convenience wrapper that produces `.EXT_REPR_JSON` output with
    parameters chosen for speed (unsorted keys, bytes output).

    Note that this bypasses utility lookup and directly uses
    :class:`JsonRepresenter`. It is also only fastest when using
    orjson.

    .. versionadded:: 3.0.0
    .. versionchanged:: 3.1.0
       Now properly externalizes the object instead of relying
       on the second-chance externalization mechanism.
    """
    json_bytes = _to_external_representation(
        obj,
        JsonRepresenter,
        sort_keys=False,
        as_str=False,
    )
    return cast(bytes, json_bytes)
def to_json_representation_sorted(obj) -> str:
    """
    Like `to_json_representation`, but guarantees that the keys are
    sorted.

    This is slower, but may be helpful in tests that do string
    comparisons. Note that this bypasses utility lookup and directly
    uses :class:`JsonRepresenter`

    .. versionadded:: 3.1.0
    """
    json_text = _to_external_representation(
        obj,
        JsonRepresenter,
        sort_keys=True,
    )
    return cast(str, json_text)
# JSON

class _FakeDecimalDumper:
    """
    Minimal stand-in for a YAML dumper.

    It exposes the three ``represent_*`` methods that
    ``_yaml_represent_decimal`` calls, but returns plain Python
    numbers instead of YAML nodes so the same decimal logic can be
    reused when producing JSON.
    """

    def represent_int(self, d):
        # Integral decimals become a real int.
        return int(d)

    def represent_float(self, f):
        # Already a float (NaN or an infinity); pass it through.
        return f

    def represent_scalar(self, _tag, d):
        # *d* is the textual form of the decimal; the tag is ignored.
        return float(d)


def _second_pass_to_external_object(obj):
    """
    Fallback used as the ``default`` hook of the JSON serializers.

    Decimals are converted to ``int``/``float``. Anything else is
    externalized with the ``'second-pass'`` name.

    :raises TypeError: If externalizing returned *obj* itself,
        meaning nothing could convert it.
    """
    if isinstance(obj, decimal.Decimal):
        return _yaml_represent_decimal(_FakeDecimalDumper(), obj)

    externalized = toExternalObject(obj, name='second-pass')
    if externalized is not obj:
        return externalized
    raise TypeError(repr(obj) + " is not serializable")
@interface.named(EXT_REPR_JSON)
@interface.implementer(IExternalObjectIO)
class OrJsonRepresenter:
    """
    Default IO object using ``orjson`` for JSON input/output.
    """

    @staticmethod
    def dump(obj, fp=None, sort_keys=False, as_str=True, **_unused) -> str|bytes:
        """
        dump(obj, fp=None, sort_keys=False, as_str=True) -> str|bytes

        Given an object that is known to already be in an externalized form,
        convert it to JSON. This can be about 10% faster than requiring a pass
        across all the sub-objects of the object to check that they are in external
        form, while still handling a few corner cases with a second-pass conversion.
        (These things creep in during the object decorator phase and are usually
        links.)

        :param fp: Optional object with a ``write`` method. If it is given
            (and truthy), the serialized data is written to it and the
            return value is whatever ``fp.write`` returned.
        :param bool sort_keys: If true, keys are emitted in sorted order.
        :param bool as_str: If true, the bytes produced by orjson are decoded
            as UTF-8 to a ``str``; otherwise bytes are produced.

        .. versionchanged:: 3.0.0
           Added the *sort_keys* parameter, defaulting to false for speed.

           Added the *as_str* parameter, defaulting to true for backwards compatibility.
           If set to false, then a bytes object will be returned (and written to any *fp*).
           Bytes is orjson's native output format, meaning no encoding/decoding is required
           when this is false.

        Other keyword arguments are ignored.
        """
        result = orjson.dumps(obj,
                              option=orjson.OPT_SORT_KEYS if sort_keys else 0,
                              default=_second_pass_to_external_object)
        if as_str:
            result = result.decode('utf-8') # type:ignore[assignment]
        if fp:
            return fp.write(result)
        return result

    @staticmethod
    def load(stream):
        """
        Parse JSON from *stream* and return the resulting object.

        :param stream: Either the JSON data itself (anything
            ``orjson.loads`` accepts) or an object with a callable
            ``read`` attribute, in which case the result of ``read()``
            is parsed.

        This is a static method, like `dump`, so it can be called on
        the class as well as on an instance.
        """
        reader = getattr(stream, 'read', None)
        # File-like objects are drained first; orjson itself only
        # understands in-memory data.
        data = reader() if callable(reader) else stream
        return orjson.loads(data)
@interface.named(EXT_REPR_JSON)
@interface.implementer(IExternalObjectIO)
class StdJsonRepresenter:
    """
    Default IO object using :mod:`json` for JSON input/output.
    """

    @staticmethod
    def dump(obj, fp=None, sort_keys=False, as_str=True, **_unused) -> str|bytes:
        """
        dump(obj, fp=None, sort_keys=False, as_str=True) -> str|bytes

        Given an object that is known to already be in an externalized form,
        convert it to JSON. This can be about 10% faster than requiring a pass
        across all the sub-objects of the object to check that they are in external
        form, while still handling a few corner cases with a second-pass conversion.
        (These things creep in during the object decorator phase and are usually
        links.)

        :param fp: Optional object with a ``write`` method. If it is given
            (and truthy), the serialized data is written to it and the
            return value is whatever ``fp.write`` returned.
        :param bool sort_keys: Passed to :func:`json.dumps`.
        :param bool as_str: If false, the ``str`` produced by
            :func:`json.dumps` is encoded as UTF-8 bytes.

        .. versionchanged:: 3.0.0
           Added the *sort_keys* parameter, defaulting to false for speed.

           Added the *as_str* parameter, defaulting to true for backwards compatibility
           and speed. If set to false, then a bytes object will be returned
           (and written to any *fp*). Because str is the standard library's default
           output format, this requires encoding.

        Other keyword arguments are ignored.
        """
        result = json.dumps(obj, # pylint: disable=used-before-assignment
                            sort_keys=sort_keys,
                            default=_second_pass_to_external_object)
        if not as_str:
            result = result.encode('utf-8') # type:ignore[assignment]
        if fp:
            return fp.write(result)
        return result

    @staticmethod
    def load(stream):
        """
        Parse JSON from *stream* and return the resulting object.

        :param stream: Either the JSON data itself (anything
            :func:`json.loads` accepts) or an object with a callable
            ``read`` attribute, in which case the result of ``read()``
            is parsed.

        This is a static method, like `dump`, so it can be called on
        the class as well as on an instance.
        """
        reader = getattr(stream, 'read', None)
        # File-like objects are drained first; json.loads only
        # understands in-memory data.
        data = reader() if callable(reader) else stream
        return json.loads(data) # pylint: disable=used-before-assignment
# Prefer the orjson-backed implementation whenever orjson imported.
JsonRepresenter = OrJsonRepresenter if _HAS_ORJSON else StdJsonRepresenter # type:ignore

# This is meant for dumping already externalized objects, but
# because of the second_pass_to_external_object default,
# it will actually dump any dumpable object by first externalizing
# it. Try not to rely on that.
to_json_representation_externalized = JsonRepresenter.dump

# YAML

class _ExtDumper(yaml.SafeDumper):
    """
    Safe dumper that writes our special object types (such as
    LocatedExternalList/Dict and the ContentFragment subtypes) as
    plain YAML data structures.

    To make that happen, the base types are registered below as
    multi-representers.
    """

# 'add_representer' requires an exact type match, whereas
# 'add_multi_representer' also accepts subclasses of the given type.
_ExtDumper.add_multi_representer(list, _ExtDumper.represent_list)
_ExtDumper.add_multi_representer(dict, _ExtDumper.represent_dict)
_ExtDumper.add_multi_representer(str, _ExtDumper.represent_str)


def _yaml_represent_decimal(dumper, data):
    """
    Represent the decimal *data* using the ``represent_*`` methods
    of *dumper*.

    Values whose text has no ``'.'`` and parses as an int go to
    ``represent_int``; NaN and the infinities go to
    ``represent_float``; everything else is emitted as a float
    scalar built from the lower-cased text of the decimal.
    """
    text = str(data)
    if '.' not in text:
        try:
            int(text)
        except ValueError:
            # Not integral after all (exponent form, NaN, Infinity...).
            pass
        else:
            return dumper.represent_int(data)

    # TODO: Try replacing these with math.nan and math.inf
    # pylint: disable=consider-math-not-float
    if data.is_nan():
        return dumper.represent_float(float('nan'))
    if data.is_infinite():
        infinity = float('-inf') if data.is_signed() else float('+inf')
        return dumper.represent_float(infinity)

    return dumper.represent_scalar('tag:yaml.org,2002:float', text.lower())

_ExtDumper.add_representer(decimal.Decimal, _yaml_represent_decimal)

# PyYAML uses the multi dumper on ``None`` as the fallback when
# nothing else can be found.
def _yaml_represent_unknown(dumper, data):
    """
    Representer for objects no other representer handles: convert
    *data* with ``_second_pass_to_external_object`` and let *dumper*
    represent the converted value.
    """
    externalized = _second_pass_to_external_object(data)
    return dumper.represent_data(externalized)

_ExtDumper.add_multi_representer(None, _yaml_represent_unknown) # type:ignore[arg-type]


class _UnicodeLoader(yaml.SafeLoader):
    """
    Safe loader whose string constructor returns the scalar value
    of the node as-is.
    """

    def construct_yaml_str(self, node):
        # YAML defines strings to be unicode, yet the default reader
        # encodes anything representable as ASCII back to bytes.
        # That is not wanted here, so return the scalar directly.
        return self.construct_scalar(node)

_UnicodeLoader.add_constructor(
    'tag:yaml.org,2002:str',
    _UnicodeLoader.construct_yaml_str,
)
@interface.named(EXT_REPR_YAML)
@interface.implementer(IExternalObjectIO)
class YamlRepresenter:
    """
    Default IO object using :mod:`yaml` for object input/output.
    """

    @staticmethod
    def dump(obj, fp=None, **_unused) -> str:
        """
        dump(obj, fp=None) -> str

        Serialize *obj* as YAML with the module's custom dumper,
        passing *fp* to :func:`yaml.dump` as its ``stream``.

        Other keyword arguments are ignored.
        """
        # PyYAML 5.1 changed the default_flow_style default from None
        # to False.
        #  - False: multi-line, indented, verbose output. Readable for
        #    humans, but it takes more space and rules out simple
        #    parsing with JSON.
        #  - True: JSON-compatible output in many cases.
        #  - None (the old default): backwards-compatible output that
        #    is a hybrid of indented and JSON-like.
        # https://github.com/yaml/pyyaml/issues/199
        dump_options = {
            'stream': fp,
            'Dumper': _ExtDumper,
            'default_flow_style': True,
        }
        return yaml.dump(obj, **dump_options)

    @staticmethod
    def load(stream):
        """
        Parse YAML from *stream* using the module's safe loader
        subclass.
        """
        return yaml.load(stream, Loader=_UnicodeLoader)
# Misc

def _type_name(self):
    """Return ``module.ClassName`` for the type of *self*."""
    cls = type(self)
    return cls.__module__ + '.' + cls.__name__


def _default_repr(self):
    """Default formatter: type name, ``id`` in hex, and ``__dict__``."""
    # While this executes we always see an unwrapped self, even if
    # the caller reached us through a proxy.
    return "<%s at %x %s>" % (_type_name(self), id(self), self.__dict__)


def make_repr(default=_default_repr):
    """
    Build a ``__repr__`` function around *default*.

    :param default: Callable taking the object and returning its
        repr string. A non-callable value falls back to
        ``_default_repr``.
    :return: A function suitable for assignment to ``__repr__``. It
        turns ``POSError``, ``ValueError``, ``LookupError`` and
        ``AttributeError`` raised by *default* into a short
        descriptive string instead of propagating them.
    """
    if not callable(default):
        default = _default_repr

    def __repr__(self):
        try:
            return default(self)
        except POSError as cse:
            return '<%s(Ghost, %r)>' % (_type_name(self), cse)
        except (ValueError, LookupError, AttributeError) as e:
            # The first two cover things like an invalid NTIID or
            # missing registrations. The last would be a weird
            # database-related issue.
            return '<%s(%r)>' % (_type_name(self), e)

    return __repr__


class _PReprException(Exception):
    """
    Exception whose only job is to carry a smuggled repr: its own
    ``repr()`` is the string it was constructed with.
    """

    def __init__(self, value):
        Exception.__init__(self)
        self.value = value

    def __repr__(self):
        return self.value


def _add_repr_to_cls(cls, default=_default_repr):
    """
    Install a repr on *cls* and return *cls*.

    Non-persistent classes get a ``__repr__`` built by `make_repr`.
    Subclasses of ``Persistent`` get a ``_p_repr`` method instead,
    which raises ``_PReprException`` carrying the formatted text.
    """
    if not issubclass(cls, Persistent):
        cls.__repr__ = make_repr(default)
        return cls

    # Persistent 4.4 includes the OID and JAR repr by default, and
    # catches all the exceptions that our make_repr would catch,
    # handling them much better. By default we only want the
    # __dict__ in there, though.
    def _dict_only(self):
        return repr(self.__dict__)

    formatter = _dict_only if default is _default_repr else default

    def _p_repr(self):
        raise _PReprException(formatter(self))

    cls._p_repr = _p_repr # pylint:disable=protected-access
    return cls
def WithRepr(default=_default_repr):
    """
    A class decorator factory to give a ``__repr__`` to the object.
    Useful for persistent objects.

    It may be applied bare (``@WithRepr``) or called as a factory
    (``@WithRepr(some_callable)``).

    :param default: A callable to be used for the default value.
    """
    # A single argument that is a type means we were applied bare
    # (@WithRepr), so decorate that type right away.
    if isinstance(default, type):
        return _add_repr_to_cls(default)

    # None or anything else means we were called as a factory, so
    # hand back the actual decorator.
    def decorate(cls):
        return _add_repr_to_cls(cls, default)

    return decorate