# Source code for javaproperties.reading

from   io     import BytesIO, StringIO
import re
import sys
from   typing import Any, Callable, IO, Type, TypeVar, Union, overload
from   .util  import CONTINUED_RGX, ascii_splitlines

# Generic-alias compatibility shim: on Python 3.9+ the builtin containers and
# the ``collections.abc`` / ``re`` classes are subscriptable, so use them
# directly; older interpreters fall back to the ``typing`` equivalents.
if sys.version_info[:2] >= (3,9):
    from collections.abc import Iterable, Iterator
    from re import Match
    Dict = dict
    Tuple = tuple
else:
    from typing import Dict, Iterable, Iterator, Match, Tuple

# Return type of a caller-supplied ``object_pairs_hook``
T = TypeVar('T')

@overload
def load(fp: IO) -> Dict[str, str]:
    ...

@overload
def load(fp: IO, object_pairs_hook: Type[T]) -> T:
    ...

@overload
def load(fp: IO, object_pairs_hook: Callable[[Iterator[Tuple[str,str]]], T]) -> T:
    ...

def load(fp, object_pairs_hook=dict):  # type: ignore[no-untyped-def]
    """
    Parse the contents of the `~io.IOBase.readline`-supporting file-like object
    ``fp`` as a simple line-oriented ``.properties`` file and return a `dict`
    of the key-value pairs.

    ``fp`` may be either a text or binary filehandle, with or without universal
    newlines enabled.  If it is a binary filehandle, its contents are decoded
    as Latin-1.

    By default, the key-value pairs extracted from ``fp`` are combined into a
    `dict` with later occurrences of a key overriding previous occurrences of
    the same key.  To change this behavior, pass a callable as the
    ``object_pairs_hook`` argument; it will be called with one argument, a
    generator of ``(key, value)`` pairs representing the key-value entries in
    ``fp`` (including duplicates) in order of occurrence.  `load` will then
    return the value returned by ``object_pairs_hook``.

    .. versionchanged:: 0.5.0
        Invalid ``\\uXXXX`` escape sequences will now cause an
        `InvalidUEscapeError` to be raised

    :param IO fp: the file from which to read the ``.properties`` document
    :param callable object_pairs_hook: class or function for combining the
        key-value pairs
    :rtype: `dict` of text strings or the return value of ``object_pairs_hook``
    :raises InvalidUEscapeError: if an invalid ``\\uXXXX`` escape sequence
        occurs in the input
    """
    # Comments and blank lines are also yielded by `parse()`; only the
    # key-value entries are handed to the hook.
    return object_pairs_hook(
        (kv.key, kv.value) for kv in parse(fp) if isinstance(kv, KeyValue)
    )
@overload
def loads(s: Union[str, bytes]) -> Dict[str, str]:
    ...

@overload
def loads(s: Union[str, bytes], object_pairs_hook: Type[T]) -> T:
    ...

@overload
def loads(s: Union[str, bytes], object_pairs_hook: Callable[[Iterator[Tuple[str,str]]], T]) -> T:
    ...

def loads(s, object_pairs_hook=dict):  # type: ignore[no-untyped-def]
    """
    Parse the contents of the string ``s`` as a simple line-oriented
    ``.properties`` file and return a `dict` of the key-value pairs.

    ``s`` may be either a text string or bytes string.  If it is a bytes
    string, its contents are decoded as Latin-1.

    By default, the key-value pairs extracted from ``s`` are combined into a
    `dict` with later occurrences of a key overriding previous occurrences of
    the same key.  To change this behavior, pass a callable as the
    ``object_pairs_hook`` argument; it will be called with one argument, a
    generator of ``(key, value)`` pairs representing the key-value entries in
    ``s`` (including duplicates) in order of occurrence.  `loads` will then
    return the value returned by ``object_pairs_hook``.

    .. versionchanged:: 0.5.0
        Invalid ``\\uXXXX`` escape sequences will now cause an
        `InvalidUEscapeError` to be raised

    :param Union[str,bytes] s: the string from which to read the
        ``.properties`` document
    :param callable object_pairs_hook: class or function for combining the
        key-value pairs
    :rtype: `dict` of text strings or the return value of ``object_pairs_hook``
    :raises InvalidUEscapeError: if an invalid ``\\uXXXX`` escape sequence
        occurs in the input
    """
    # Wrap the string in the matching in-memory file type and delegate to
    # `load()`, which handles both text and binary handles.
    fp = BytesIO(s) if isinstance(s, bytes) else StringIO(s)
    return load(fp, object_pairs_hook=object_pairs_hook)
# Matches an entire comment line whose text looks like a timestamp in the
# ``dow mon dd hh:mm:ss zzz yyyy`` layout.  The pieces are joined verbatim, so
# the single spaces that begin most fragments are significant: when the
# timezone is absent, the two spaces around it remain.
TIMESTAMP_RGX = re.compile(''.join((
    r'\A[ \t\f]*[#!][ \t\f]*',                                # comment marker
    r'(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)',                       # day of week
    r' (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)',  # month
    r' (?:[012][0-9]|3[01])',                                 # day of month
    r' (?:[01][0-9]|2[0-3]):[0-5][0-9]:(?:[0-5][0-9]|6[01])', # hh:mm:ss
    r' (?:[A-Za-z_0-9]{3})?',                                 # optional zone
    r' [0-9]{4,}',                                            # year
    r'[ \t\f]*\r?\n?\Z',                                      # trailing blanks/EOL
)))
class PropertiesElement(Iterable[str]):
    """
    .. versionadded:: 0.7.0

    Superclass of objects returned by `parse()`
    """

    def __init__(self, source: str) -> None:
        #: The raw, unmodified input line (including trailing newlines)
        self.source: str = source

    def __iter__(self) -> Iterator[str]:
        # Iterating yields the element's fields; subclasses with more fields
        # override this, which also feeds `__eq__` below.
        return iter((self.source,))

    def __eq__(self, other: Any) -> bool:
        # Only elements of exactly the same class are comparable; equality is
        # then decided field by field via `__iter__`.
        if type(self) is type(other):
            return tuple(self) == tuple(other)
        else:
            return NotImplemented

    def __repr__(self) -> str:
        return (
            '{0.__module__}.{0.__name__}(source={1.source!r})'
            .format(type(self), self)
        )

    @property
    def source_stripped(self) -> str:
        """
        Like `source`, but with the final trailing newline and line
        continuation (if any) removed
        """
        s = self.source.rstrip('\r\n')
        # NOTE(review): CONTINUED_RGX comes from `.util`; presumably it matches
        # a trailing unescaped backslash (line continuation) -- confirm there.
        if CONTINUED_RGX.search(s):
            s = s[:-1]
        return s

    def _with_source(self, newsource: str) -> "PropertiesElement":
        # Build a copy of the same concrete class with a different `source`.
        return type(self)(source=newsource)
class Comment(PropertiesElement):
    """
    .. versionadded:: 0.7.0

    Subclass of `PropertiesElement` representing a comment
    """

    @property
    def value(self) -> str:
        """
        Returns the contents of the comment, with the comment marker, any
        whitespace leading up to it, and the trailing newline removed
        """
        s = self.source.lstrip(' \t\f')
        # Drop exactly one marker character; anything after it (including
        # further '#'/'!' characters or spaces) is part of the value.
        if s.startswith(('#', '!')):
            s = s[1:]
        return s.rstrip('\r\n')

    @property
    def source_stripped(self) -> str:
        """
        Like `source`, but with the final trailing newline (if any) removed
        """
        # Unlike the base class, no line-continuation backslash is removed.
        return self.source.rstrip('\r\n')

    def is_timestamp(self) -> bool:
        """
        Returns `True` iff the comment's value appears to be a valid timestamp
        as produced by Java 8's ``Date.toString()``
        """
        # The pattern is applied to the whole raw line, marker and newline
        # included.
        return bool(TIMESTAMP_RGX.fullmatch(self.source))
class Whitespace(PropertiesElement):
    """
    .. versionadded:: 0.7.0

    Subclass of `PropertiesElement` representing a line that is either empty or
    contains only whitespace (and possibly some line continuations)
    """
class KeyValue(PropertiesElement):
    """
    .. versionadded:: 0.7.0

    Subclass of `PropertiesElement` representing a key-value entry
    """

    def __init__(self, key: str, value: str, source: str) -> None:
        super().__init__(source=source)
        #: The entry's key, after processing escape sequences
        self.key: str = key
        #: The entry's value, after processing escape sequences
        self.value: str = value

    def __iter__(self) -> Iterator[str]:
        # Yields all three fields, so inherited equality compares key, value
        # and raw source together.
        return iter((self.key, self.value, self.source))

    def __repr__(self) -> str:
        return (
            '{0.__module__}.{0.__name__}(key={1.key!r}, value={1.value!r},'
            ' source={1.source!r})'
            .format(type(self), self)
        )

    def _with_source(self, newsource: str) -> "KeyValue":
        # Same key and value, different raw `source`.
        return type(self)(key=self.key, value=self.value, source=newsource)
# A line whose first non-blank character is a comment marker ('#' or '!')
COMMENT_RGX = re.compile(r'^[ \t\f]*[#!]')

# A line made up solely of blanks and an optional line ending
BLANK_RGX = re.compile(r'^[ \t\f]*\r?\n?\Z')

# The key/value separator: '=' or ':' with optional surrounding blanks, or a
# run of blanks on its own.  The lookbehind plus the backslash-pair prefix
# reject a separator preceded by an odd number of backslashes (i.e. escaped).
# Group 1 starts where the key ends; the end of the whole match is where the
# value begins.
SEPARATOR_RGX = re.compile(r'(?<!\\)(?:\\\\)*([ \t\f]*[=:]|[ \t\f])[ \t\f]*')
def parse(src: Union[IO, str, bytes]) -> Iterator[PropertiesElement]:
    """
    Parse the given data as a simple line-oriented ``.properties`` file and
    return a generator of `PropertiesElement` objects representing the
    key-value pairs (as `KeyValue` objects), comments (as `Comment` objects),
    and blank lines (as `Whitespace` objects) in the input in order of
    occurrence.  If the same key appears multiple times in the input, a
    separate `KeyValue` object is emitted for each entry.

    ``src`` may be a text string, a bytes string, or a text or binary
    filehandle/file-like object supporting the `~io.IOBase.readline` method
    (with or without universal newlines enabled).  Bytes input is decoded as
    Latin-1.

    .. versionchanged:: 0.5.0
        Invalid ``\\uXXXX`` escape sequences will now cause an
        `InvalidUEscapeError` to be raised

    .. versionchanged:: 0.7.0
        `parse()` now accepts strings as input, and it now returns a generator
        of custom objects instead of triples of strings

    :param src: the ``.properties`` document
    :type src: string or file-like object
    :rtype: Iterator[PropertiesElement]
    :raises InvalidUEscapeError: if an invalid ``\\uXXXX`` escape sequence
        occurs in the input
    """
    liter: Iterator[str]
    if isinstance(src, bytes):
        liter = iter(ascii_splitlines(src.decode('iso-8859-1')))
    elif isinstance(src, str):
        liter = iter(ascii_splitlines(src))
    else:
        fp: IO = src

        def lineiter() -> Iterator[str]:
            # Read until readline() signals EOF with an empty result, decoding
            # binary lines as Latin-1.  Each result is re-split with
            # ascii_splitlines() (from `.util`) before being yielded.
            # NOTE(review): presumably because a handle without universal
            # newlines can return several logical lines at once -- confirm.
            while True:
                raw = fp.readline()
                ll: str
                if isinstance(raw, bytes):
                    ll = raw.decode('iso-8859-1')
                else:
                    ll = raw
                if ll == '':
                    return
                yield from ascii_splitlines(ll)

        liter = lineiter()

    for source in liter:
        line = source
        if COMMENT_RGX.match(line):
            yield Comment(source)
            continue
        elif BLANK_RGX.match(line):
            yield Whitespace(source)
            continue
        line = line.lstrip(' \t\f').rstrip('\r\n')
        # Join continued lines: drop the last character (the continuation
        # backslash) and append the next physical line, minus its leading
        # blanks and its line ending.  `source` keeps the raw text of every
        # physical line consumed.
        # NOTE(review): CONTINUED_RGX comes from `.util`; presumably it matches
        # a trailing unescaped backslash -- confirm there.
        while CONTINUED_RGX.search(line):
            line = line[:-1]
            nextline = next(liter, '')
            source += nextline
            line += nextline.lstrip(' \t\f').rstrip('\r\n')
        if line == '':  # series of otherwise-blank lines with continuations
            yield Whitespace(source)
            continue
        m = SEPARATOR_RGX.search(line)
        if m:
            # Key is everything before group 1; value is everything after the
            # whole separator match.
            yield KeyValue(
                unescape(line[:m.start(1)]),
                unescape(line[m.end():]),
                source,
            )
        else:
            # No separator: the whole line is a key with an empty value.
            yield KeyValue(unescape(line), '', source)
# A high surrogate immediately followed by a low surrogate
SURROGATE_PAIR_RGX = re.compile(r'[\uD800-\uDBFF][\uDC00-\uDFFF]')

# A backslash followed by either 'u' plus up to four more characters or by any
# single character; group 1 is the text after the backslash
ESCAPE_RGX = re.compile(r'\\(u.{0,4}|.)')

# A well-formed ``u`` escape body: 'u' and exactly four hexadecimal digits
U_ESCAPE_RGX = re.compile(r'^u[0-9A-Fa-f]{4}\Z')


def unescape(field: str) -> str:
    """
    Decode escape sequences in a ``.properties`` key or value.  The following
    escape sequences are recognized::

        \\t \\n \\f \\r \\uXXXX \\\\

    If a backslash is followed by any other character, the backslash is
    dropped.

    In addition, any valid UTF-16 surrogate pairs in the string after
    escape-decoding are further decoded into the non-BMP characters they
    represent.  (Invalid & isolated surrogate code points are left as-is.)

    .. versionchanged:: 0.5.0
        Invalid ``\\uXXXX`` escape sequences will now cause an
        `InvalidUEscapeError` to be raised

    :param str field: the string to decode
    :rtype: str
    :raises InvalidUEscapeError: if an invalid ``\\uXXXX`` escape sequence
        occurs in the input
    """
    # Two passes: first resolve backslash escapes, then merge any surrogate
    # pairs that the first pass produced (or that were already present).
    return SURROGATE_PAIR_RGX.sub(_unsurrogate, ESCAPE_RGX.sub(_unesc, field))


_unescapes = {'t': '\t', 'n': '\n', 'f': '\f', 'r': '\r'}


def _unesc(m: Match[str]) -> str:
    # Replacement callback for ESCAPE_RGX: group 1 is the escape body.
    esc = m.group(1)
    if esc[0] == 'u':
        if not U_ESCAPE_RGX.match(esc):
            # We can't rely on `int` failing, because it succeeds when `esc`
            # has trailing whitespace or a leading minus.
            raise InvalidUEscapeError('\\' + esc)
        return chr(int(esc[1:], 16))
    else:
        # Known single-letter escapes map to control characters; any other
        # character stands for itself (the backslash is simply dropped).
        return _unescapes.get(esc, esc)


def _unsurrogate(m: Match[str]) -> str:
    # Replacement callback for SURROGATE_PAIR_RGX: the match is exactly two
    # code units (high, low), combined using the UTF-16 decoding formula.
    c,d = map(ord, m.group())
    uord = ((c - 0xD800) << 10) + (d - 0xDC00) + 0x10000
    return chr(uord)
class InvalidUEscapeError(ValueError):
    """
    .. versionadded:: 0.5.0

    Raised when an invalid ``\\uXXXX`` escape sequence (i.e., a ``\\u`` not
    immediately followed by four hexadecimal digits) is encountered in a simple
    line-oriented ``.properties`` file
    """

    def __init__(self, escape: str) -> None:
        # Populate ``args`` so the exception can be copied and pickled: the
        # default exception reconstruction calls ``cls(*self.args)``, which
        # would fail if ``args`` were left empty.
        super().__init__(escape)
        #: The invalid ``\uXXXX`` escape sequence encountered
        self.escape: str = escape

    def __str__(self) -> str:
        return 'Invalid \\u escape sequence: ' + self.escape