Source code for javaproperties.reading

from   __future__ import unicode_literals
import re
from   six        import binary_type, StringIO, BytesIO, unichr

[docs]def load(fp, object_pairs_hook=dict): """ Parse the contents of the `~io.IOBase.readline`-supporting file-like object ``fp`` as a ``.properties`` file and return a `dict` of the key-value pairs. ``fp`` may be either a text or binary filehandle, with or without universal newlines enabled. If it is a binary filehandle, its contents are decoded as Latin-1. By default, the key-value pairs extracted from ``fp`` are combined into a `dict` with later occurrences of a key overriding previous occurrences of the same key. To change this behavior, pass a callable as the ``object_pairs_hook`` argument; it will be called with one argument, a generator of ``(key, value)`` pairs representing the key-value entries in ``fp`` (including duplicates) in order of occurrence. `load` will then return the value returned by ``object_pairs_hook``. :param fp: the file from which to read the ``.properties`` document :type fp: file-like object :param callable object_pairs_hook: class or function for combining the key-value pairs :rtype: `dict` of text strings or the return value of ``object_pairs_hook`` """ return object_pairs_hook((k,v) for k,v,_ in parse(fp) if k is not None)
[docs]def loads(s, object_pairs_hook=dict): """ Parse the contents of the string ``s`` as a ``.properties`` file and return a `dict` of the key-value pairs. ``s`` may be either a text string or bytes string. If it is a bytes string, its contents are decoded as Latin-1. By default, the key-value pairs extracted from ``s`` are combined into a `dict` with later occurrences of a key overriding previous occurrences of the same key. To change this behavior, pass a callable as the ``object_pairs_hook`` argument; it will be called with one argument, a generator of ``(key, value)`` pairs representing the key-value entries in ``s`` (including duplicates) in order of occurrence. `loads` will then return the value returned by ``object_pairs_hook``. :param string s: the string from which to read the ``.properties`` document :param callable object_pairs_hook: class or function for combining the key-value pairs :rtype: `dict` of text strings or the return value of ``object_pairs_hook`` """ if isinstance(s, binary_type): fp = BytesIO(s) else: fp = StringIO(s) return load(fp, object_pairs_hook=object_pairs_hook)
[docs]def parse(fp): """ Parse the contents of the `~io.IOBase.readline`-supporting file-like object ``fp`` as a ``.properties`` file and return a generator of ``(key, value, original_lines)`` triples, including duplicate keys and including comments & blank lines (which have their ``key`` and ``value`` fields set to `None`). This is the only way to extract comments from a ``.properties`` file. ``fp`` may be either a text or binary filehandle, with or without universal newlines enabled. If it is a binary filehandle, its contents are decoded as Latin-1. :param fp: the file from which to read the ``.properties`` document :type fp: file-like object :rtype: generator of triples of text strings """ def readline(): ln = fp.readline() if isinstance(ln, binary_type): ln = ln.decode('iso-8859-1') return ln while True: line = source = readline() if line == '': return if re.match(r'^[ \t\f]*(?:[#!]|\r?\n?$)', line): yield (None, None, source) continue line = line.lstrip(' \t\f').rstrip('\r\n') while re.search(r'(?<!\\)(?:\\\\)*\\$', line): line = line[:-1] nextline = readline() # '' at EOF source += nextline line += nextline.lstrip(' \t\f').rstrip('\r\n') if line == '': # series of otherwise-blank lines with continuations yield (None, None, source) continue m = re.search(r'(?<!\\)(?:\\\\)*([ \t\f]*[=:]|[ \t\f])[ \t\f]*', line) if m: yield (unescape(line[:m.start(1)]),unescape(line[m.end():]),source) else: yield (unescape(line), '', source)
_unescapes = {'t': '\t', 'n': '\n', 'f': '\f', 'r': '\r'} def _unesc(m): esc = m.group(1) if len(esc) == 1: return _unescapes.get(esc, esc) else: return unichr(int(esc[1:], 16)) def _unsurrogate(m): c,d = map(ord, m.group()) return unichr(((c - 0xD800) << 10) + (d - 0xDC00) + 0x10000)
[docs]def unescape(field): """ Decode escape sequences in a ``.properties`` key or value. The following escape sequences are recognized:: \\t \\n \\f \\r \\uXXXX \\\\ If a backslash is followed by any other character, the backslash is dropped. In addition, any valid UTF-16 surrogate pairs in the string after escape-decoding are further decoded into the non-BMP characters they represent. (Invalid & isolated surrogate code points are left as-is.) :param field: the string to decode :type field: text string :rtype: text string """ return re.sub(r'[\uD800-\uDBFF][\uDC00-\uDFFF]', _unsurrogate, re.sub(r'\\(u[0-9A-Fa-f]{4}|.)', _unesc, field))