Source code for javaproperties.writing

from   datetime import datetime
from   io       import StringIO
import re
import sys
import time
from   typing   import Optional, TextIO, Union
from   .util    import itemize

if sys.version_info[:2] >= (3,9):
    from collections.abc import Iterable, Mapping
    from re import Match
    Tuple = tuple
else:
    from typing import Iterable, Mapping, Match, Tuple

[docs]def dump( props: Union[Mapping[str,str], Iterable[Tuple[str,str]]], fp: TextIO, separator: str = '=', comments: Optional[str] = None, timestamp: Union[None, bool, float, datetime] = True, sort_keys: bool = False, ensure_ascii: bool = True, ensure_ascii_comments: Optional[bool] = None, ) -> None: """ Write a series of key-value pairs to a file in simple line-oriented ``.properties`` format. .. versionchanged:: 0.6.0 ``ensure_ascii`` and ``ensure_ascii_comments`` parameters added :param props: A mapping or iterable of ``(key, value)`` pairs to write to ``fp``. All keys and values in ``props`` must be `str` values. If ``sort_keys`` is `False`, the entries are output in iteration order. :param TextIO fp: A file-like object to write the values of ``props`` to. It must have been opened as a text file. :param str separator: The string to use for separating keys & values. Only ``" "``, ``"="``, and ``":"`` (possibly with added whitespace) should ever be used as the separator. :param Optional[str] comments: if non-`None`, ``comments`` will be written to ``fp`` as a comment before any other content :param timestamp: If neither `None` nor `False`, a timestamp in the form of ``Mon Sep 02 14:00:54 EDT 2016`` is written as a comment to ``fp`` after ``comments`` (if any) and before the key-value pairs. If ``timestamp`` is `True`, the current date & time is used. If it is a number, it is converted from seconds since the epoch to local time. If it is a `datetime.datetime` object, its value is used directly, with naïve objects assumed to be in the local timezone. :type timestamp: `None`, `bool`, number, or `datetime.datetime` :param bool sort_keys: if true, the elements of ``props`` are sorted lexicographically by key in the output :param bool ensure_ascii: if true, all non-ASCII characters will be replaced with ``\\uXXXX`` escape sequences in the output; if false, non-ASCII characters will be passed through as-is :param Optional[bool] ensure_ascii_comments: if true, all non-ASCII characters in ``comments`` will be replaced with ``\\uXXXX`` escape sequences in the output; if `None`, only non-Latin-1 characters will be escaped; if false, no characters will be escaped :return: `None` """ if comments is not None: print(to_comment(comments, ensure_ascii=ensure_ascii_comments), file=fp) if timestamp is not None and timestamp is not False: print(to_comment(java_timestamp(timestamp)), file=fp) for k,v in itemize(props, sort_keys=sort_keys): print( join_key_value(k, v, separator, ensure_ascii=ensure_ascii), file=fp, )
[docs]def dumps( props: Union[Mapping[str,str], Iterable[Tuple[str,str]]], separator: str = '=', comments: Optional[str] = None, timestamp: Union[None, bool, float, datetime] = True, sort_keys: bool = False, ensure_ascii: bool = True, ensure_ascii_comments: Optional[bool] = None, ) -> str: """ Convert a series of key-value pairs to a `str` in simple line-oriented ``.properties`` format. .. versionchanged:: 0.6.0 ``ensure_ascii`` and ``ensure_ascii_comments`` parameters added :param props: A mapping or iterable of ``(key, value)`` pairs to serialize. All keys and values in ``props`` must be `str` values. If ``sort_keys`` is `False`, the entries are output in iteration order. :param str separator: The string to use for separating keys & values. Only ``" "``, ``"="``, and ``":"`` (possibly with added whitespace) should ever be used as the separator. :param Optional[str] comments: if non-`None`, ``comments`` will be output as a comment before any other content :param timestamp: If neither `None` nor `False`, a timestamp in the form of ``Mon Sep 02 14:00:54 EDT 2016`` is output as a comment after ``comments`` (if any) and before the key-value pairs. If ``timestamp`` is `True`, the current date & time is used. If it is a number, it is converted from seconds since the epoch to local time. If it is a `datetime.datetime` object, its value is used directly, with naïve objects assumed to be in the local timezone. :type timestamp: `None`, `bool`, number, or `datetime.datetime` :param bool sort_keys: if true, the elements of ``props`` are sorted lexicographically by key in the output :param bool ensure_ascii: if true, all non-ASCII characters will be replaced with ``\\uXXXX`` escape sequences in the output; if false, non-ASCII characters will be passed through as-is :param Optional[bool] ensure_ascii_comments: if true, all non-ASCII characters in ``comments`` will be replaced with ``\\uXXXX`` escape sequences in the output; if `None`, only non-Latin-1 characters will be escaped; if false, no characters will be escaped :rtype: text string """ s = StringIO() dump(props, s, separator=separator, comments=comments, timestamp=timestamp, sort_keys=sort_keys, ensure_ascii=ensure_ascii, ensure_ascii_comments=ensure_ascii_comments) return s.getvalue()
NON_ASCII_RGX = re.compile(r'[^\x00-\x7F]') NON_LATIN1_RGX = re.compile(r'[^\x00-\xFF]') NEWLINE_OLD_COMMENT_RGX = re.compile(r'\n(?![#!])') NON_N_EOL_RGX = re.compile(r'\r\n?')
[docs]def to_comment(comment: str, ensure_ascii: Optional[bool] = None) -> str: """ Convert a string to a ``.properties`` file comment. Non-Latin-1 or non-ASCII characters in the string may be escaped using ``\\uXXXX`` escapes (depending on the value of ``ensure_ascii``), a ``#`` is prepended to the string, any CR LF or CR line breaks in the string are converted to LF, and a ``#`` is inserted after any line break not already followed by a ``#`` or ``!``. No trailing newline is added. >>> to_comment('They say foo=bar,\\r\\nbut does bar=foo?') '#They say foo=bar,\\n#but does bar=foo?' .. versionchanged:: 0.6.0 ``ensure_ascii`` parameter added :param str comment: the string to convert to a comment :param Optional[bool] ensure_ascii: if true, all non-ASCII characters will be replaced with ``\\uXXXX`` escape sequences in the output; if `None`, only non-Latin-1 characters will be escaped; if false, no characters will be escaped :rtype: str """ comment = NON_N_EOL_RGX.sub('\n', comment) comment = NEWLINE_OLD_COMMENT_RGX.sub('\n#', comment) if ensure_ascii is None: comment = NON_LATIN1_RGX.sub(_esc, comment) elif ensure_ascii: comment = NON_ASCII_RGX.sub(_esc, comment) return '#' + comment
[docs]def join_key_value( key: str, value: str, separator: str = '=', ensure_ascii: bool = True, ) -> str: r""" Join a key and value together into a single line suitable for adding to a simple line-oriented ``.properties`` file. No trailing newline is added. >>> join_key_value('possible separators', '= : space') 'possible\\ separators=\\= \\: space' .. versionchanged:: 0.6.0 ``ensure_ascii`` parameter added :param str key: the key :param str value: the value :param str separator: the string to use for separating the key & value. Only ``" "``, ``"="``, and ``":"`` (possibly with added whitespace) should ever be used as the separator. :param bool ensure_ascii: if true, all non-ASCII characters will be replaced with ``\\uXXXX`` escape sequences in the output; if false, non-ASCII characters will be passed through as-is :rtype: str """ # Escapes `key` and `value` the same way as java.util.Properties.store() value = _base_escape(value, ensure_ascii=ensure_ascii) if value.startswith(' '): value = '\\' + value return escape(key, ensure_ascii=ensure_ascii) + separator + value
_escapes = { '\t': r'\t', '\n': r'\n', '\f': r'\f', '\r': r'\r', '!': r'\!', '#': r'\#', ':': r'\:', '=': r'\=', '\\': r'\\', } def _esc(m: Match[str]) -> str: c = m.group() try: return _escapes[c] except KeyError: return _to_u_escape(c) def _to_u_escape(c: str) -> str: co = ord(c) if co > 0xFFFF: # Does Python really not have a decent builtin way to calculate # surrogate pairs? assert co <= 0x10FFFF co -= 0x10000 return '\\u{0:04x}\\u{1:04x}'.format( 0xD800 + (co >> 10), 0xDC00 + (co & 0x3FF) ) else: return f'\\u{co:04x}' NEEDS_ESCAPE_ASCII_RGX = re.compile(r'[^\x20-\x7E]|[\\#!=:]') NEEDS_ESCAPE_UNICODE_RGX = re.compile(r'[\x00-\x1F\x7F]|[\\#!=:]') def _base_escape(field: str, ensure_ascii: bool = True) -> str: rgx = NEEDS_ESCAPE_ASCII_RGX if ensure_ascii else NEEDS_ESCAPE_UNICODE_RGX return rgx.sub(_esc, field)
[docs]def escape(field: str, ensure_ascii: bool = True) -> str: """ Escape a string so that it can be safely used as either a key or value in a ``.properties`` file. All non-ASCII characters, all nonprintable or space characters, and the characters ``\\ # ! = :`` are all escaped using either the single-character escapes recognized by `unescape` (when they exist) or ``\\uXXXX`` escapes (after converting non-BMP characters to surrogate pairs). .. versionchanged:: 0.6.0 ``ensure_ascii`` parameter added :param str field: the string to escape :param bool ensure_ascii: if true, all non-ASCII characters will be replaced with ``\\uXXXX`` escape sequences in the output; if false, non-ASCII characters will be passed through as-is :rtype: str """ return _base_escape(field, ensure_ascii=ensure_ascii).replace(' ', r'\ ')
DAYS_OF_WEEK = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] MONTHS = [ 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', ]
[docs]def java_timestamp(timestamp: Union[None, bool, float, datetime] = True) -> str: """ .. versionadded:: 0.2.0 Returns a timestamp in the format produced by |date_tostring|_, e.g.:: Mon Sep 02 14:00:54 EDT 2016 If ``timestamp`` is `True` (the default), the current date & time is returned. If ``timestamp`` is `None` or `False`, an empty string is returned. If ``timestamp`` is a number, it is converted from seconds since the epoch to local time. If ``timestamp`` is a `datetime.datetime` object, its value is used directly, with naïve objects assumed to be in the local timezone. The timestamp is always constructed using the C locale. :param timestamp: the date & time to display :type timestamp: `None`, `bool`, number, or `datetime.datetime` :rtype: str .. |date_tostring| replace:: Java 8's ``Date.toString()`` .. _date_tostring: https://docs.oracle.com/javase/8/docs/api/java/util/Date.html#toString-- """ if timestamp is None or timestamp is False: return '' if isinstance(timestamp, datetime) and timestamp.tzinfo is not None: timebits = timestamp.timetuple() # Assumes `timestamp.tzinfo.tzname()` is meaningful/useful tzname = timestamp.tzname() else: ### TODO: Reimplement this using datetime.astimezone() to convert ### everything to an aware datetime? ts: Optional[float] if timestamp is True: ts = None elif isinstance(timestamp, datetime): # Use `datetime.timestamp()`, as it (unlike `datetime.timetuple()`) # takes `fold` into account for naïve datetimes. ts = timestamp.timestamp() else: # If it's not a number, it's localtime()'s problem now. ts = timestamp timebits = time.localtime(ts) tzname = timebits.tm_zone assert 1 <= timebits.tm_mon <= 12, 'invalid month' assert 0 <= timebits.tm_wday <= 6, 'invalid day of week' return '{wday} {mon} {t.tm_mday:02d}' \ ' {t.tm_hour:02d}:{t.tm_min:02d}:{t.tm_sec:02d}' \ ' {tz} {t.tm_year:04d}'.format( t=timebits, tz=tzname, mon=MONTHS[timebits.tm_mon-1], wday=DAYS_OF_WEEK[timebits.tm_wday] )
[docs]def javapropertiesreplace_errors(e: Union[UnicodeError]) -> Tuple[str, int]: """ .. versionadded:: 0.6.0 Implements the ``'javapropertiesreplace'`` error handling (for text encodings only): unencodable characters are replaced by ``\\uXXXX`` escape sequences (with non-BMP characters converted to surrogate pairs first) """ if isinstance(e, UnicodeEncodeError): return (''.join(map(_to_u_escape, e.object[e.start:e.end])), e.end) else: raise e # pragma: no cover