Source code for javaproperties.writing

from __future__ import annotations
from collections.abc import Iterable, Mapping
from datetime import datetime
from io import StringIO
import re
import time
from typing import TextIO
from .util import itemize



[docs]
def dump(
    props: Mapping[str, str] | Iterable[tuple[str, str]],
    fp: TextIO,
    separator: str = "=",
    comments: str | None = None,
    timestamp: None | bool | float | datetime = True,
    sort_keys: bool = False,
    ensure_ascii: bool = True,
    ensure_ascii_comments: bool | None = None,
) -> None:
    """
    Write a series of key-value pairs to a file in simple line-oriented
    ``.properties`` format.

    .. versionchanged:: 0.6.0
        ``ensure_ascii`` and ``ensure_ascii_comments`` parameters added

    :param props: A mapping or iterable of ``(key, value)`` pairs to write to
        ``fp``.  All keys and values in ``props`` must be `str` values.  If
        ``sort_keys`` is `False`, the entries are output in iteration order.
    :param TextIO fp: A file-like object to write the values of ``props`` to.
        It must have been opened as a text file.
    :param str separator: The string to use for separating keys & values.  Only
        ``" "``, ``"="``, and ``":"`` (possibly with added whitespace) should
        ever be used as the separator.
    :param Optional[str] comments: if non-`None`, ``comments`` will be written
        to ``fp`` as a comment before any other content
    :param timestamp: If neither `None` nor `False`, a timestamp in the form of
        ``Mon Sep 02 14:00:54 EDT 2016`` is written as a comment to ``fp``
        after ``comments`` (if any) and before the key-value pairs.  If
        ``timestamp`` is `True`, the current date & time is used.  If it is a
        number, it is converted from seconds since the epoch to local time.  If
        it is a `datetime.datetime` object, its value is used directly, with
        naïve objects assumed to be in the local timezone.
    :type timestamp: `None`, `bool`, number, or `datetime.datetime`
    :param bool sort_keys: if true, the elements of ``props`` are sorted
        lexicographically by key in the output
    :param bool ensure_ascii: if true, all non-ASCII characters will be
        replaced with ``\\uXXXX`` escape sequences in the output; if false,
        non-ASCII characters will be passed through as-is
    :param Optional[bool] ensure_ascii_comments: if true, all non-ASCII
        characters in ``comments`` will be replaced with ``\\uXXXX`` escape
        sequences in the output; if `None`, only non-Latin-1 characters will be
        escaped; if false, no characters will be escaped
    :return: `None`
    """
    if comments is not None:
        print(to_comment(comments, ensure_ascii=ensure_ascii_comments), file=fp)
    if timestamp is not None and timestamp is not False:
        print(to_comment(java_timestamp(timestamp)), file=fp)
    for k, v in itemize(props, sort_keys=sort_keys):
        print(
            join_key_value(k, v, separator, ensure_ascii=ensure_ascii),
            file=fp,
        )




[docs]
def dumps(
    props: Mapping[str, str] | Iterable[tuple[str, str]],
    separator: str = "=",
    comments: str | None = None,
    timestamp: None | bool | float | datetime = True,
    sort_keys: bool = False,
    ensure_ascii: bool = True,
    ensure_ascii_comments: bool | None = None,
) -> str:
    """
    Convert a series of key-value pairs to a `str` in simple line-oriented
    ``.properties`` format.

    .. versionchanged:: 0.6.0
        ``ensure_ascii`` and ``ensure_ascii_comments`` parameters added

    :param props: A mapping or iterable of ``(key, value)`` pairs to serialize.
        All keys and values in ``props`` must be `str` values.  If
        ``sort_keys`` is `False`, the entries are output in iteration order.
    :param str separator: The string to use for separating keys & values.  Only
        ``" "``, ``"="``, and ``":"`` (possibly with added whitespace) should
        ever be used as the separator.
    :param Optional[str] comments: if non-`None`, ``comments`` will be output
        as a comment before any other content
    :param timestamp: If neither `None` nor `False`, a timestamp in the form of
        ``Mon Sep 02 14:00:54 EDT 2016`` is output as a comment after
        ``comments`` (if any) and before the key-value pairs.  If ``timestamp``
        is `True`, the current date & time is used.  If it is a number, it is
        converted from seconds since the epoch to local time.  If it is a
        `datetime.datetime` object, its value is used directly, with naïve
        objects assumed to be in the local timezone.
    :type timestamp: `None`, `bool`, number, or `datetime.datetime`
    :param bool sort_keys: if true, the elements of ``props`` are sorted
        lexicographically by key in the output
    :param bool ensure_ascii: if true, all non-ASCII characters will be
        replaced with ``\\uXXXX`` escape sequences in the output; if false,
        non-ASCII characters will be passed through as-is
    :param Optional[bool] ensure_ascii_comments: if true, all non-ASCII
        characters in ``comments`` will be replaced with ``\\uXXXX`` escape
        sequences in the output; if `None`, only non-Latin-1 characters will be
        escaped; if false, no characters will be escaped
    :rtype: text string
    """
    s = StringIO()
    dump(
        props,
        s,
        separator=separator,
        comments=comments,
        timestamp=timestamp,
        sort_keys=sort_keys,
        ensure_ascii=ensure_ascii,
        ensure_ascii_comments=ensure_ascii_comments,
    )
    return s.getvalue()



NON_ASCII_RGX = re.compile(r"[^\x00-\x7F]")
NON_LATIN1_RGX = re.compile(r"[^\x00-\xFF]")
NEWLINE_OLD_COMMENT_RGX = re.compile(r"\n(?![#!])")
NON_N_EOL_RGX = re.compile(r"\r\n?")



[docs]
def to_comment(comment: str, ensure_ascii: bool | None = None) -> str:
    """
    Convert a string to a ``.properties`` file comment.  Non-Latin-1 or
    non-ASCII characters in the string may be escaped using ``\\uXXXX`` escapes
    (depending on the value of ``ensure_ascii``), a ``#`` is prepended to the
    string, any CR LF or CR line breaks in the string are converted to LF, and
    a ``#`` is inserted after any line break not already followed by a ``#`` or
    ``!``.  No trailing newline is added.

    >>> to_comment('They say foo=bar,\\r\\nbut does bar=foo?')
    '#They say foo=bar,\\n#but does bar=foo?'

    .. versionchanged:: 0.6.0
        ``ensure_ascii`` parameter added

    :param str comment: the string to convert to a comment
    :param Optional[bool] ensure_ascii: if true, all non-ASCII characters will
        be replaced with ``\\uXXXX`` escape sequences in the output; if `None`,
        only non-Latin-1 characters will be escaped; if false, no characters
        will be escaped
    :rtype: str
    """
    comment = NON_N_EOL_RGX.sub("\n", comment)
    comment = NEWLINE_OLD_COMMENT_RGX.sub("\n#", comment)
    if ensure_ascii is None:
        comment = NON_LATIN1_RGX.sub(_esc, comment)
    elif ensure_ascii:
        comment = NON_ASCII_RGX.sub(_esc, comment)
    return "#" + comment




[docs]
def join_key_value(
    key: str,
    value: str,
    separator: str = "=",
    ensure_ascii: bool = True,
) -> str:
    r"""
    Join a key and value together into a single line suitable for adding to a
    simple line-oriented ``.properties`` file.  No trailing newline is added.

    >>> join_key_value('possible separators', '= : space')
    'possible\\ separators=\\= \\: space'

    .. versionchanged:: 0.6.0
        ``ensure_ascii`` parameter added

    :param str key: the key
    :param str value: the value
    :param str separator: the string to use for separating the key & value.
        Only ``" "``, ``"="``, and ``":"`` (possibly with added whitespace)
        should ever be used as the separator.
    :param bool ensure_ascii: if true, all non-ASCII characters will be
        replaced with ``\\uXXXX`` escape sequences in the output; if false,
        non-ASCII characters will be passed through as-is
    :rtype: str
    """
    # Escapes `key` and `value` the same way as java.util.Properties.store()
    value = _base_escape(value, ensure_ascii=ensure_ascii)
    if value.startswith(" "):
        value = "\\" + value
    return escape(key, ensure_ascii=ensure_ascii) + separator + value



_escapes = {
    "\t": r"\t",
    "\n": r"\n",
    "\f": r"\f",
    "\r": r"\r",
    "!": r"\!",
    "#": r"\#",
    ":": r"\:",
    "=": r"\=",
    "\\": r"\\",
}


def _esc(m: re.Match[str]) -> str:
    c = m.group()
    try:
        return _escapes[c]
    except KeyError:
        return _to_u_escape(c)


def _to_u_escape(c: str) -> str:
    co = ord(c)
    if co > 0xFFFF:
        # Does Python really not have a decent builtin way to calculate
        # surrogate pairs?
        assert co <= 0x10FFFF
        co -= 0x10000
        return f"\\u{0xD800 + (co >> 10):04x}\\u{0xDC00 + (co & 0x3FF):04x}"
    else:
        return f"\\u{co:04x}"


NEEDS_ESCAPE_ASCII_RGX = re.compile(r"[^\x20-\x7E]|[\\#!=:]")
NEEDS_ESCAPE_UNICODE_RGX = re.compile(r"[\x00-\x1F\x7F]|[\\#!=:]")


def _base_escape(field: str, ensure_ascii: bool = True) -> str:
    rgx = NEEDS_ESCAPE_ASCII_RGX if ensure_ascii else NEEDS_ESCAPE_UNICODE_RGX
    return rgx.sub(_esc, field)



[docs]
def escape(field: str, ensure_ascii: bool = True) -> str:
    """
    Escape a string so that it can be safely used as either a key or value in a
    ``.properties`` file.  All non-ASCII characters, all nonprintable or space
    characters, and the characters ``\\ # ! = :`` are all escaped using either
    the single-character escapes recognized by `unescape` (when they exist) or
    ``\\uXXXX`` escapes (after converting non-BMP characters to surrogate
    pairs).

    .. versionchanged:: 0.6.0
        ``ensure_ascii`` parameter added

    :param str field: the string to escape
    :param bool ensure_ascii: if true, all non-ASCII characters will be
        replaced with ``\\uXXXX`` escape sequences in the output; if false,
        non-ASCII characters will be passed through as-is
    :rtype: str
    """
    return _base_escape(field, ensure_ascii=ensure_ascii).replace(" ", r"\ ")



DAYS_OF_WEEK = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

MONTHS = [
    "Jan",
    "Feb",
    "Mar",
    "Apr",
    "May",
    "Jun",
    "Jul",
    "Aug",
    "Sep",
    "Oct",
    "Nov",
    "Dec",
]



[docs]
def java_timestamp(timestamp: None | bool | float | datetime = True) -> str:
    """
    .. versionadded:: 0.2.0

    Returns a timestamp in the format produced by |date_tostring|_, e.g.::

        Mon Sep 02 14:00:54 EDT 2016

    If ``timestamp`` is `True` (the default), the current date & time is
    returned.

    If ``timestamp`` is `None` or `False`, an empty string is returned.

    If ``timestamp`` is a number, it is converted from seconds since the epoch
    to local time.

    If ``timestamp`` is a `datetime.datetime` object, its value is used
    directly, with naïve objects assumed to be in the local timezone.

    The timestamp is always constructed using the C locale.

    :param timestamp: the date & time to display
    :type timestamp: `None`, `bool`, number, or `datetime.datetime`
    :rtype: str

    .. |date_tostring| replace:: Java 8's ``Date.toString()``
    .. _date_tostring: https://docs.oracle.com/javase/8/docs/api/java/util/Date.html#toString--
    """
    if timestamp is None or timestamp is False:
        return ""
    if isinstance(timestamp, datetime) and timestamp.tzinfo is not None:
        timebits = timestamp.timetuple()
        # Assumes `timestamp.tzinfo.tzname()` is meaningful/useful
        tzname = timestamp.tzname()
    else:
        ### TODO: Reimplement this using datetime.astimezone() to convert
        ### everything to an aware datetime?
        ts: float | None
        if timestamp is True:
            ts = None
        elif isinstance(timestamp, datetime):
            # Use `datetime.timestamp()`, as it (unlike `datetime.timetuple()`)
            # takes `fold` into account for naïve datetimes.
            ts = timestamp.timestamp()
        else:
            # If it's not a number, it's localtime()'s problem now.
            ts = timestamp
        timebits = time.localtime(ts)
        tzname = timebits.tm_zone
    assert 1 <= timebits.tm_mon <= 12, "invalid month"
    assert 0 <= timebits.tm_wday <= 6, "invalid day of week"
    return (
        "{wday} {mon} {t.tm_mday:02d}"
        " {t.tm_hour:02d}:{t.tm_min:02d}:{t.tm_sec:02d}"
        " {tz} {t.tm_year:04d}".format(
            t=timebits,
            tz=tzname,
            mon=MONTHS[timebits.tm_mon - 1],
            wday=DAYS_OF_WEEK[timebits.tm_wday],
        )
    )




[docs]
def javapropertiesreplace_errors(e: UnicodeError) -> tuple[str, int]:
    """
    .. versionadded:: 0.6.0

    Implements the ``'javapropertiesreplace'`` error handling (for text
    encodings only): unencodable characters are replaced by ``\\uXXXX`` escape
    sequences (with non-BMP characters converted to surrogate pairs first)
    """
    if isinstance(e, UnicodeEncodeError):
        return ("".join(map(_to_u_escape, e.object[e.start : e.end])), e.end)
    else:
        raise e  # pragma: no cover