Source code for shuup.utils.text

# -*- coding: utf-8 -*-
# This file is part of Shuup.
#
# Copyright (c) 2012-2021, Shuup Commerce Inc. All rights reserved.
#
# This source code is licensed under the OSL-3.0 license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import unicode_literals

import re
import unicodedata
from django.utils.encoding import force_str, force_text

__all__ = (
    "camel_case",
    "flatten",
    "identifierify",
    "kebab_case",
    "snake_case",
    "space_case",
)

WHITESPACE_RE = re.compile(r"\s+", re.UNICODE)
WORD_SEP_RE = re.compile(r"[\s_-]+", re.UNICODE)


[docs]def flatten(str, whitespace="-"): """ Flatten the given text into lowercase ASCII, removing diacriticals etc. Replace runs of whitespace with the given whitespace replacement. >>> print(flatten("hellö, wörld")) hello,-world :param str: The string to massage :type str: str :param whitespace: The string to replace whitespace with :type whitespace: str :return: A flattened string :rtype: str """ str = force_text(str).strip().lower() str = force_text(unicodedata.normalize("NFKD", str).encode("ascii", "ignore")) str = re.sub(WHITESPACE_RE, whitespace, str) return str
[docs]def identifierify(value, sep="_"): """ Identifierify the given text (keep only alphanumerics and the given separator(s). :param value: The text to identifierify :type value: str :param sep: The separator(s) to keep :type sep: str :return: An identifierified string :rtype: str """ return "".join(c for c in value if c.isalnum() or c in sep)
[docs]def snake_case(value): """ Snake_case the given value (join words with underscores). No other treatment is done; use `identifierify` for that. """ return "_".join(s.lower() for s in WORD_SEP_RE.split(force_text(value)) if s)
[docs]def kebab_case(value): """ Kebab-case the given value (join words with dashes). No other treatment is done; use `identifierify` for that. """ return "-".join(s.lower() for s in WORD_SEP_RE.split(force_text(value)) if s)
[docs]def camel_case(value): """ CamelCase the given value (join capitalized words). No other treatment is done; use `identifierify` for that. """ return "".join(s.title() for s in WORD_SEP_RE.split(force_text(value)) if s)
[docs]def space_case(value): """ Space case the given value (join words that may have been otherwise separated with spaces). No other treatment is done; use `identifierify` for that. """ return " ".join(s.lower() for s in WORD_SEP_RE.split(force_text(value)) if s)
def force_ascii(string, method="backslashreplace"): """ Force given string to ASCII str. :param string: String to convert :type string: str|unicode|bytes :param method: How to handle non-ASCII characters. Accepted values are 'backslashreplace' (default), 'xmlcharrefreplace', 'replace' and 'ignore'. :type method: str :rtype: str """ return force_str(force_text(string).encode("ascii", errors=method))