Source code for plydata.cat_tools

"""
Functions for categoricals
"""
from itertools import chain, product

import numpy as np
import pandas as pd
import pandas.api.types as pdtypes
from pandas.core.algorithms import value_counts

from .utils import last2

__all__ = [
    'cat_anon',
    'cat_collapse',
    'cat_concat',
    'cat_drop',
    'cat_expand',
    'cat_explicit_na',
    'cat_infreq',
    'cat_inorder',
    'cat_inseq',
    'cat_lump',
    'cat_lump_lowfreq',
    'cat_lump_min',
    'cat_lump_n',
    'cat_lump_prop',
    'cat_move',
    'cat_other',
    'cat_recode',
    'cat_relabel',
    'cat_relevel',
    'cat_rename',
    'cat_reorder',
    'cat_reorder2',
    'cat_rev',
    'cat_shift',
    'cat_shuffle',
    'cat_unify',
    'cat_zip',
]


[docs]def cat_infreq(c, ordered=None):
    """
    Reorder categorical by frequency of the values

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    ordered : bool
        If ``True``, the categorical is ordered.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> x = ['d', 'a', 'b', 'b', 'c', 'c', 'c']
    >>> cat_infreq(x)
    ['d', 'a', 'b', 'b', 'c', 'c', 'c']
    Categories (4, object): ['c', 'b', 'd', 'a']
    >>> cat_infreq(x, ordered=True)
    ['d', 'a', 'b', 'b', 'c', 'c', 'c']
    Categories (4, object): ['c' < 'b' < 'd' < 'a']

    When two or more values occur the same number of times, if the
    categorical is ordered, the order is preserved. If it is not
    not ordered, the order depends on that of the values. Above 'd'
    comes before 'a', and below 'a' comes before 'a'.

    >>> c = pd.Categorical(
    ...     x, categories=['a', 'c', 'b', 'd']
    ... )
    >>> cat_infreq(c)
    ['d', 'a', 'b', 'b', 'c', 'c', 'c']
    Categories (4, object): ['c', 'b', 'a', 'd']
    >>> cat_infreq(c.set_ordered(True))
    ['d', 'a', 'b', 'b', 'c', 'c', 'c']
    Categories (4, object): ['c' < 'b' < 'a' < 'd']
    """
    kwargs = {} if ordered is None else {'ordered': ordered}
    counts = value_counts(c)
    if pdtypes.is_categorical_dtype(c):
        original_cat_order = c.categories
    else:
        original_cat_order = pd.unique(c)
    counts = counts.reindex(index=original_cat_order)
    cats = (_stable_series_sort(counts, ascending=False)
            .index
            .to_list())
    return pd.Categorical(c, categories=cats, **kwargs)


[docs]def cat_inorder(c, ordered=None):
    """
    Reorder categorical by appearance

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    ordered : bool
        If ``True``, the categorical is ordered.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> import numpy as np
    >>> x = [4, 1, 3, 4, 4, 7, 3]
    >>> cat_inorder(x)
    [4, 1, 3, 4, 4, 7, 3]
    Categories (4, int64): [4, 1, 3, 7]
    >>> arr = np.array(x)
    >>> cat_inorder(arr)
    [4, 1, 3, 4, 4, 7, 3]
    Categories (4, int64): [4, 1, 3, 7]
    >>> c = ['b', 'f', 'c', None, 'c', 'a', 'b', 'e']
    >>> cat_inorder(c)
    ['b', 'f', 'c', NaN, 'c', 'a', 'b', 'e']
    Categories (5, object): ['b', 'f', 'c', 'a', 'e']
    >>> s = pd.Series(c)
    >>> cat_inorder(s)
    ['b', 'f', 'c', NaN, 'c', 'a', 'b', 'e']
    Categories (5, object): ['b', 'f', 'c', 'a', 'e']
    >>> cat = pd.Categorical(c)
    >>> cat_inorder(cat)
    ['b', 'f', 'c', NaN, 'c', 'a', 'b', 'e']
    Categories (5, object): ['b', 'f', 'c', 'a', 'e']
    >>> cat_inorder(cat, ordered=True)
    ['b', 'f', 'c', NaN, 'c', 'a', 'b', 'e']
    Categories (5, object): ['b' < 'f' < 'c' < 'a' < 'e']

    By default, ordered categories remain ordered.

    >>> ocat = pd.Categorical(cat, ordered=True)
    >>> ocat
    ['b', 'f', 'c', NaN, 'c', 'a', 'b', 'e']
    Categories (5, object): ['a' < 'b' < 'c' < 'e' < 'f']
    >>> cat_inorder(ocat)
    ['b', 'f', 'c', NaN, 'c', 'a', 'b', 'e']
    Categories (5, object): ['b' < 'f' < 'c' < 'a' < 'e']
    >>> cat_inorder(ocat, ordered=False)
    ['b', 'f', 'c', NaN, 'c', 'a', 'b', 'e']
    Categories (5, object): ['b', 'f', 'c', 'a', 'e']

    Notes
    -----
    ``NaN`` or ``None`` are ignored when creating the categories.
    """
    kwargs = {} if ordered is None else {'ordered': ordered}
    if isinstance(c, (pd.Series, pd.Categorical)):
        cats = c[~pd.isnull(c)].unique()
        if hasattr(cats, 'to_list'):
            cats = cats.to_list()
    elif hasattr(c, 'dtype'):
        cats = pd.unique(c[~pd.isnull(c)])
    else:
        cats = pd.unique([
            x for x, keep in zip(c, ~pd.isnull(c))
            if keep
        ])
    return pd.Categorical(c, categories=cats, **kwargs)


[docs]def cat_inseq(c, ordered=None):
    """
    Reorder categorical by numerical order

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    ordered : bool
        If ``True``, the categorical is ordered.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> x = pd.Categorical([5, 1, 3, 2, 4])
    >>> cat_inseq(x)
    [5, 1, 3, 2, 4]
    Categories (5, int64): [1, 2, 3, 4, 5]
    >>> x = pd.Categorical([5, 1, '3', 2, 4])
    >>> cat_inseq(x)
    [5, 1, 3, 2, 4]
    Categories (5, int64): [1, 2, 3, 4, 5]

    Values that cannot be coerced to numerical turn in ``NaN``,
    and categories cannot be ``NaN``.

    >>> x = pd.Categorical([5, 1, 'three', 2, 4])
    >>> cat_inseq(x)
    [5, 1, NaN, 2, 4]
    Categories (4, int64): [1, 2, 4, 5]

    Coerces values to numerical

    >>> x = [5, 1, '3', 2, 4]
    >>> cat_inseq(x, ordered=True)
    [5, 1, 3, 2, 4]
    Categories (5, int64): [1 < 2 < 3 < 4 < 5]
    >>> x = [5, 1, '3', 2, '4.5']
    >>> cat_inseq(x)
    [5.0, 1.0, 3.0, 2.0, 4.5]
    Categories (5, float64): [1.0, 2.0, 3.0, 4.5, 5.0]

    Atleast one of the values must be coercible to the integer

    >>> x = ['five', 'one', 'three', 'two', 'four']
    >>> cat_inseq(x)
    Traceback (most recent call last):
        ...
    ValueError: Atleast one existing category must be a number.
    >>> x = ['five', 'one', '3', 'two', 'four']
    >>> cat_inseq(x)
    [NaN, NaN, 3, NaN, NaN]
    Categories (1, int64): [3]
    """
    c = as_categorical(c)
    # one value at a time to avoid turning integers into floats
    # when some values create nans
    numerical_cats = []
    for x in c.categories:
        _x = pd.to_numeric(x, 'coerce')
        if not pd.isnull(_x):
            numerical_cats.append(_x)

    if len(numerical_cats) == 0 and len(c) > 0:
        raise ValueError(
            "Atleast one existing category must be a number."
        )

    # Change the original categories to numerical ones, making sure
    # to rename the existing ones i.e '3' becomes 3. Only after that,
    # change to order.
    c = (c.set_categories(numerical_cats, rename=True)
         .reorder_categories(sorted(numerical_cats)))

    if ordered is not None:
        c.set_ordered(ordered, inplace=True)
    return c


[docs]def cat_reorder(c, x, fun=np.median, ascending=True):
    """
    Reorder categorical by sorting along another variable

    It is the order of the categories that changes. Values in x
    are grouped by categories and summarised to determine the
    new order.

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    x : list-like
        Values by which ``c`` will be ordered.
    fun : callable
        Summarising function to ``x`` for each category in ``c``.
        Default is the *median*.
    ascending : bool
        If ``True``, the ``c`` is ordered in ascending order of ``x``.

    Examples
    --------
    >>> c = list('abbccc')
    >>> x = [11, 2, 2, 3, 33, 3]
    >>> cat_reorder(c, x)
    ['a', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['b', 'c', 'a']
    >>> cat_reorder(c, x, fun=max)
    ['a', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['b', 'a', 'c']
    >>> cat_reorder(c, x, fun=max, ascending=False)
    ['a', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['c', 'a', 'b']
    >>> c_ordered = pd.Categorical(c, ordered=True)
    >>> cat_reorder(c_ordered, x)
    ['a', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['b' < 'c' < 'a']
    >>> cat_reorder(c + ['d'], x)
    Traceback (most recent call last):
        ...
    ValueError: Lengths are not equal. len(c) is 7 and len(x) is 6.
    """
    if len(c) != len(x):
        raise ValueError(
            "Lengths are not equal. len(c) is {} and len(x) is {}.".format(
                len(c), len(x)
            )
        )
    summary = (pd.Series(x)
               .groupby(c)
               .apply(fun)
               .sort_values(ascending=ascending)
               )
    cats = summary.index.to_list()
    return pd.Categorical(c, categories=cats)


[docs]def cat_reorder2(c, x, y, *args, fun=last2, ascending=False, **kwargs):
    """
    Reorder categorical by sorting along another variable

    It is the order of the categories that changes. Values in x
    are grouped by categories and summarised to determine the
    new order.

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    x : list-like
        Values by which ``c`` will be ordered.
    y : list-like
        Values by which ``c`` will be ordered.
    *args : tuple
        Position arguments passed to function fun.
    fun : callable
        Summarising function to ``x`` for each category in ``c``.
        Default is the *median*.
    ascending : bool
        If ``True``, the ``c`` is ordered in ascending order of ``x``.
    **kwargs : dict
        Keyword arguments passed to ``fun``.

    Examples
    --------

    Order stocks by the price in the latest year. This type of ordering
    can be used to order line plots so that the ends match the order of
    the legend.

    >>> stocks = list('AAABBBCCC')
    >>> year = [1980, 1990, 2000] * 3
    >>> price = [12.34, 12.90, 13.55, 10.92, 14.73, 11.08, 9.02, 12.44, 15.65]
    >>> cat_reorder2(stocks, year, price)
    ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']
    Categories (3, object): ['C', 'A', 'B']
    """
    if len(c) != len(x) or len(x) != len(y):
        raise ValueError(
            "Lengths are not equal. len(c) is {}, len(x) is {} and "
            "len(y) is {}.".format(len(c), len(x), len(y))
        )

    # Wrap two argument function fun with a function that
    # takes a dataframe, put x and y into a dataframe, then
    # use dataframe.groupby
    def _fun(cat_df):
        return fun(cat_df['x'], cat_df['y'], *args, **kwargs)

    summary = (pd.DataFrame({'x': x, 'y': y})
               .groupby(c)
               .apply(_fun)
               .sort_values(ascending=ascending)
               )
    cats = summary.index.to_list()
    return pd.Categorical(c, categories=cats)


def cat_move(c, *args, to=0):
    """
    Reorder categories explicitly

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    *args : tuple
        Categories to reorder. Any categories not mentioned
        will be left in existing order.
    to : int or inf
        Position where to place the categories. ``inf``, puts
        them at the end (highest value).

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> c = ['a', 'b', 'c', 'd', 'e']
    >>> cat_move(c, 'e', 'b')
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['e', 'b', 'a', 'c', 'd']
    >>> cat_move(c, 'c', to=np.inf)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['a', 'b', 'd', 'e', 'c']
    >>> cat_move(pd.Categorical(c, ordered=True), 'a', 'c', 'e', to=1)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['b' < 'a' < 'c' < 'e' < 'd']
    """
    c = as_categorical(c)
    if np.isinf(to):
        to = len(c.categories)

    args = list(args)
    unmoved_cats = c.categories.drop(args).to_list()
    cats = unmoved_cats[0:to] + args + unmoved_cats[to:]
    c.reorder_categories(cats, inplace=True)
    return c


[docs]def cat_rev(c):
    """
    Reverse order of categories

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> c = ['a', 'b', 'c']
    >>> cat_rev(c)
    ['a', 'b', 'c']
    Categories (3, object): ['c', 'b', 'a']
    >>> cat_rev(pd.Categorical(c))
    ['a', 'b', 'c']
    Categories (3, object): ['c', 'b', 'a']
    """
    c = as_categorical(c)
    c.reorder_categories(c.categories[::-1], inplace=True)
    return c


[docs]def cat_shift(c, n=1):
    """
    Shift and wrap-around categories to the left or right

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.

    n : int
        Number of times to shift. If positive, shift to
        the left, if negative shift to the right.
        Default is 1.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> c = ['a', 'b', 'c', 'd', 'e']
    >>> cat_shift(c)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['b', 'c', 'd', 'e', 'a']
    >>> cat_shift(c, 2)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['c', 'd', 'e', 'a', 'b']
    >>> cat_shift(c, -2)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['d', 'e', 'a', 'b', 'c']
    >>> cat_shift(pd.Categorical(c, ordered=True), -3)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['c' < 'd' < 'e' < 'a' < 'b']
    """
    c = as_categorical(c)
    cats = c.categories.to_list()
    cats_extended = cats + cats
    m = len(cats)
    n = n % m
    cats = cats_extended[n:m] + cats_extended[:n]
    c.reorder_categories(cats, inplace=True)
    return c


[docs]def cat_shuffle(c, random_state=None):
    """
    Reverse order of categories

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    random_state : int or ~numpy.random.RandomState, optional
        Seed or Random number generator to use. If ``None``, then
        numpy global generator :class:`numpy.random` is used.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> np.random.seed(123)
    >>> c = ['a', 'b', 'c', 'd', 'e']
    >>> cat_shuffle(c)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['b', 'd', 'e', 'a', 'c']
    >>> cat_shuffle(pd.Categorical(c, ordered=True), 321)
    ['a', 'b', 'c', 'd', 'e']
    Categories (5, object): ['d' < 'b' < 'a' < 'c' < 'e']
    """
    c = as_categorical(c)
    if random_state is None:
        random_state = np.random
    elif isinstance(random_state, int):
        random_state = np.random.RandomState(random_state)
    elif not isinstance(random_state, np.random.RandomState):
        raise TypeError(
            "Unknown type `{}` of random_state".format(type(random_state))
        )

    cats = c.categories.to_list()
    random_state.shuffle(cats)
    c.reorder_categories(cats, inplace=True)
    return c


# Change the value of categories

[docs]def cat_anon(c, prefix='', random_state=None):
    """
    Anonymise categories

    Neither the value nor the order of the categories is preserved.

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.

    random_state : int or ~numpy.random.RandomState, optional
        Seed or Random number generator to use. If ``None``, then
        numpy global generator :class:`numpy.random` is used.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> np.random.seed(123)
    >>> c = ['a', 'b', 'b', 'c', 'c', 'c']
    >>> cat_anon(c)
    ['0', '1', '1', '2', '2', '2']
    Categories (3, object): ['1', '0', '2']
    >>> cat_anon(c, 'c-', 321)
    ['c-1', 'c-2', 'c-2', 'c-0', 'c-0', 'c-0']
    Categories (3, object): ['c-0', 'c-2', 'c-1']
    >>> cat_anon(pd.Categorical(c, ordered=True), 'c-', 321)
    ['c-1', 'c-2', 'c-2', 'c-0', 'c-0', 'c-0']
    Categories (3, object): ['c-0' < 'c-2' < 'c-1']
    """
    c = as_categorical(c)
    if random_state is None:
        random_state = np.random
    elif isinstance(random_state, int):
        random_state = np.random.RandomState(random_state)
    elif not isinstance(random_state, np.random.RandomState):
        raise TypeError(
            "Unknown type `{}` of random_state".format(type(random_state))
        )

    # Shuffle two times,
    #   1. to prevent predicable sequence to category mapping
    #   2. to prevent reversing of the new categories to the old ones
    fmt = '{}{}'.format
    cats = [fmt(prefix, i) for i in range(len(c.categories))]
    random_state.shuffle(cats)
    c.rename_categories(cats, inplace=True)
    cats = c.categories.to_list()
    random_state.shuffle(cats)
    c.reorder_categories(cats, inplace=True)
    return c


[docs]def cat_collapse(c, mapping, group_other=False):
    """
    Collapse categories into manually defined groups

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    mapping : dict
        New categories and the old categories contained in them.
    group_other : False
        If ``True``, a category is created to contain all other
        categories that have not been explicitly collapsed.
        The name of the other categories is ``other``, it may be
        postfixed by the first available integer starting from
        2 if there is a category with a similar name.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> c = ['a', 'b', 'c', 'd', 'e', 'f']
    >>> mapping = {'first_2': ['a', 'b'], 'second_2': ['c', 'd']}
    >>> cat_collapse(c, mapping)
    ['first_2', 'first_2', 'second_2', 'second_2', 'e', 'f']
    Categories (4, object): ['first_2', 'second_2', 'e', 'f']
    >>> cat_collapse(c, mapping, group_other=True)
    ['first_2', 'first_2', 'second_2', 'second_2', 'other', 'other']
    Categories (3, object): ['first_2', 'second_2', 'other']

    Collapsing preserves the order

    >>> cat_rev(c)
    ['a', 'b', 'c', 'd', 'e', 'f']
    Categories (6, object): ['f', 'e', 'd', 'c', 'b', 'a']
    >>> cat_collapse(cat_rev(c), mapping)
    ['first_2', 'first_2', 'second_2', 'second_2', 'e', 'f']
    Categories (4, object): ['f', 'e', 'second_2', 'first_2']
    >>> mapping = {'other': ['a', 'b'], 'another': ['c', 'd']}
    >>> cat_collapse(c, mapping, group_other=True)
    ['other', 'other', 'another', 'another', 'other2', 'other2']
    Categories (3, object): ['other', 'another', 'other2']
    """
    def make_other_name():
        """
        Generate unique name for the other category
        """
        if 'other' not in mapping:
            return 'other'

        for i in range(2, len(mapping)+2):
            other = 'other' + str(i)
            if other not in mapping:
                return other

    c = as_categorical(c)
    if group_other:
        mapping = mapping.copy()
        other = make_other_name()
        mapped_categories = list(chain(*mapping.values()))
        unmapped_categories = c.categories.difference(mapped_categories)
        mapping[other] = list(unmapped_categories)

    inverted_mapping = {
        cat: new_cat
        for new_cat, old_cats in mapping.items()
        for cat in old_cats
    }

    # Convert old categories to new values in order and remove
    # any duplicates. The preserves the order
    new_cats = pd.unique([
        inverted_mapping.get(x, x)
        for x in c.categories
    ])

    c = pd.Categorical(
        [inverted_mapping.get(x, x) for x in c],
        categories=new_cats,
        ordered=c.ordered
    )
    return c


[docs]def cat_other(c, keep=None, drop=None, other_category='other'):
    """
    Replace categories with 'other'

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    keep : list-like
        Categories to preserve. Only one of ``keep`` or ``drop``
        should be specified.
    drop : list-like
        Categories to drop. Only one of ``keep`` or ``drop``
        should be specified.
    other_category : object
        Value used for the 'other' values. It is placed at
        the end of the categories.

    Returns
    -------
    out : categorical
        Values

    Examples
    --------
    >>> c = ['a', 'b', 'a', 'c', 'b', 'b', 'b', 'd', 'c']
    >>> cat_other(c, keep=['a', 'b'])
    ['a', 'b', 'a', 'other', 'b', 'b', 'b', 'other', 'other']
    Categories (3, object): ['a', 'b', 'other']
    >>> cat_other(c, drop=['a', 'b'])
    ['other', 'other', 'other', 'c', 'other', 'other', 'other', 'd', 'c']
    Categories (3, object): ['c', 'd', 'other']
    >>> cat_other(pd.Categorical(c, ordered=True), drop=['a', 'b'])
    ['other', 'other', 'other', 'c', 'other', 'other', 'other', 'd', 'c']
    Categories (3, object): ['c' < 'd' < 'other']
    """
    if keep is None and drop is None:
        raise ValueError(
            "Missing columns to `keep` or those to `drop`."
        )
    elif keep is not None and drop is not None:
        raise ValueError(
            "Only one of `keep` or `drop` should be given."
        )
    c = as_categorical(c)
    cats = c.categories

    if keep is not None:
        if not pdtypes.is_list_like(keep):
            keep = [keep]
    elif drop is not None:
        if not pdtypes.is_list_like(drop):
            drop = [drop]
        keep = cats.difference(drop)

    inverted_mapping = {
        cat: other_category
        for cat in cats.difference(keep)
    }
    inverted_mapping.update({x: x for x in keep})
    new_cats = cats.intersection(keep).to_list() + [other_category]
    c = pd.Categorical(
        [inverted_mapping.get(x, x) for x in c],
        categories=new_cats,
        ordered=c.ordered
    )
    return c


def _lump(lump_it, c, other_category):
    """
    Return a categorical of lumped

    Helper for cat_lump_* functions

    Parameters
    ----------
    lump_it : sequence[(obj, bool)]
        Sequence of (category, lump_category)
    c : cateorical
        Original categorical.
    other_category : object (default: 'other')
        Value used for the 'other' values. It is placed at
        the end of the categories.

    Returns
    -------
    out : categorical
        Values
    """
    lookup = {
        cat: other_category if lump else cat
        for cat, lump in lump_it
    }
    new_cats = (
        c.categories
        .intersection(lookup.values())
        .insert(len(c), other_category)
    )

    c = pd.Categorical(
        [lookup[value] for value in c],
        categories=new_cats,
        ordered=c.ordered
    )
    return c


[docs]def cat_lump(
    c,
    n=None,
    prop=None,
    w=None,
    other_category='other',
    ties_method='min'
):
    """
    Lump together least or most common categories

    This is a general method that calls one of
    :func:`~plydata.cat_tools.cat_lump_n`
    :func:`~plydata.cat_tools.cat_lump_prop` or
    :func:`~plydata.cat_tools.cat_lump_lowfreq`
    depending on the parameters.

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    n : int (optional)
        Number of most/least common values to preserve (not lumped
        together). Positive ``n`` preserves the most common,
        negative ``n`` preserves the least common.

        Lumping happens on condition that the lumped category "other"
        will have the smallest number of items.
        You should only specify one of ``n`` or ``prop``
    prop : float (optional)
        Proportion above/below which the values of a category will be
        preserved (not lumped together). Positive ``prop`` preserves
        categories whose proportion of values is *more* than ``prop``.
        Negative ``prop`` preserves categories whose proportion of
        values is *less* than ``prop``.

        Lumping happens on condition that the lumped category "other"
        will have the smallest number of items.
        You should only specify one of ``n`` or ``prop``
    w : list[int|float] (optional)
        Weights for the frequency of each value. It should be the same
        length as ``c``.
    other_category : object (default: 'other')
        Value used for the 'other' values. It is placed at
        the end of the categories.
    ties_method : {'min', 'max', 'average', 'first', 'dense'} (default: min)
        How to treat categories that occur the same number of times
        (i.e. ties):
        * min: lowest rank in the group
        * max: highest rank in the group
        * average: average rank of the group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups.

    Examples
    --------
    >>> cat_lump(list('abbccc'))
    ['other', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['b', 'c', 'other']

    When the least categories put together are not less than the next
    smallest group.

    >>> cat_lump(list('abcddd'))
    ['a', 'b', 'c', 'd', 'd', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']
    >>> cat_lump(list('abcdddd'))
    ['other', 'other', 'other', 'd', 'd', 'd', 'd']
    Categories (2, object): ['d', 'other']

    >>> c = pd.Categorical(list('abccdd'))
    >>> cat_lump(c, n=1)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']

    >>> cat_lump(c, n=2)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']

    ``n`` Least common categories

    >>> cat_lump(c, n=-2)
    ['a', 'b', 'other', 'other', 'other', 'other']
    Categories (3, object): ['a', 'b', 'other']

    There are fewer than ``n`` categories that are the most/least common.

    >>> cat_lump(c, n=3)
    ['a', 'b', 'c', 'c', 'd', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']
    >>> cat_lump(c, n=-3)
    ['a', 'b', 'c', 'c', 'd', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']

    By proportions, categories that make up *more* than ``prop`` fraction
    of the items.

    >>> cat_lump(c, prop=1/3.01)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']
    >>> cat_lump(c, prop=-1/3.01)
    ['a', 'b', 'other', 'other', 'other', 'other']
    Categories (3, object): ['a', 'b', 'other']
    >>> cat_lump(c, prop=1/2)
    ['other', 'other', 'other', 'other', 'other', 'other']
    Categories (1, object): ['other']

    Order of categoricals is maintained

    >>> c = pd.Categorical(
    ...     list('abccdd'),
    ...     categories=list('adcb'),
    ...     ordered=True
    ... )
    >>> cat_lump(c, n=2)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['d' < 'c' < 'other']

    **Weighted lumping**

    >>> c = list('abcd')
    >>> weights = [3, 2, 1, 1]
    >>> cat_lump(c, n=2)  # No lumping
    ['a', 'b', 'c', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']
    >>> cat_lump(c, n=2, w=weights)
    ['a', 'b', 'other', 'other']
    Categories (3, object): ['a', 'b', 'other']
    """
    if n is not None:
        return cat_lump_n(c, n, w, other_category, ties_method)
    elif prop is not None:
        return cat_lump_prop(c, prop, w, other_category)
    else:
        return cat_lump_lowfreq(c, other_category)


[docs]def cat_lump_n(
    c,
    n,
    w=None,
    other_category='other',
    ties_method='min'
):
    """
    Lump together most/least common n categories

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    n : int
        Number of most/least common values to preserve (not lumped
        together). Positive ``n`` preserves the most common,
        negative ``n`` preserves the least common.

        Lumping happens on condition that the lumped category "other"
        will have the smallest number of items.
        You should only specify one of ``n`` or ``prop``
    w : list[int|float] (optional)
        Weights for the frequency of each value. It should be the same
        length as ``c``.
    other_category : object (default: 'other')
        Value used for the 'other' values. It is placed at
        the end of the categories.
    ties_method : {'min', 'max', 'average', 'first', 'dense'} (default: min)
        How to treat categories that occur the same number of times
        (i.e. ties):
        * min: lowest rank in the group
        * max: highest rank in the group
        * average: average rank of the group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups.

    Examples
    --------
    >>> c = pd.Categorical(list('abccdd'))
    >>> cat_lump_n(c, 1)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']

    >>> cat_lump_n(c, 2)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']

    ``n`` Least common categories

    >>> cat_lump_n(c, -2)
    ['a', 'b', 'other', 'other', 'other', 'other']
    Categories (3, object): ['a', 'b', 'other']

    There are fewer than ``n`` categories that are the most/least common.

    >>> cat_lump_n(c, 3)
    ['a', 'b', 'c', 'c', 'd', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']
    >>> cat_lump_n(c, -3)
    ['a', 'b', 'c', 'c', 'd', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']

    Order of categoricals is maintained

    >>> c = pd.Categorical(
    ...     list('abccdd'),
    ...     categories=list('adcb'),
    ...     ordered=True
    ... )
    >>> cat_lump_n(c, 2)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['d' < 'c' < 'other']

    **Weighted lumping**

    >>> c = list('abcd')
    >>> weights = [3, 2, 1, 1]
    >>> cat_lump_n(c, n=2)  # No lumping
    ['a', 'b', 'c', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']
    >>> cat_lump_n(c, n=2, w=weights)
    ['a', 'b', 'other', 'other']
    Categories (3, object): ['a', 'b', 'other']
    """
    c = as_categorical(c)
    if len(c) == 0:
        return c

    if w is None:
        counts = c.value_counts().sort_values(ascending=False)
    else:
        counts = (
            pd.Series(w)
            .groupby(c)
            .apply(np.sum)
            .sort_values(ascending=False)
        )

    if n < 0:
        rank = counts.rank(method=ties_method)
        n = -n
    else:
        rank = (-counts).rank(method=ties_method)

    # Less than n categories outside the lumping,
    if not (rank > n).any():
        return c

    lump_it = zip(rank.index, rank > n)
    return _lump(lump_it, c, other_category)


[docs]def cat_lump_prop(
    c,
    prop,
    w=None,
    other_category='other',
):
    """
    Lump together least or most common categories by proportion

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    prop : float
        Proportion above/below which the values of a category will be
        preserved (not lumped together). Positive ``prop`` preserves
        categories whose proportion of values is *more* than ``prop``.
        Negative ``prop`` preserves categories whose proportion of
        values is *less* than ``prop``.

        Lumping happens on condition that the lumped category "other"
        will have the smallest number of items.
        You should only specify one of ``n`` or ``prop``
    w : list[int|float] (optional)
        Weights for the frequency of each value. It should be the same
        length as ``c``.
    other_category : object (default: 'other')
        Value used for the 'other' values. It is placed at
        the end of the categories.

    Examples
    --------
    By proportions, categories that make up *more* than ``prop`` fraction
    of the items.

    >>> c = pd.Categorical(list('abccdd'))
    >>> cat_lump_prop(c, 1/3.01)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']
    >>> cat_lump_prop(c, -1/3.01)
    ['a', 'b', 'other', 'other', 'other', 'other']
    Categories (3, object): ['a', 'b', 'other']
    >>> cat_lump_prop(c, 1/2)
    ['other', 'other', 'other', 'other', 'other', 'other']
    Categories (1, object): ['other']
    """
    c = as_categorical(c)
    if len(c) == 0:
        return c

    if w is None:
        counts = c.value_counts().sort_values(ascending=False)
        total = len(c)
    else:
        counts = (
            pd.Series(w)
            .groupby(c)
            .apply(np.sum)
            .sort_values(ascending=False)
        )
        total = counts.sum()

    # For each category findout whether to lump it or keep it
    # Create a generator of the form ((cat, lump), ...)
    props = counts / total
    if prop < 0:
        if not (props > -prop).any():
            # No proportion more than target, so no lumping
            # the most common
            return c
        else:
            lump_it = zip(props.index, props > -prop)
    else:
        if not (props <= prop).any():
            # No proportion less than target, so no lumping
            # the least common
            return c
        else:
            lump_it = zip(props.index, props <= prop)

    return _lump(lump_it, c, other_category)


[docs]def cat_lump_lowfreq(
    c,
    other_category='other',
):
    """
    Lump together least categories

    Ensures that the "other" category is still the smallest.

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    other_category : object (default: 'other')
        Value used for the 'other' values. It is placed at
        the end of the categories.

    Examples
    --------
    >>> cat_lump_lowfreq(list('abbccc'))
    ['other', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['b', 'c', 'other']

    When the least categories put together are not less than the next
    smallest group.

    >>> cat_lump_lowfreq(list('abcddd'))
    ['a', 'b', 'c', 'd', 'd', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']
    >>> cat_lump_lowfreq(list('abcdddd'))
    ['other', 'other', 'other', 'd', 'd', 'd', 'd']
    Categories (2, object): ['d', 'other']
    """
    c = as_categorical(c)
    if len(c) == 0:
        return c

    # For each category findout whether to lump it or keep it
    # Create a generator of the form ((cat, lump), ...)
    counts = c.value_counts().sort_values(ascending=False)
    if len(counts) == 1:
        return c

    unique_counts = pd.unique(counts)
    smallest = unique_counts[-1]
    next_smallest = unique_counts[-2]
    smallest_counts = counts[counts == smallest]
    smallest_total = smallest_counts.sum()
    smallest_cats = smallest_counts.index

    if not smallest_total < next_smallest:
        return c

    lump_it = (
        (cat, True) if cat in smallest_cats else (cat, False)
        for cat in counts.index
    )
    return _lump(lump_it, c, other_category)


[docs]def cat_lump_min(
    c,
    min,
    w=None,
    other_category='other',
):
    """
    Lump catogeries, preserving those that appear min number of times

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    min : int
        Minum number of times a category must be represented to be
        preserved.
    w : list[int|float] (optional)
        Weights for the frequency of each value. It should be the same
        length as ``c``.
    other_category : object (default: 'other')
        Value used for the 'other' values. It is placed at
        the end of the categories.

    Examples
    --------
    >>> c = list('abccdd')
    >>> cat_lump_min(c, min=1)
    ['a', 'b', 'c', 'c', 'd', 'd']
    Categories (4, object): ['a', 'b', 'c', 'd']
    >>> cat_lump_min(c, min=2)
    ['other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']

    **Weighted Lumping**

    >>> weights = [2, 2, .5, .5, 1, 1]
    >>> cat_lump_min(c, min=2, w=weights)
    ['a', 'b', 'other', 'other', 'd', 'd']
    Categories (4, object): ['a', 'b', 'd', 'other']

    Unlike :func:`~plydata.cat_tools.cat_lump`,  :func:`cat_lump_min`
    can lump together and create a category larger than the preserved
    categories.

    >>> c = list('abxyzccdd')
    >>> cat_lump_min(c, min=2)
    ['other', 'other', 'other', 'other', 'other', 'c', 'c', 'd', 'd']
    Categories (3, object): ['c', 'd', 'other']
    """
    c = as_categorical(c)
    if len(c) == 0:
        return c

    if w is None:
        counts = c.value_counts().sort_values(ascending=False)
    else:
        counts = (
            pd.Series(w)
            .groupby(c)
            .apply(np.sum)
            .sort_values(ascending=False)
        )

    if (counts >= min).all():
        return c

    lookup = {
        cat: cat if freq >= min else other_category
        for cat, freq in counts.items()
    }
    new_cats = (
        c.categories
        .intersection(lookup.values())
        .insert(len(c), other_category)
    )

    c = pd.Categorical(
        [lookup[value] for value in c],
        categories=new_cats,
        ordered=c.ordered
    )
    return c


[docs]def cat_rename(c, mapping=None, **kwargs):
    """
    Change/rename categories manually

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    mapping : dict (optional)
        Mapping of the form ``{old_name: new_name}`` for how to rename
        the categories. Setting a value to ``None`` removes the category.
        This arguments is useful if the old names are not valid
        python parameters. Otherwise, ``kwargs`` can be used.
    **kwargs : dict
        Mapping to rename categories. Setting a value to ``None`` removes
        the category.

    Examples
    --------
    >>> c = list('abcd')
    >>> cat_rename(c, a='A')
    ['A', 'b', 'c', 'd']
    Categories (4, object): ['A', 'b', 'c', 'd']
    >>> c = pd.Categorical(
    ...     list('abcd'),
    ...     categories=list('bacd'),
    ...     ordered=True
    ... )
    >>> cat_rename(c, b='B', d='D')
    ['a', 'B', 'c', 'D']
    Categories (4, object): ['B' < 'a' < 'c' < 'D']

    Remove categories by setting them to ``None``.

    >>> cat_rename(c, b='B', d=None)
    ['a', 'B', 'c']
    Categories (3, object): ['B' < 'a' < 'c']
    """
    c = as_categorical(c)
    if mapping is not None and len(kwargs):
        raise ValueError("Use only one of `new` or the ``kwargs``.")

    lookup = mapping or kwargs

    if not lookup:
        return c

    # Remove categories set to None
    remove = [
        old
        for old, new in lookup.items()
        if new is None
    ]
    if remove:
        for cat in remove:
            del lookup[cat]
        c = c.remove_categories(remove).dropna()

    # Separately change values (inplace) and the categories (using an
    # array) old to the new names. Then reconcile the two lists.
    categories = c.categories.to_numpy().copy()
    c.add_categories(
        pd.Index(lookup.values()).difference(c.categories),
        inplace=True
    )
    for old, new in lookup.items():
        if old not in c.categories:
            raise IndexError("Unknown category '{}'.".format(old))
        c[c == old] = new
        categories[categories == old] = new

    new_categories = pd.unique(categories)
    c.remove_unused_categories(inplace=True)
    c.set_categories(new_categories, inplace=True)
    return c


[docs]def cat_relabel(c, func=None, *args, **kwargs):
    """
    Change/rename categories and collapse as necessary

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    func : callable
        Function to create the new name. The first argument to
        the function will be a category to be renamed.
    *args : tuple
        Positional arguments passed to ``func``.
    *kwargs : dict
        Keyword arguments passed to ``func``.

    Examples
    --------
    >>> c = list('abcde')
    >>> cat_relabel(c, str.upper)
    ['A', 'B', 'C', 'D', 'E']
    Categories (5, object): ['A', 'B', 'C', 'D', 'E']
    >>> c = pd.Categorical([0, 1, 2, 1, 1, 0])
    >>> def func(x):
    ...     if x == 0:
    ...         return 'low'
    ...     elif x == 1:
    ...         return 'mid'
    ...     elif x == 2:
    ...         return 'high'
    >>> cat_relabel(c, func)
    ['low', 'mid', 'high', 'mid', 'mid', 'low']
    Categories (3, object): ['low', 'mid', 'high']

    When the function yields the same output for 2 or more
    different categories, those categories are collapsed.

    >>> def first(x):
    ...     return x[0]
    >>> c = pd.Categorical(['aA', 'bB', 'aC', 'dD'],
    ...     categories=['bB', 'aA', 'dD', 'aC'],
    ...     ordered=True
    ... )
    >>> cat_relabel(c, first)
    ['a', 'b', 'a', 'd']
    Categories (3, object): ['b' < 'a' < 'd']
    """
    c = as_categorical(c)
    new_categories = [func(x, *args, **kwargs) for x in c.categories]
    new_categories_uniq = pd.unique(new_categories)
    if len(new_categories_uniq) < len(c.categories):
        # Collapse
        lookup = dict(zip(c.categories, new_categories))
        c = pd.Categorical(
            [lookup[value] for value in c],
            categories=new_categories_uniq,
            ordered=c.ordered
        )
    else:
        c.categories = new_categories

    return c


[docs]def cat_expand(c, *args):
    """
    Add additional categories to a categorical

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    *args : tuple
        Categories to add.

    Examples
    --------
    >>> cat_expand(list('abc'), 'd', 'e')
    ['a', 'b', 'c']
    Categories (5, object): ['a', 'b', 'c', 'd', 'e']
    >>> c = pd.Categorical(list('abcd'), ordered=True)
    >>> cat_expand(c, 'e', 'f')
    ['a', 'b', 'c', 'd']
    Categories (6, object): ['a' < 'b' < 'c' < 'd' < 'e' < 'f']
    """
    c = as_categorical(c)
    c.add_categories(
        pd.Index(args).difference(c.categories),
        inplace=True
    )
    return c


[docs]def cat_explicit_na(c, na_category='(missing)'):
    """
    Give missing values an explicity category

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    na_category : object (default: '(missing)')
        Category for missing values

    Examples
    --------
    >>> c = pd.Categorical(
    ...     ['a', 'b', None, 'c', None, 'd', 'd'],
    ...     ordered=True
    ... )
    >>> c
    ['a', 'b', NaN, 'c', NaN, 'd', 'd']
    Categories (4, object): ['a' < 'b' < 'c' < 'd']
    >>> cat_explicit_na(c)
    ['a', 'b', '(missing)', 'c', '(missing)', 'd', 'd']
    Categories (5, object): ['a' < 'b' < 'c' < 'd' < '(missing)']
    """
    c = as_categorical(c)
    bool_idx = pd.isnull(c)
    if any(bool_idx):
        c.add_categories([na_category], inplace=True)
        c[bool_idx] = na_category
    return c


[docs]def cat_remove_unused(c, only=None):
    """
    Remove unused categories

    Parameters
    ----------
    c : list-like
        Values that will make up the categorical.
    only : list-like (optional)
        The categories to remove *if* they are empty. If not given,
        all unused categories are dropped.

    Examples
    --------
    >>> c = pd.Categorical(list('abcdd'), categories=list('bacdefg'))
    >>> c
    ['a', 'b', 'c', 'd', 'd']
    Categories (7, object): ['b', 'a', 'c', 'd', 'e', 'f', 'g']
    >>> cat_remove_unused(c)
    ['a', 'b', 'c', 'd', 'd']
    Categories (4, object): ['b', 'a', 'c', 'd']
    >>> cat_remove_unused(c, only=['a', 'e', 'g'])
    ['a', 'b', 'c', 'd', 'd']
    Categories (5, object): ['b', 'a', 'c', 'd', 'f']
    """
    if not pdtypes.is_categorical_dtype(c):
        # All categories are used
        c = pd.Categorical(c)
        return c
    else:
        c = c.copy()

    if only is None:
        only = c.categories

    used_idx = pd.unique(c.codes)
    used_categories = c.categories[used_idx]
    c = c.remove_categories(
        c.categories
        .difference(used_categories)
        .intersection(only)
    )
    return c


[docs]def cat_unify(cs, categories=None):
    """
    Unify (union of all) the categories in a list of categoricals

    Parameters
    ----------
    cs : list-like
        Categoricals
    categories : list-like
        Extra categories to apply to very categorical.

    Examples
    --------
    >>> c1 = pd.Categorical(['a', 'b'], categories=list('abc'))
    >>> c2 = pd.Categorical(['d', 'e'], categories=list('edf'))
    >>> c1_new, c2_new = cat_unify([c1, c2])
    >>> c1_new
    ['a', 'b']
    Categories (6, object): ['a', 'b', 'c', 'e', 'd', 'f']
    >>> c2_new
    ['d', 'e']
    Categories (6, object): ['a', 'b', 'c', 'e', 'd', 'f']
    >>> c1_new, c2_new = cat_unify([c1, c2], categories=['z', 'y'])
    >>> c1_new
    ['a', 'b']
    Categories (8, object): ['a', 'b', 'c', 'e', 'd', 'f', 'z', 'y']
    >>> c2_new
    ['d', 'e']
    Categories (8, object): ['a', 'b', 'c', 'e', 'd', 'f', 'z', 'y']
    """
    cs = [as_categorical(c) for c in cs]
    all_cats = list(chain(*(c.categories.to_list() for c in cs)))
    if categories is None:
        categories = pd.unique(all_cats)
    else:
        categories = pd.unique(all_cats + categories)

    cs = [c.set_categories(categories) for c in cs]
    return cs


[docs]def cat_concat(*args):
    """
    Concatenate categoricals and combine the categories

    Parameters
    ----------
    *args : tuple
        Categoricals to be concatenated

    Examples
    --------
    >>> c1 = pd.Categorical(['a', 'b'], categories=['b', 'a'])
    >>> c2 = pd.Categorical(['d', 'a', 'c'])
    >>> cat_concat(c1, c2)
    ['a', 'b', 'd', 'a', 'c']
    Categories (4, object): ['b', 'a', 'c', 'd']

    Notes
    -----
    The resulting category is not ordered.
    """
    categories = pd.unique(list(chain(*(
        c.categories if pdtypes.is_categorical_dtype(c) else c
        for c in args
    ))))
    cs = pd.Categorical(
        list(chain(*(c for c in args))),
        categories=categories
    )
    return cs


[docs]def cat_zip(*args, sep=':', keep_empty=False):
    """
    Create a new categorical (zip style) combined from two or more

    Parameters
    ----------
    *args : tuple
        Categoricals to be concatenated.
    sep : str (default: ':')
        Separator for the combined categories.
    keep_empty : bool (default: False)
        If ``True``, include all combinations of categories
        even those without observations.

    Examples
    --------
    >>> c1 = pd.Categorical(list('aba'))
    >>> c2 = pd.Categorical(list('122'))
    >>> cat_zip(c1, c2)
    ['a:1', 'b:2', 'a:2']
    Categories (3, object): ['a:1', 'a:2', 'b:2']
    >>> cat_zip(c1, c2, keep_empty=True)
    ['a:1', 'b:2', 'a:2']
    Categories (4, object): ['a:1', 'a:2', 'b:1', 'b:2']
    """
    values = [sep.join(items) for items in zip(*args)]
    cs = [
        c if pdtypes.is_categorical_dtype(c) else pd.Categorical(c)
        for c in args
    ]
    categories = [
        sep.join(items)
        for items in product(*(c.categories for c in cs))
    ]

    c = pd.Categorical(values, categories=categories)

    if not keep_empty:
        c.remove_unused_categories(inplace=True)

    return c


# helpers
def as_categorical(c, copy=True):
    """
    Convert input to a categorical

    Parameters
    ----------
    c : categorical_like
        Sequence of objects
    copy : bool
        If `True` and c is alread a categorical, return
        a copy of `c` otherwise return `c`.

    Returns
    -------
    out : categorical
        Categorical made out of `c` or copy of `c`
        if it was a categorical

    """
    if not pdtypes.is_categorical_dtype(c):
        c = pd.Categorical(c)
    elif copy:
        c = c.copy()
    return c


# Temporary functions

def _stable_series_sort(ser, ascending):
    """
    Stable sort for pandas series

    Temporary Solution until
        https://github.com/pandas-dev/pandas/issues/28697
        https://github.com/pandas-dev/pandas/pull/28698
    are resolved
    """
    from pandas.core.sorting import nargsort
    values = ser._values
    indexer = nargsort(
        values, kind='mergesort', ascending=ascending, na_position='last')
    return pd.Series(values[indexer], index=ser.index[indexer])


cat_relevel = cat_move
cat_recode = cat_rename
cat_drop = cat_remove_unused