# Source code for libtsm.pitchshift

"""
Description: libtsm pitch-shifting functions
Contributors: Sebastian Rosenzweig, Simon Schwär, Jonathan Driedger, Meinard Müller
License: The MIT license, https://opensource.org/licenses/MIT
This file is part of libtsm (https://www.audiolabs-erlangen.de/resources/MIR/2021-DAFX-AdaptivePitchShifting)
"""

import numpy as np
import scipy as sc
import scipy.signal
import scipy.interpolate
from fractions import Fraction as frac
from .tsm import hps_tsm
from .utils import normalize_length


def pitch_shift_original(x, n, Fs=22050) -> np.ndarray:
    """
    Fixed pitch shift: time-scale modification followed by uniform resampling.

    The signal is first stretched with hps_tsm by the pitch ratio that corresponds to n cents and then resampled by a
    rational approximation of the inverse ratio, which restores the original duration while changing the pitch.
    The code closely follows the Matlab implementation.

    Parameters
    ----------
    x : np.ndarray [shape=(N, )], real - valued
        Signal to be transformed. A one-dimensional input is treated as a single column.

    n : int
        Amount of pitch shifting to be applied, given in cents. Positive n indicates pitch rising, negative n a pitch
        lowering

    Fs : int
        Sampling rate of the input audio signal x

    Returns
    -------
    y : np.ndarray [shape=(L,1)], real - valued
        The pitch-shifted output signal, passed through normalize_length with the number of input samples
    """

    # work on a column layout so that axis 0 is always the time axis
    if x.ndim == 1:
        x = x[:, np.newaxis]

    # pitch ratio for n cents: one semitone (100 cents) corresponds to a factor of 2^(1/12)
    semitone_ratio = np.power(2, 1 / 12)
    alpha = np.power(semitone_ratio, n / 100)

    # stretch the signal by alpha; the resampling below undoes the change in duration
    stretched = hps_tsm(x, alpha, Fs=Fs)

    # approximate the resampling ratio by a fraction up/down with a denominator of at most 100
    resample_ratio = Fs / np.round(alpha * Fs)
    ratio = frac(str(resample_ratio)).limit_denominator(100)
    up, down = ratio.numerator, ratio.denominator

    # deviations from Matlab's resample()
    resampled = scipy.signal.resample_poly(stretched, up, down, axis=0)

    return normalize_length(resampled, x.shape[0])


def pitch_shift(x, p, t_p=None, Fs=22050, order="res-tsm", **kwargs) -> np.ndarray:
    """
    (Non-linear) pitch-shifting via time-scale modification and resampling.

    The pitch shift given in cents is converted into a (piecewise linear) time-stretch function tau. Depending on
    order, the signal is either resampled along tau and then brought back to its original time axis with hps_tsm
    ("res-tsm"), or first stretched with hps_tsm and then resampled ("tsm-res").

    Parameters
    ----------
    x : np.ndarray [shape=(N, )], real - valued
        Signal to be transformed. A one-dimensional input is treated as a single column; the resampling steps only
        read the first column.

    p : float or array-like [shape=(M,)], real - valued
        Amount of pitch shifting to be applied, given in cents. Positive p indicates pitch rising, negative p a pitch
        lowering. A scalar (or zero-dimensional array) requests a fixed pitch shift.

    t_p : array-like [shape=(M,)], real - valued
        Array of time instances in seconds for adaptive pitch shifting, same length as p. Only used if p is an array.
        If t_p does not start at 0 or does not end at the last time instance of x, boundary points with a pitch shift
        of 0 cents are added.

    Fs : int
        Sampling rate of the input audio signal x

    order : Order of TSM and resampling, either "res-tsm" or "tsm-res".

    **kwargs : Parameters for hps_tsm

    Returns
    -------
    y : np.ndarray [shape=(L,1)], real - valued
        The pitch-shifted output signal, cropped to at most N samples

    Raises
    ------
    ValueError
        If order is unknown, if p is an array and t_p is missing, or if p and t_p are empty or differ in length.
    """

    # validate the mode first so that an invalid order is never masked by a later, unrelated error
    if order not in ("res-tsm", "tsm-res"):
        raise ValueError("Order must be either res-tsm or tsm-res!")

    if x.ndim == 1:
        x = x.reshape(-1, 1)

    num_samples = x.shape[0]
    t_x = np.linspace(0, (num_samples - 1) / Fs, num_samples)

    # scalars and zero-dimensional arrays both describe a fixed pitch shift
    fixed_shift = np.ndim(p) == 0

    if not fixed_shift:
        if t_p is None:
            raise ValueError("t_p must be specified if p is an array!")

        # accept any array-like (e.g. lists); the caller's arrays are never modified in place
        p = np.asarray(p, dtype=float)
        t_p = np.asarray(t_p, dtype=float)

        if len(p) != len(t_p):
            raise ValueError("t_p must have the same length as p!")
        if len(p) == 0:
            raise ValueError("p and t_p must not be empty!")

        if t_p[0] != 0:  # time axis should start with 0
            t_p = np.insert(t_p, 0, 0)
            p = np.insert(p, 0, 0)
        # NOTE(review): if t_p[-1] lies beyond the end of x, the appended end point makes the time axis
        # non-monotonic -- presumably callers keep t_p within the signal duration; confirm.
        if t_p[-1] != t_x[-1]:  # time axis should end with the last time instance
            t_p = np.append(t_p, t_x[-1])
            p = np.append(p, 0)

    # account for sign change when order of resampling and TSM is exchanged
    if order == "res-tsm":
        alpha = 2 ** (-p / 1200)
    else:
        alpha = 2 ** (p / 1200)

    # convert pitch shift in cents to (non-linear) time-stretch function tau
    if fixed_shift:
        tau = np.array([[0, 0], [num_samples - 1, num_samples * alpha - 1]]) / Fs  # given in seconds
    else:
        # column 0: input time, column 1: stretched time, accumulated segment by segment, where every segment is
        # scaled by the stretch factor that is valid at its start
        tau = np.zeros((len(alpha), 2))
        tau[:, 0] = t_p
        tau[1:, 1] = np.cumsum(np.diff(tau[:, 0]) * alpha[:-1])

    # Pitch-shifting
    if order == "res-tsm":
        # (Non-linear) Resampling
        warp = sc.interpolate.interp1d(tau[:, 0], tau[:, 1], kind='linear', fill_value="extrapolate")
        time_input = warp(t_x)
        resampler = sc.interpolate.interp1d(time_input, x[:, 0], kind='cubic', fill_value="extrapolate")
        t_res = np.arange(0, tau[-1, 1] + 1 / Fs, 1 / Fs)
        y_ps = resampler(t_res)

        # anchor points map sample positions of the resampled signal back to the original time axis
        tau_inv = np.hstack((time_input.reshape(-1, 1), t_x.reshape(-1, 1)))
        anchor_points = np.ceil(tau_inv * Fs).astype(int)
        anchor_points = np.flip(anchor_points, axis=0)
        anchor_points = anchor_points[np.unique(anchor_points[:, 0],
                                                return_index=True)[1], :]  # only keep unique indices

        # Time-Scale Modification
        y_ps = hps_tsm(y_ps, anchor_points, Fs=Fs, **kwargs)

    else:  # order == "tsm-res"
        # compute anchor points
        anchor_points = np.ceil(tau * Fs).astype(int)
        anchor_points = anchor_points[np.unique(anchor_points[:, 1],
                                                return_index=True)[1], :]  # only keep unique indices

        # Time-Scale Modification
        y_tsm = hps_tsm(x, anchor_points, Fs=Fs, **kwargs)

        # (Non-linear) resampling
        time_output = np.linspace(0, (y_tsm.shape[0] - 1) / Fs, y_tsm.shape[0])
        warp = sc.interpolate.interp1d(tau[:, 1], tau[:, 0], kind='linear', fill_value="extrapolate")
        time_input = warp(time_output)
        resampler = sc.interpolate.interp1d(time_input, y_tsm[:, 0], kind='cubic', fill_value="extrapolate")
        y_ps = resampler(t_x)

    # crop if pitch-shifted signal is longer than x
    y_ps = y_ps.reshape(-1, 1)[:num_samples, :]

    return y_ps