Source code for libfmp.c8.c8s3_nmf

"""
Module: libfmp.c8.c8s3_nmf
Author: Meinard Müller, Tim Zunner
License: The MIT license, https://opensource.org/licenses/MIT

This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP)
"""

import numpy as np
import matplotlib.pyplot as plt
from numba import jit


[docs]@jit(nopython=True) def nmf(V, R, thresh=0.001, L=1000, W=None, H=None, norm=False, report=False): """NMF algorithm with Euclidean distance Notebook: C8/C8S3_NMFbasic.ipynb Args: V (np.ndarray): Nonnegative matrix of size K x N R (int): Rank parameter thresh (float): Threshold used as stop criterion (Default value = 0.001) L (int): Maximal number of iteration (Default value = 1000) W (np.ndarray): Nonnegative matrix of size K x R used for initialization (Default value = None) H (np.ndarray): Nonnegative matrix of size R x N used for initialization (Default value = None) norm (bool): Applies max-normalization of columns of final W (Default value = False) report (bool): Reports errors during runtime (Default value = False) Returns: W (np.ndarray): Nonnegative matrix of size K x R H (np.ndarray): Nonnegative matrix of size R x N V_approx (np.ndarray): Nonnegative matrix W*H of size K x N V_approx_err (float): Error between V and V_approx H_W_error (np.ndarray): History of errors of subsequent H and W matrices """ K = V.shape[0] N = V.shape[1] if W is None: W = np.random.rand(K, R) if H is None: H = np.random.rand(R, N) V = V.astype(np.float64) W = W.astype(np.float64) H = H.astype(np.float64) H_W_error = np.zeros((2, L)) ell = 1 below_thresh = False eps_machine = np.finfo(np.float32).eps while not below_thresh and ell <= L: H_ell = H W_ell = W H = H * (W.transpose().dot(V) / (W.transpose().dot(W).dot(H) + eps_machine)) W = W * (V.dot(H.transpose()) / (W.dot(H).dot(H.transpose()) + eps_machine)) # H+1 = H *p ((W^T * V) / p (W^T * W * H)) # H = np.multiply(H, np.divide(np.matmul(np.transpose(W), V), # np.matmul(np.matmul(np.transpose(W), W), H))) # W+1 = W *p ((V * H^T) / p (W * H * H^T)) # W = np.multiply(W, np.divide(np.matmul(V, np.transpose(H)), # np.matmul(np.matmul(W, H), np.transpose(H)))) H_error = np.linalg.norm(H-H_ell, ord=2) W_error = np.linalg.norm(W - W_ell, ord=2) H_W_error[:, ell-1] = [H_error, W_error] if report: print('Iteration: ', ell, ', H_error: ', H_error, ', W_error: ', W_error) if H_error < thresh and W_error < thresh: below_thresh = True H_W_error = H_W_error[:, 0:ell] ell += 1 if norm: for r in range(R): v_max = np.max(W[:, r]) if v_max > 0: W[:, r] = W[:, r] / v_max H[r, :] = H[r, :] * v_max V_approx = W.dot(H) V_approx_err = np.linalg.norm(V-V_approx, ord=2) return W, H, V_approx, V_approx_err, H_W_error
[docs]def plot_nmf_factors(W, H, V, Fs, N_fft, H_fft, freq_max, label_pitch=None, title_W='W', title_H='H', title_V='V', figsize=(13, 3)): """Plots the factore of an NMF-based spectral decomposition Notebook: C8/C8S3_NMFSpecFac.ipynb Args: W: Template matrix H: Activation matrix V: Reconstructed input matrix Fs: Sampling frequency N_fft: FFT length H_fft: Hopsize freq_max: Maximum frequency to be plotted label_pitch: Labels for the different pitches (Default value = None) title_W: Title for imshow of matrix W (Default value = 'W') title_H: Title for imshow of matrix H (Default value = 'H') title_V: Title for imshow of matrix V (Default value = 'V') figsize: Size of the figure (Default value = (13, 3)) """ R = W.shape[1] N = H.shape[1] # cmap = libfmp.b.compressed_gray_cmap(alpha=5) cmap = 'gray_r' dur_sec = (N-1) * H_fft / Fs if label_pitch is None: label_pitch = np.arange(R) plt.figure(figsize=figsize) plt.subplot(131) plt.imshow(W, cmap=cmap, origin='lower', aspect='auto', extent=[0, R, 0, Fs/2], interpolation='nearest') plt.ylim([0, freq_max]) plt.colorbar() plt.xticks(np.arange(R) + 0.5, label_pitch) plt.xlabel('Pitch') plt.ylabel('Frequency (Hz)') plt.title(title_W) plt.subplot(132) plt.imshow(H, cmap=cmap, origin='lower', aspect='auto', extent=[0, dur_sec, 0, R], interpolation='nearest') plt.colorbar() plt.yticks(np.arange(R) + 0.5, label_pitch) plt.xlabel('Time (seconds)') plt.ylabel('Pitch') plt.title(title_H) plt.subplot(133) plt.imshow(V, cmap=cmap, origin='lower', aspect='auto', extent=[0, dur_sec, 0, Fs/2], interpolation='nearest') plt.ylim([0, freq_max]) plt.colorbar() plt.xlabel('Time (seconds)') plt.ylabel('Frequency (Hz)') plt.title(title_V) plt.tight_layout() plt.show()
[docs]def pitch_from_annotation(annotation): """Extract set of occurring pitches from annotation Notebook: C8/C8S3_NMFSpecFac.ipynb Args: annotation (list): Annotation data Returns: pitch_set (np.ndarray): Set of occurring pitches """ pitch_all = np.array([c[2] for c in annotation]) pitch_set = np.unique(pitch_all) return pitch_set
[docs]def template_pitch(K, pitch, freq_res, tol_pitch=0.05): """Defines spectral template for a given pitch Notebook: C8/C8S3_NMFSpecFac.ipynb Args: K (int): Number of frequency points pitch (float): Fundamental pitch freq_res (float): Frequency resolution tol_pitch (float): Relative frequency tolerance for the harmonics (Default value = 0.05) Returns: template (np.ndarray): Nonnegative template vector of size K """ max_freq = K * freq_res pitch_freq = 2**((pitch - 69) / 12) * 440 max_order = int(np.ceil(max_freq / ((1 - tol_pitch) * pitch_freq))) template = np.zeros(K) for m in range(1, max_order + 1): min_idx = max(0, int((1 - tol_pitch) * m * pitch_freq / freq_res)) max_idx = min(K-1, int((1 + tol_pitch) * m * pitch_freq / freq_res)) template[min_idx:max_idx+1] = 1 / m return template
[docs]def init_nmf_template_pitch(K, pitch_set, freq_res, tol_pitch=0.05): """Initializes template matrix for a given set of pitches Notebook: C8/C8S3_NMFSpecFac.ipynb Args: K (int): Number of frequency points pitch_set (np.ndarray): Set of fundamental pitches freq_res (float): Frequency resolution tol_pitch (float): Relative frequency tolerance for the harmonics (Default value = 0.05) Returns: W (np.ndarray): Nonnegative matrix of size K x R with R = len(pitch_set) """ R = len(pitch_set) W = np.zeros((K, R)) for r in range(R): W[:, r] = template_pitch(K, pitch_set[r], freq_res, tol_pitch=tol_pitch) return W
[docs]def init_nmf_activation_score(N, annotation, frame_res, tol_note=[0.2, 0.5], pitch_set=None): """Initializes activation matrix for given score annotations Notebook: C8/C8S3_NMFSpecFac.ipynb Args: N (int): Number of frames annotation (list): Annotation data frame_res (time): Time resolution tol_note (list or np.ndarray): Tolerance (seconds) for beginning and end of a note (Default value = [0.2, 0.5]) pitch_set (np.ndarray): Set of occurring pitches (Default value = None) Returns: H (np.ndarray): Nonnegative matrix of size R x N pitch_set (np.ndarray): Set of occurring pitches """ note_start = np.array([c[0] for c in annotation]) note_dur = np.array([c[1] for c in annotation]) pitch_all = np.array([c[2] for c in annotation]) if pitch_set is None: pitch_set = np.unique(pitch_all) R = len(pitch_set) H = np.zeros((R, N)) for i in range(len(note_start)): start_idx = max(0, int((note_start[i] - tol_note[0]) / frame_res)) end_idx = min(N, int((note_start[i] + note_dur[i] + tol_note[1]) / frame_res)) pitch_idx = np.argwhere(pitch_set == pitch_all[i]) H[pitch_idx, start_idx:end_idx] = 1 return H, pitch_set
[docs]def init_nmf_template_pitch_onset(K, pitch_set, freq_res, tol_pitch=0.05): """Initializes template matrix with onsets for a given set of pitches Notebook: C8/C8S3_NMFSpecFac.ipynb Args: K (int): Number of frequency points pitch_set (np.ndarray): Set of fundamental pitches freq_res (float): Frequency resolution tol_pitch (float): Relative frequency tolerance for the harmonics (Default value = 0.05) Returns: W (np.ndarray): Nonnegative matrix of size K x (2R) with R = len(pitch_set) """ R = len(pitch_set) W = np.zeros((K, 2*R)) for r in range(R): W[:, 2*r] = 0.1 W[:, 2*r+1] = template_pitch(K, pitch_set[r], freq_res, tol_pitch=tol_pitch) return W
[docs]def init_nmf_activation_score_onset(N, annotation, frame_res, tol_note=[0.2, 0.5], tol_onset=[0.3, 0.1], pitch_set=None): """Initializes activation matrix with onsets for given score annotations Notebook: C8/C8S3_NMFSpecFac.ipynb Args: N (int): Number of frames annotation (list): Annotation data frame_res (float): Time resolution tol_note (list or np.ndarray): Tolerance (seconds) for beginning and end of a note (Default value = [0.2, 0.5]) tol_onset (list or np.ndarray): Tolerance (seconds) for beginning and end of an onset (Default value = [0.3, 0.1]) pitch_set (np.ndarray): Set of occurring pitches (Default value = None) Returns: H (np.ndarray): Nonnegative matrix of size (2R) x N pitch_set (np.ndarray): Set of occurring pitches label_pitch (np.ndarray): Pitch labels for the templates """ note_start = np.array([c[0] for c in annotation]) note_dur = np.array([c[1] for c in annotation]) pitch_all = np.array([c[2] for c in annotation]) if pitch_set is None: pitch_set = np.unique(pitch_all) R = len(pitch_set) H = np.zeros((2*R, N)) for i in range(len(note_start)): start_idx = max(0, int((note_start[i] - tol_note[0]) / frame_res)) end_idx = min(N, int((note_start[i] + note_dur[i] + tol_note[1]) / frame_res)) start_onset_idx = max(0, int((note_start[i] - tol_onset[0]) / frame_res)) end_onset_idx = min(N, int((note_start[i] + tol_onset[1]) / frame_res)) pitch_idx = np.argwhere(pitch_set == pitch_all[i]) H[2*pitch_idx, start_onset_idx:end_onset_idx] = 1 H[2*pitch_idx+1, start_idx:end_idx] = 1 label_pitch = np.zeros(2*len(pitch_set), dtype=int) for k in range(len(pitch_set)): label_pitch[2*k] = pitch_set[k] label_pitch[2*k+1] = pitch_set[k] return H, pitch_set, label_pitch
[docs]def split_annotation_lh_rh(ann): """Splitting of the annotation data in left and right hand Notebook: C8/C8S3_NMFAudioDecomp.ipynb Args: ann (list): Annotation data Returns: ann_lh (list): Annotation data for left hand ann_rh (list): Annotation data for right hand """ ann_lh = [] ann_rh = [] for a in ann: if a[4] == 'lh': ann_lh = ann_lh + [a] if a[4] == 'rh': ann_rh = ann_rh + [a] return ann_lh, ann_rh