Source code for libfmp.c5.c5s2_chord_rec_template

"""
Module: libfmp.c5.c5s2_chord_rec_template
Author: Meinard Müller, Christof Weiss
License: The MIT license, https://opensource.org/licenses/MIT

This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP)
"""

import copy
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import colors
import librosa

import libfmp.c3
import libfmp.c4


def compute_chromagram_from_filename(fn_wav, Fs=22050, N=4096, H=2048, gamma=None, version='STFT', norm='2'):
    """Compute chromagram for WAV file specified by filename

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        fn_wav (str): Filename of WAV
        Fs (scalar): Sampling rate (Default value = 22050)
        N (int): Window size (Default value = 4096)
        H (int): Hop size (Default value = 2048)
        gamma (float): Constant for logarithmic compression; only applied for the versions
            'STFT' and 'IIR' (Default value = None)
        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT')
            (Default value = 'STFT')
        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
            (Default value = '2')

    Returns:
        X (np.ndarray): Chromagram
        Fs_X (scalar): Feature rate of chromagram
        x (np.ndarray): Audio signal
        Fs (scalar): Sampling rate of audio signal
        x_dur (float): Duration (seconds) of audio signal

    Raises:
        ValueError: If ``version`` is not one of 'STFT', 'CQT', 'IIR'
    """
    # Reject unsupported front-ends before any (potentially slow) audio loading;
    # otherwise X would never be assigned and the function would fail later
    # with a confusing UnboundLocalError.
    if version not in ('STFT', 'CQT', 'IIR'):
        raise ValueError("Unknown version '%s' (expected 'STFT', 'CQT', or 'IIR')" % (version,))

    x, Fs = librosa.load(fn_wav, sr=Fs)
    x_dur = x.shape[0] / Fs

    if version == 'STFT':
        # Compute chroma features with STFT
        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
        if gamma is not None:
            X = np.log(1 + gamma * np.abs(X) ** 2)
        else:
            X = np.abs(X) ** 2
        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
    elif version == 'CQT':
        # Compute chroma features with CQT decomposition
        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
    else:
        # version == 'IIR': chroma features with filter bank (using IIR elliptic filter)
        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
        if gamma is not None:
            X = np.log(1.0 + gamma * X)
        X = librosa.feature.chroma_cqt(C=X, bins_per_octave=12, n_octaves=7,
                                       fmin=librosa.midi_to_hz(24), norm=None)

    if norm is not None:
        X = libfmp.c3.normalize_feature_sequence(X, norm=norm)
    # One feature frame per hop of H samples
    Fs_X = Fs / H
    return X, Fs_X, x, Fs, x_dur
def plot_chromagram_annotation(ax, X, Fs_X, ann, color_ann, x_dur, cmap='gray_r', title=''):
    """Plot chromagram and annotation

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        ax: Indexable collection of axes; it is passed as a whole to the chromagram plot,
            and its first entry ``ax[0]`` receives the annotation overlay
        X: Feature representation
        Fs_X: Feature rate
        ann: Annotations
        color_ann: Color for annotations
        x_dur: Duration of feature representation
        cmap: Color map for imshow (Default value = 'gray_r')
        title: Title for figure (Default value = '')
    """
    # Import the plotting subpackage explicitly so this function does not depend on
    # libfmp.b having been loaded as a side effect of importing another subpackage.
    import libfmp.b

    libfmp.b.plot_chromagram(X, Fs=Fs_X, ax=ax,
                             chroma_yticks=[0, 4, 7, 11], clim=[0, 1], cmap=cmap,
                             title=title, ylabel='Chroma', colorbar=True)
    libfmp.b.plot_segments_overlay(ann, ax=ax[0], time_max=x_dur,
                                   print_labels=False, colors=color_ann, alpha=0.1)
def get_chord_labels(ext_minor='m', nonchord=False):
    """Build the list of chord labels for the 12 major and 12 minor triads

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        ext_minor (str): Suffix appended to the root name for minor chords (Default value = 'm')
        nonchord (bool): If exactly ``True``, the nonchord label 'N' is appended (Default value = False)

    Returns:
        chord_labels (list): Major labels, followed by minor labels, optionally followed by 'N'
    """
    pitch_classes = ('C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B')
    labels = list(pitch_classes)
    labels.extend(root + ext_minor for root in pitch_classes)
    # Identity check on purpose: only the literal True enables the nonchord label
    if nonchord is True:
        labels.append('N')
    return labels
def generate_chord_templates(nonchord=False):
    """Build binary chroma templates for all major and minor triads (and possibly nonchord)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        nonchord (bool): If true, an additional all-ones nonchord template is appended
            as last column (Default value = False)

    Returns:
        chord_templates (np.ndarray): Matrix of shape (12, 24) or (12, 25) holding the templates
            as columns: 12 major triads, then 12 minor triads, then (optionally) nonchord
    """
    major_triad = np.array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
    minor_triad = np.array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])

    # Cyclically shifting the C-rooted template by k semitones yields the triad rooted k semitones higher
    columns = [np.roll(major_triad, k) for k in range(12)]
    columns.extend(np.roll(minor_triad, k) for k in range(12))
    if nonchord:
        columns.append(np.ones(12))

    return np.stack(columns, axis=1).astype(np.float64)
def chord_recognition_template(X, norm_sim='1', nonchord=False):
    """Template-based chord recognition with major and minor triads (and possibly nonchord)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        X (np.ndarray): Chromagram
        norm_sim (str): Norm used for normalizing the chord similarity matrix;
            no normalization is applied if None (Default value = '1')
        nonchord (bool): If "True" then add nonchord template (Default value = False)

    Returns:
        chord_sim (np.ndarray): Chord similarity matrix
        chord_max (np.ndarray): Binarized chord similarity matrix (int32) with a single one
            per column, placed at the maximizing chord
    """
    templates = generate_chord_templates(nonchord=nonchord)
    chroma_unit = libfmp.c3.normalize_feature_sequence(X, norm='2')
    templates_unit = libfmp.c3.normalize_feature_sequence(templates, norm='2')

    # Inner products between the normalized templates (rows) and the normalized chroma frames (columns)
    chord_sim = templates_unit.T @ chroma_unit
    if norm_sim is not None:
        chord_sim = libfmp.c3.normalize_feature_sequence(chord_sim, norm=norm_sim)

    # argmax selects exactly one chord per frame (the first one in case of ties),
    # so every column of chord_max contains a single 1.
    winners = np.argmax(chord_sim, axis=0)
    chord_max = np.zeros(chord_sim.shape, dtype=np.int32)
    chord_max[winners, np.arange(chord_sim.shape[1])] = 1
    return chord_sim, chord_max
def convert_chord_label(ann):
    """Normalize chord labels of a segment-based annotation: the string ':min' is replaced by 'm'
    and flat chords are rewritten as sharp chords using enharmonic equivalence

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        ann (list): Segment-based annotation with chord labels (label stored at index 2 of each segment)

    Returns:
        ann_conv (list): Converted segment-based annotation with chord labels (deep copy; input is not modified)
    """
    # Applied in this order to every label
    substitutions = (
        (':min', 'm'),
        ('Db', 'C#'),
        ('Eb', 'D#'),
        ('Gb', 'F#'),
        ('Ab', 'G#'),
        ('Bb', 'A#'),
    )
    ann_conv = copy.deepcopy(ann)
    for segment in ann_conv:
        label = segment[2]
        for old, new in substitutions:
            label = label.replace(old, new)
        segment[2] = label
    return ann_conv
def convert_sequence_ann(seq, Fs=1):
    """Turn a label sequence into a segment-based annotation with one segment per label

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        seq (list): Label sequence
        Fs (scalar): Feature rate (Default value = 1)

    Returns:
        ann (list): Segment-based annotation ``[start, end, label]``; the segment of the
            label at index m is centered at m / Fs and spans from (m-0.5)/Fs to (m+0.5)/Fs
    """
    return [[(idx-0.5) / Fs, (idx+0.5) / Fs, label] for idx, label in enumerate(seq)]
def convert_chord_ann_matrix(fn_ann, chord_labels, Fs=1, N=None, last=False):
    """Read a segment-based chord annotation and convert it into various formats

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        fn_ann (str): Filename of segment-based chord annotation
        chord_labels (list): List of chord labels
        Fs (scalar): Feature rate (Default value = 1)
        N (int): Number of frames to be generated (by cutting or extending).
            Only enforced for ann_matrix, ann_frame, ann_seg_frame (Default value = None)
        last (bool): If 'True' uses for extension last chord label, otherwise uses nonchord label 'N'
            (Default value = False)

    Returns:
        ann_matrix (np.ndarray): Encoding of label sequence in form of a binary time-chord representation
        ann_frame (list): Label sequence (specified on the frame level)
        ann_seg_frame (list): Encoding of label sequence as segment-based annotation (given in indices)
        ann_seg_ind (list): Segment-based annotation with segments (given in indices)
        ann_seg_sec (list): Segment-based annotation with segments (given in seconds)
    """
    # The annotation file is read twice: once as is (seconds), once with Fs and index=True (indices)
    ann_seg_sec = convert_chord_label(libfmp.c4.read_structure_annotation(fn_ann)[0])
    ann_seg_ind = convert_chord_label(libfmp.c4.read_structure_annotation(fn_ann, Fs=Fs, index=True)[0])

    ann_frame = libfmp.c4.convert_ann_to_seq_label(ann_seg_ind)
    num_available = len(ann_frame)
    if N is None:
        N = num_available

    # Cut or extend the frame-level label sequence to exactly N frames
    if N < num_available:
        ann_frame = ann_frame[:N]
    elif N > num_available:
        pad_symbol = ann_frame[-1] if last else 'N'
        ann_frame = ann_frame + [pad_symbol] * (N - num_available)

    ann_seg_frame = convert_sequence_ann(ann_frame, Fs=1)

    ann_matrix = np.zeros((len(chord_labels), N))
    for frame, label in enumerate(ann_frame):
        # Generates a one-entry only for labels that are contained in "chord_labels";
        # frames with any other label keep an all-zero column.
        if label in chord_labels:
            ann_matrix[chord_labels.index(label), frame] = 1
    return ann_matrix, ann_frame, ann_seg_frame, ann_seg_ind, ann_seg_sec
def compute_eval_measures(I_ref, I_est):
    """Compute evaluation measures including precision, recall, and F-measure

    An item counts as true positive where both matrices are nonzero. The numbers of
    estimated and reference items are the counts of strictly positive entries of
    ``I_est`` and ``I_ref``, respectively.

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        I_ref (np.ndarray): Reference set of items
        I_est (np.ndarray): Set of estimated items

    Returns:
        P (float): Precision (0.0 if there is no true positive)
        R (float): Recall (0.0 if there is no true positive)
        F (float): F-measure (0.0 if there is no true positive)
        num_TP (int): Number of true positives
        num_FP (int): Number of false positives
        num_FN (int): Number of false negatives
    """
    assert I_ref.shape == I_est.shape, "Dimension of input matrices must agree"
    TP = np.sum(np.logical_and(I_ref, I_est))
    FP = np.sum(I_est > 0, axis=None) - TP
    FN = np.sum(I_ref > 0, axis=None) - TP
    # Float defaults keep the return types consistent with the TP > 0 case
    P = 0.0
    R = 0.0
    F = 0.0
    # With TP > 0 all denominators below are strictly positive
    if TP > 0:
        P = TP / (TP + FP)
        R = TP / (TP + FN)
        F = 2 * P * R / (P + R)
    return P, R, F, TP, FP, FN
def plot_matrix_chord_eval(I_ref, I_est, Fs=1, xlabel='Time (seconds)', ylabel='Chord', title='',
                           chord_labels=None, ax=None, grid=True, figsize=(9, 3.5)):
    """Plots TP-, FP-, and FN-items in a color-coded form in time–chord grid

    Entries greater than zero are treated as items of the respective set.

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        I_ref: Reference set of items (2D array, chords x frames)
        I_est: Set of estimated items (same shape as I_ref)
        Fs: Feature rate (Default value = 1)
        xlabel: Label for x-axis (Default value = 'Time (seconds)')
        ylabel: Label for y-axis (Default value = 'Chord')
        title: Title of figure (Default value = '')
        chord_labels: List of chord labels used for vertical axis (Default value = None)
        ax: Array of axes with one entry (image) or two entries (image, colorbar)
            (Default value = None)
        grid: If "True" the plot grid (Default value = True)
        figsize: Size of figure (if axes are not specified) (Default value = (9, 3.5))

    Returns:
        fig: The created matplotlib figure or None if ax was given.
        ax: The used axes
        im: The image plot

    Raises:
        ValueError: If ``ax`` is given and does not contain one or two axes
    """
    fig = None
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        ax = [ax]
    if len(ax) not in (1, 2):
        raise ValueError("ax must contain one axes (image) or two axes (image, colorbar)")

    # Per-item indicator matrices; each cell falls into exactly one of TN / FP / FN / TP
    ref_pos = np.asarray(I_ref) > 0
    est_pos = np.asarray(I_est) > 0
    I_TP = np.logical_and(ref_pos, est_pos)
    I_FP = np.logical_and(est_pos, np.logical_not(ref_pos))
    I_FN = np.logical_and(ref_pos, np.logical_not(est_pos))
    # Category codes matching the colormap below: 0 = TN, 1 = FP, 2 = FN, 3 = TP
    I_vis = 3 * I_TP + 2 * I_FN + 1 * I_FP

    eval_cmap = colors.ListedColormap([[1, 1, 1], [1, 0.3, 0.3], [1, 0.7, 0.7], [0, 0, 0]])
    eval_bounds = np.array([0, 1, 2, 3, 4])-0.5
    eval_norm = colors.BoundaryNorm(eval_bounds, 4)
    eval_ticks = [0, 1, 2, 3]

    # Frame n is centered at n / Fs and chord k at k; cells extend half a step to either
    # side. Computed from the shape so that a single frame or a single chord row also works.
    num_chords, num_frames = I_vis.shape
    half_frame = 0.5 / Fs
    extent = [-half_frame, (num_frames - 1) / Fs + half_frame, -0.5, num_chords - 0.5]

    im = ax[0].imshow(I_vis, origin='lower', aspect='auto', cmap=eval_cmap, norm=eval_norm, extent=extent,
                      interpolation='nearest')
    if len(ax) == 2:
        # A dedicated colorbar axes was supplied
        cbar = plt.colorbar(im, cax=ax[1], cmap=eval_cmap, norm=eval_norm, boundaries=eval_bounds,
                            ticks=eval_ticks)
    else:
        plt.sca(ax[0])
        cbar = plt.colorbar(im, cmap=eval_cmap, norm=eval_norm, boundaries=eval_bounds, ticks=eval_ticks)
    cbar.ax.set_yticklabels(['TN', 'FP', 'FN', 'TP'])

    ax[0].set_xlabel(xlabel)
    ax[0].set_ylabel(ylabel)
    ax[0].set_title(title)
    if chord_labels is not None:
        ax[0].set_yticks(np.arange(len(chord_labels)))
        ax[0].set_yticklabels(chord_labels)
    if grid is True:
        ax[0].grid()
    return fig, ax, im