"""
Module: libfmp.c5.c5s2_chord_rec_template
Author: Meinard Müller, Christof Weiss
License: The MIT license, https://opensource.org/licenses/MIT

This file is part of the FMP Notebooks
"""

import copy
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import colors
import librosa

import libfmp.c3
import libfmp.c4

def compute_chromagram_from_filename(fn_wav, Fs=22050, N=4096, H=2048, gamma=None, version='STFT', norm='2'):
    """Compute chromagram for WAV file specified by filename

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        fn_wav (str): Filename of WAV
        Fs (scalar): Sampling rate (Default value = 22050)
        N (int): Window size (Default value = 4096)
        H (int): Hop size (Default value = 2048)
        gamma (float): Constant for logarithmic compression; not used by the 'CQT' version
            (Default value = None)
        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT')
            (Default value = 'STFT')
        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
            (Default value = '2')

    Returns:
        X (np.ndarray): Chromagram
        Fs_X (scalar): Feature rate of chromagram
        x (np.ndarray): Audio signal
        Fs (scalar): Sampling rate of audio signal
        x_dur (float): Duration (seconds) of audio signal

    Raises:
        ValueError: If ``version`` is not one of 'STFT', 'CQT', 'IIR'
    """
    # Reject unknown front-ends before touching the file, so a typo in
    # ``version`` is reported clearly instead of as an unbound ``X`` later on.
    supported_versions = ('STFT', 'CQT', 'IIR')
    if version not in supported_versions:
        raise ValueError(f"Unsupported version {version!r}; expected one of {supported_versions}")

    x, Fs = librosa.load(fn_wav, sr=Fs)
    x_dur = x.shape[0] / Fs

    if version == 'STFT':
        # Compute chroma features with STFT
        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
        if gamma is not None:
            X = np.log(1 + gamma * np.abs(X) ** 2)
        else:
            X = np.abs(X) ** 2
        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
    elif version == 'CQT':
        # Compute chroma features with CQT decomposition
        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
    else:
        # version == 'IIR': chroma features with filter bank (using IIR elliptic filter)
        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
        if gamma is not None:
            X = np.log(1.0 + gamma * X)
        X = librosa.feature.chroma_cqt(C=X, bins_per_octave=12, n_octaves=7,
                                       fmin=librosa.midi_to_hz(24), norm=None)

    if norm is not None:
        X = libfmp.c3.normalize_feature_sequence(X, norm=norm)

    # One feature frame per hop of H samples
    Fs_X = Fs / H
    return X, Fs_X, x, Fs, x_dur
def plot_chromagram_annotation(ax, X, Fs_X, ann, color_ann, x_dur, cmap='gray_r', title=''):
    """Plot chromagram and annotation

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        ax: Indexable collection of axes; handed as a whole to ``libfmp.b.plot_chromagram``,
            while its first entry ``ax[0]`` receives the annotation overlay
        X: Feature representation
        Fs_X: Feature rate
        ann: Annotations
        color_ann: Color for annotations
        x_dur: Duration of feature representation (used as ``time_max`` of the overlay)
        cmap: Color map for imshow (Default value = 'gray_r')
        title: Title for figure (Default value = '')
    """
    chroma_options = dict(
        Fs=Fs_X,
        ax=ax,
        chroma_yticks=[0, 4, 7, 11],
        clim=[0, 1],
        cmap=cmap,
        title=title,
        ylabel='Chroma',
        colorbar=True,
    )
    libfmp.b.plot_chromagram(X, **chroma_options)

    # Draw the (semi-transparent) annotation segments on top of the chromagram axis
    overlay_options = dict(
        ax=ax[0],
        time_max=x_dur,
        print_labels=False,
        colors=color_ann,
        alpha=0.1,
    )
    libfmp.b.plot_segments_overlay(ann, **overlay_options)
def get_chord_labels(ext_minor='m', nonchord=False):
    """Generate chord labels for major and minor triads (and possibly nonchord label)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        ext_minor (str): Extension for minor chords (Default value = 'm')
        nonchord (bool): If truthy then add nonchord label 'N' as last entry (Default value = False)

    Returns:
        chord_labels (list): List of chord labels: the 12 major triads, followed by the
            12 minor triads, followed by 'N' if ``nonchord`` is set
    """
    chroma_labels = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    chord_labels = chroma_labels + [pitch + ext_minor for pitch in chroma_labels]
    # Plain truthiness (rather than ``is True``) keeps the number of labels in
    # step with generate_chord_templates, which tests ``if nonchord:``.
    if nonchord:
        chord_labels.append('N')
    return chord_labels
def generate_chord_templates(nonchord=False):
    """Generate chord templates of major and minor triads (and possibly nonchord)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        nonchord (bool): If "True" then add nonchord template (Default value = False)

    Returns:
        chord_templates (np.ndarray): Matrix containing chord_templates as columns
            (columns 0-11 major triads, 12-23 minor triads, optional column 24 all ones)
    """
    c_major = np.array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
    c_minor = np.array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])

    num_columns = 25 if nonchord else 24
    # Start from ones so that the optional nonchord column stays an all-ones template
    chord_templates = np.ones((12, num_columns))

    # Cyclically shifting the C triads yields the triads on the other eleven roots
    chord_templates[:, :12] = np.column_stack([np.roll(c_major, root) for root in range(12)])
    chord_templates[:, 12:24] = np.column_stack([np.roll(c_minor, root) for root in range(12)])
    return chord_templates
def chord_recognition_template(X, norm_sim='1', nonchord=False):
    """Conducts template-based chord recognition with major and minor triads (and possibly nonchord)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        X (np.ndarray): Chromagram
        norm_sim (str): Specifies norm used for normalizing chord similarity matrix;
            if None, the similarity matrix is left unnormalized (Default value = '1')
        nonchord (bool): If "True" then add nonchord template (Default value = False)

    Returns:
        chord_sim (np.ndarray): Chord similarity matrix
        chord_max (np.ndarray): Binarized chord similarity matrix only containing maximizing chord
    """
    templates = generate_chord_templates(nonchord=nonchord)

    # Both the chroma frames and the templates are normalized with norm='2'
    # before their inner products are taken.
    features_normalized = libfmp.c3.normalize_feature_sequence(X, norm='2')
    templates_normalized = libfmp.c3.normalize_feature_sequence(templates, norm='2')
    chord_sim = np.matmul(templates_normalized.T, features_normalized)

    if norm_sim is not None:
        chord_sim = libfmp.c3.normalize_feature_sequence(chord_sim, norm=norm_sim)

    # One-hot encoding of the best chord per frame; argmax picks the first
    # maximum, so exactly one entry per column is set even in case of ties.
    best_chord = np.argmax(chord_sim, axis=0)
    frame_index = np.arange(chord_sim.shape[1])
    chord_max = np.zeros(chord_sim.shape, dtype=np.int32)
    chord_max[best_chord, frame_index] = 1
    return chord_sim, chord_max
def convert_chord_label(ann):
    """Replace for segment-based annotation in each chord label the string ':min' by 'm'
    and convert flat chords into sharp chords using enharmonic equivalence

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        ann (list): Segment-based annotation with chord labels (label stored at index 2
            of each segment); the input is not modified

    Returns:
        ann_conv (list): Converted segment-based annotation with chord labels
    """
    # Applied in this order to every label
    substitutions = (
        (':min', 'm'),
        ('Db', 'C#'),
        ('Eb', 'D#'),
        ('Gb', 'F#'),
        ('Ab', 'G#'),
        ('Bb', 'A#'),
    )

    # Work on a deep copy so the caller's annotation stays untouched
    ann_conv = copy.deepcopy(ann)
    for segment in ann_conv:
        label = segment[2]
        for old, new in substitutions:
            label = label.replace(old, new)
        segment[2] = label
    return ann_conv
def convert_sequence_ann(seq, Fs=1):
    """Convert label sequence into segment-based annotation

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        seq (list): Label sequence
        Fs (scalar): Feature rate (Default value = 1)

    Returns:
        ann (list): Segment-based annotation for label sequence; entry m is
            ``[(m - 0.5) / Fs, (m + 0.5) / Fs, seq[m]]``, i.e. a segment centered at frame m
    """
    return [[(pos - 0.5) / Fs, (pos + 0.5) / Fs, label] for pos, label in enumerate(seq)]
def convert_chord_ann_matrix(fn_ann, chord_labels, Fs=1, N=None, last=False):
    """Convert segment-based chord annotation into various formats

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        fn_ann (str): Filename of segment-based chord annotation
        chord_labels (list): List of chord labels
        Fs (scalar): Feature rate (Default value = 1)
        N (int): Number of frames to be generated (by cutting or extending).
            Only enforced for ann_matrix, ann_frame, ann_seg_frame (Default value = None)
        last (bool): If 'True' uses for extension last chord label, otherwise uses nonchord label 'N'
            (Default value = False)

    Returns:
        ann_matrix (np.ndarray): Encoding of label sequence in form of a binary time-chord representation
        ann_frame (list): Label sequence (specified on the frame level)
        ann_seg_frame (list): Encoding of label sequence as segment-based annotation (given in indices)
        ann_seg_ind (list): Segment-based annotation with segments (given in indices)
        ann_seg_sec (list): Segment-based annotation with segments (given in seconds)
    """
    # Read the annotation twice: once in seconds, once converted to frame indices
    segments_sec, _ = libfmp.c4.read_structure_annotation(fn_ann)
    ann_seg_sec = convert_chord_label(segments_sec)
    segments_ind, _ = libfmp.c4.read_structure_annotation(fn_ann, Fs=Fs, index=True)
    ann_seg_ind = convert_chord_label(segments_ind)

    # Frame-wise label sequence, cut or padded to exactly N frames
    ann_frame = libfmp.c4.convert_ann_to_seq_label(ann_seg_ind)
    num_available = len(ann_frame)
    if N is None:
        N = num_available
    if N < num_available:
        ann_frame = ann_frame[:N]
    elif N > num_available:
        pad_symbol = ann_frame[-1] if last else 'N'
        ann_frame = ann_frame + [pad_symbol] * (N - num_available)
    ann_seg_frame = convert_sequence_ann(ann_frame, Fs=1)

    # Binary time-chord matrix; frames whose label is not contained in
    # "chord_labels" keep an all-zero column
    ann_matrix = np.zeros((len(chord_labels), N))
    for frame, label in enumerate(ann_frame):
        if label in chord_labels:
            ann_matrix[chord_labels.index(label), frame] = 1
    return ann_matrix, ann_frame, ann_seg_frame, ann_seg_ind, ann_seg_sec
def compute_eval_measures(I_ref, I_est):
    """Compute evaluation measures including precision, recall, and F-measure

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        I_ref (np.ndarray): Reference set of items
        I_est (np.ndarray): Set of estimated items

    Returns:
        P (float): Precision
        R (float): Recall
        F (float): F-measure
        num_TP (int): Number of true positives
        num_FP (int): Number of false positives
        num_FN (int): Number of false negatives

    Raises:
        AssertionError: If the shapes of ``I_ref`` and ``I_est`` differ
    """
    assert I_ref.shape == I_est.shape, "Dimension of input matrices must agree"

    # An item is a true positive if it is set in both matrices; the remaining
    # set items of the estimate / reference are false positives / negatives.
    TP = np.sum(np.logical_and(I_ref, I_est))
    FP = np.sum(I_est > 0, axis=None) - TP
    FN = np.sum(I_ref > 0, axis=None) - TP

    # Without any true positive all three measures are defined as zero
    # (this also avoids dividing by zero); kept as floats for a uniform type.
    P = 0.0
    R = 0.0
    F = 0.0
    if TP > 0:
        P = TP / (TP + FP)
        R = TP / (TP + FN)
        F = 2 * P * R / (P + R)
    return P, R, F, TP, FP, FN
def plot_matrix_chord_eval(I_ref, I_est, Fs=1, xlabel='Time (seconds)', ylabel='Chord', title='',
                           chord_labels=None, ax=None, grid=True, figsize=(9, 3.5)):
    """Plots TP-, FP-, and FN-items in a color-coded form in time–chord grid

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        I_ref: Reference set of items
        I_est: Set of estimated items
        Fs: Feature rate (Default value = 1)
        xlabel: Label for x-axis (Default value = 'Time (seconds)')
        ylabel: Label for y-axis (Default value = 'Chord')
        title: Title of figure (Default value = '')
        chord_labels: List of chord labels used for vertical axis (Default value = None)
        ax: Array of axes; either ``[ax_image]`` or ``[ax_image, ax_colorbar]`` (Default value = None)
        grid: If truthy, plot the grid (Default value = True)
        figsize: Size of figure (if axes are not specified) (Default value = (9, 3.5))

    Returns:
        fig: The created matplotlib figure or None if ax was given.
        ax: The used axes
        im: The image plot

    Raises:
        ValueError: If ``ax`` is given but holds neither one nor two axes
    """
    fig = None
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        ax = [ax]
    if len(ax) not in (1, 2):
        raise ValueError("ax must contain one axis (image) or two axes (image, colorbar)")

    # Element-wise indicator matrices. Their weighted sum encodes every cell as
    # 0 = TN, 1 = FP, 2 = FN, 3 = TP (algebraically this equals 2 * I_ref + I_est).
    I_ref = np.asarray(I_ref)
    I_est = np.asarray(I_est)
    I_TP = np.logical_and(I_ref, I_est).astype(int)
    I_FP = I_est - I_TP
    I_FN = I_ref - I_TP
    I_vis = 3 * I_TP + 2 * I_FN + 1 * I_FP

    # Four discrete colors, one per code, with bin edges centered on the codes
    eval_cmap = colors.ListedColormap([[1, 1, 1], [1, 0.3, 0.3], [1, 0.7, 0.7], [0, 0, 0]])
    eval_bounds = np.array([0, 1, 2, 3, 4]) - 0.5
    eval_norm = colors.BoundaryNorm(eval_bounds, 4)
    eval_ticks = [0, 1, 2, 3]

    # Each cell is centered on its frame time / chord index. The extent is
    # derived from the shape so that a single frame or chord row also works.
    num_chords, num_frames = I_vis.shape
    half_frame = 0.5 / Fs
    extent = [-half_frame, (num_frames - 1) / Fs + half_frame, -0.5, num_chords - 0.5]

    im = ax[0].imshow(I_vis, origin='lower', aspect='auto', cmap=eval_cmap, norm=eval_norm,
                      extent=extent, interpolation='nearest')

    if len(ax) == 2:
        # Dedicated colorbar axis supplied by the caller
        cbar = plt.colorbar(im, cax=ax[1], cmap=eval_cmap, norm=eval_norm, boundaries=eval_bounds,
                            ticks=eval_ticks)
    else:
        # Make the image axis current so that the colorbar is attached to it
        plt.sca(ax[0])
        cbar = plt.colorbar(im, cmap=eval_cmap, norm=eval_norm, boundaries=eval_bounds, ticks=eval_ticks)
    cbar.ax.set_yticklabels(['TN', 'FP', 'FN', 'TP'])

    ax[0].set_xlabel(xlabel)
    ax[0].set_ylabel(ylabel)
    ax[0].set_title(title)
    if chord_labels is not None:
        ax[0].set_yticks(np.arange(len(chord_labels)))
        ax[0].set_yticklabels(chord_labels)
    if grid:
        ax[0].grid()
    return fig, ax, im