"""
Module: libfmp.c5.c5s2_chord_rec_template
Author: Meinard Müller, Christof Weiss
License: The MIT license, https://opensource.org/licenses/MIT
This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP)
"""
import copy
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import colors
import librosa
import libfmp.c3
import libfmp.c4
def compute_chromagram_from_filename(fn_wav, Fs=22050, N=4096, H=2048, gamma=None, version='STFT', norm='2'):
    """Compute chromagram for WAV file specified by filename

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        fn_wav (str): Filename of WAV
        Fs (scalar): Sampling rate (Default value = 22050)
        N (int): Window size (Default value = 4096)
        H (int): Hop size (Default value = 2048)
        gamma (float): Constant for logarithmic compression; only used by the 'STFT' and 'IIR'
            front ends (Default value = None)
        version (str): Technique used for front-end decomposition ('STFT', 'IIR', 'CQT') (Default value = 'STFT')
        norm (str): If not None, chroma vectors are normalized by norm as specified ('1', '2', 'max')
            (Default value = '2')

    Returns:
        X (np.ndarray): Chromagram
        Fs_X (scalar): Feature rate of chromagram
        x (np.ndarray): Audio signal
        Fs (scalar): Sampling rate of audio signal
        x_dur (float): Duration (seconds) of audio signal

    Raises:
        ValueError: If version is not one of 'STFT', 'CQT', 'IIR'
    """
    # Fail fast (before any file I/O) instead of hitting an unbound X further down
    if version not in ('STFT', 'CQT', 'IIR'):
        raise ValueError("Unsupported version %r (expected 'STFT', 'CQT', or 'IIR')" % (version,))

    x, Fs = librosa.load(fn_wav, sr=Fs)
    x_dur = x.shape[0] / Fs

    if version == 'STFT':
        # Compute chroma features with STFT
        X = librosa.stft(x, n_fft=N, hop_length=H, pad_mode='constant', center=True)
        if gamma is not None:
            X = np.log(1 + gamma * np.abs(X) ** 2)
        else:
            X = np.abs(X) ** 2
        X = librosa.feature.chroma_stft(S=X, sr=Fs, tuning=0, norm=None, hop_length=H, n_fft=N)
    elif version == 'CQT':
        # Compute chroma features with CQT decomposition
        X = librosa.feature.chroma_cqt(y=x, sr=Fs, hop_length=H, norm=None)
    else:
        # version == 'IIR': compute chroma features with filter bank (using IIR elliptic filter)
        X = librosa.iirt(y=x, sr=Fs, win_length=N, hop_length=H, center=True, tuning=0.0)
        if gamma is not None:
            X = np.log(1.0 + gamma * X)
        X = librosa.feature.chroma_cqt(C=X, bins_per_octave=12, n_octaves=7,
                                       fmin=librosa.midi_to_hz(24), norm=None)

    if norm is not None:
        X = libfmp.c3.normalize_feature_sequence(X, norm=norm)
    Fs_X = Fs / H
    return X, Fs_X, x, Fs, x_dur
def plot_chromagram_annotation(ax, X, Fs_X, ann, color_ann, x_dur, cmap='gray_r', title=''):
    """Plot chromagram and annotation

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        ax: Axes handle(s); passed as-is to libfmp.b.plot_chromagram, while ax[0] receives
            the annotation overlay
        X: Feature representation
        Fs_X: Feature rate
        ann: Annotations
        color_ann: Color for annotations
        x_dur: Duration of feature representation
        cmap: Color map for imshow (Default value = 'gray_r')
        title: Title for figure (Default value = '')
    """
    # This module only imports libfmp.c3 / libfmp.c4 at file level; import the plotting
    # subpackage explicitly rather than relying on it being loaded as a side effect.
    import libfmp.b

    libfmp.b.plot_chromagram(X, Fs=Fs_X, ax=ax,
                             chroma_yticks=[0, 4, 7, 11], clim=[0, 1], cmap=cmap,
                             title=title, ylabel='Chroma', colorbar=True)
    libfmp.b.plot_segments_overlay(ann, ax=ax[0], time_max=x_dur,
                                   print_labels=False, colors=color_ann, alpha=0.1)
def get_chord_labels(ext_minor='m', nonchord=False):
    """Generate chord labels for major and minor triads (and possibly nonchord label)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        ext_minor (str): Extension for minor chords (Default value = 'm')
        nonchord (bool): If truthy then add nonchord label 'N' (Default value = False)

    Returns:
        chord_labels (list): List of chord labels (12 major, 12 minor, optionally 'N')
    """
    chroma_labels = ['C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    # Major triads are labeled by the bare root; minor triads get the extension appended
    chord_labels = chroma_labels + [root + ext_minor for root in chroma_labels]
    # Plain truthiness (not "is True") so that values such as 1 or np.True_ behave the
    # same way here as in generate_chord_templates
    if nonchord:
        chord_labels.append('N')
    return chord_labels
def generate_chord_templates(nonchord=False):
    """Generate chord templates of major and minor triads (and possibly nonchord)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        nonchord (bool): If "True" then add nonchord template (Default value = False)

    Returns:
        chord_templates (np.ndarray): Matrix containing chord_templates as columns
            (columns 0-11: major triads, columns 12-23: minor triads, optional column 24: nonchord)
    """
    # Binary chroma patterns of the C major (C, E, G) and C minor (C, D#, G) triads
    template_cmaj = np.array([1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
    template_cmin = np.array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0])
    num_chord = 25 if nonchord else 24
    # Initialize with ones: the optional last (nonchord) column stays an all-ones template,
    # all other columns are overwritten below
    chord_templates = np.ones((12, num_chord))
    for shift in range(12):
        # Cyclically shifting the C-based pattern transposes the chord by `shift` semitones
        chord_templates[:, shift] = np.roll(template_cmaj, shift)
        chord_templates[:, shift + 12] = np.roll(template_cmin, shift)
    return chord_templates
def chord_recognition_template(X, norm_sim='1', nonchord=False):
    """Conducts template-based chord recognition
    with major and minor triads (and possibly nonchord)

    Notebook: C5/C5S2_ChordRec_Templates.ipynb

    Args:
        X (np.ndarray): Chromagram
        norm_sim (str): Specifies norm used for normalizing chord similarity matrix;
            no normalization if None (Default value = '1')
        nonchord (bool): If "True" then add nonchord template (Default value = False)

    Returns:
        chord_sim (np.ndarray): Chord similarity matrix
        chord_max (np.ndarray): Binarized chord similarity matrix only containing maximizing chord
    """
    chord_templates = generate_chord_templates(nonchord=nonchord)
    # Normalize features and templates (norm='2') before taking inner products
    X_norm = libfmp.c3.normalize_feature_sequence(X, norm='2')
    chord_templates_norm = libfmp.c3.normalize_feature_sequence(chord_templates, norm='2')
    # Entry (k, n) is the inner product of template k with frame n
    chord_sim = np.matmul(chord_templates_norm.T, X_norm)
    if norm_sim is not None:
        chord_sim = libfmp.c3.normalize_feature_sequence(chord_sim, norm=norm_sim)

    # One-hot encode the maximizing chord per frame. argmax selects a single index per
    # column (the first one on ties), so every frame gets exactly one active chord.
    chord_max_index = np.argmax(chord_sim, axis=0)
    chord_max = np.zeros(chord_sim.shape, dtype=np.int32)
    chord_max[chord_max_index, np.arange(chord_sim.shape[1])] = 1
    return chord_sim, chord_max
def convert_chord_label(ann):
    """Replace for segment-based annotation in each chord label the string ':min' by 'm'
    and convert flat chords into sharp chords using enharmonic equivalence

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        ann (list): Segment-based annotation with chord labels (label stored at index 2 of each segment)

    Returns:
        ann_conv (list): Converted segment-based annotation with chord labels
    """
    # Substitutions are applied in this order to every label
    replacements = (
        (':min', 'm'),
        ('Db', 'C#'),
        ('Eb', 'D#'),
        ('Gb', 'F#'),
        ('Ab', 'G#'),
        ('Bb', 'A#'),
    )
    # Work on a deep copy so that the caller's annotation is left untouched
    ann_conv = copy.deepcopy(ann)
    for segment in ann_conv:
        label = segment[2]
        for old, new in replacements:
            label = label.replace(old, new)
        segment[2] = label
    return ann_conv
def convert_sequence_ann(seq, Fs=1):
    """Convert label sequence into segment-based annotation

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        seq (list): Label sequence
        Fs (scalar): Feature rate (Default value = 1)

    Returns:
        ann (list): Segment-based annotation for label sequence, one [start, end, label]
            entry per element of seq
    """
    # The segment of the m-th label is centered at m / Fs and spans one frame period,
    # so the first segment starts at -0.5 / Fs
    return [[(m - 0.5) / Fs, (m + 0.5) / Fs, label] for m, label in enumerate(seq)]
def convert_chord_ann_matrix(fn_ann, chord_labels, Fs=1, N=None, last=False):
    """Convert segment-based chord annotation into various formats

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        fn_ann (str): Filename of segment-based chord annotation
        chord_labels (list): List of chord labels
        Fs (scalar): Feature rate (Default value = 1)
        N (int): Number of frames to be generated (by cutting or extending).
            Only enforced for ann_matrix, ann_frame, ann_seg_frame (Default value = None)
        last (bool): If 'True' uses for extension last chord label, otherwise uses nonchord label 'N'.
            If there is no frame label to repeat, 'N' is used as well (Default value = False)

    Returns:
        ann_matrix (np.ndarray): Encoding of label sequence in form of a binary time-chord representation
        ann_frame (list): Label sequence (specified on the frame level)
        ann_seg_frame (list): Encoding of label sequence as segment-based annotation (given in indices)
        ann_seg_ind (list): Segment-based annotation with segments (given in indices)
        ann_seg_sec (list): Segment-based annotation with segments (given in seconds)
    """
    # Read the annotation twice: once in seconds and once converted to frame indices
    ann_seg_sec, _ = libfmp.c4.read_structure_annotation(fn_ann)
    ann_seg_sec = convert_chord_label(ann_seg_sec)
    ann_seg_ind, _ = libfmp.c4.read_structure_annotation(fn_ann, Fs=Fs, index=True)
    ann_seg_ind = convert_chord_label(ann_seg_ind)

    ann_frame = libfmp.c4.convert_ann_to_seq_label(ann_seg_ind)
    num_ann_frames = len(ann_frame)
    if N is None:
        N = num_ann_frames
    if N < num_ann_frames:
        # Cut the label sequence to the requested length
        ann_frame = ann_frame[:N]
    elif N > num_ann_frames:
        # Extend the label sequence; an empty sequence has no last label to repeat
        if last and num_ann_frames > 0:
            pad_symbol = ann_frame[-1]
        else:
            pad_symbol = 'N'
        ann_frame = ann_frame + [pad_symbol] * (N - num_ann_frames)
    ann_seg_frame = convert_sequence_ann(ann_frame, Fs=1)

    num_chords = len(chord_labels)
    ann_matrix = np.zeros((num_chords, N))
    # Label -> row index; setdefault keeps the first occurrence (same as list.index)
    label_to_index = {}
    for index, chord_label in enumerate(chord_labels):
        label_to_index.setdefault(chord_label, index)
    for n in range(N):
        # Generates a one-entry only for labels that are contained in "chord_labels"
        label_index = label_to_index.get(ann_frame[n])
        if label_index is not None:
            ann_matrix[label_index, n] = 1
    return ann_matrix, ann_frame, ann_seg_frame, ann_seg_ind, ann_seg_sec
def compute_eval_measures(I_ref, I_est):
    """Compute evaluation measures including precision, recall, and F-measure

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        I_ref (np.ndarray): Reference set of items
        I_est (np.ndarray): Set of estimated items

    Returns:
        P (float): Precision
        R (float): Recall
        F (float): F-measure
        num_TP (int): Number of true positives
        num_FP (int): Number of false positives
        num_FN (int): Number of false negatives
    """
    assert I_ref.shape == I_est.shape, "Dimension of input matrices must agree"
    # True positives: items marked in both reference and estimate
    TP = np.sum(np.logical_and(I_ref, I_est))
    # Everything estimated (resp. annotated) that is not a true positive
    FP = np.sum(I_est > 0, axis=None) - TP
    FN = np.sum(I_ref > 0, axis=None) - TP
    # Without any true positive all measures are defined as zero; this also avoids
    # dividing by zero when one of the matrices has no items at all
    P = 0.0
    R = 0.0
    F = 0.0
    if TP > 0:
        P = TP / (TP + FP)
        R = TP / (TP + FN)
        F = 2 * P * R / (P + R)
    return P, R, F, TP, FP, FN
def plot_matrix_chord_eval(I_ref, I_est, Fs=1, xlabel='Time (seconds)', ylabel='Chord',
                           title='', chord_labels=None, ax=None, grid=True, figsize=(9, 3.5)):
    """Plots TP-, FP-, and FN-items in a color-coded form in time–chord grid

    Notebook: C5/C5S2_ChordRec_Eval.ipynb

    Args:
        I_ref: Reference set of items (binary chord-by-time matrix)
        I_est: Set of estimated items (binary chord-by-time matrix)
        Fs: Feature rate (Default value = 1)
        xlabel: Label for x-axis (Default value = 'Time (seconds)')
        ylabel: Label for y-axis (Default value = 'Chord')
        title: Title of figure (Default value = '')
        chord_labels: List of chord labels used for vertical axis (Default value = None)
        ax: Array of axes: [image axis] or [image axis, colorbar axis]; for any other
            length no colorbar is drawn (Default value = None)
        grid: If "True" the plot grid (Default value = True)
        figsize: Size of figure (if axes are not specified) (Default value = (9, 3.5))

    Returns:
        fig: The created matplotlib figure or None if ax was given.
        ax: The used axes
        im: The image plot
    """
    fig = None
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        ax = [ax]

    # Encode every cell as 0 = TN, 1 = FP, 2 = FN, 3 = TP. For binary matrices this is
    # 2 * I_ref + I_est (reference contributes 2, estimate contributes 1).
    I_vis = 2 * np.asarray(I_ref) + np.asarray(I_est)

    eval_cmap = colors.ListedColormap([[1, 1, 1], [1, 0.3, 0.3], [1, 0.7, 0.7], [0, 0, 0]])
    eval_bounds = np.array([0, 1, 2, 3, 4])-0.5
    eval_norm = colors.BoundaryNorm(eval_bounds, 4)
    eval_ticks = [0, 1, 2, 3]

    # Cells are centered on the frame times n / Fs and on the integer chord indices, so
    # the image extends half a cell beyond the first and last centers. Computing the half
    # widths directly also works for a single frame or a single chord row.
    num_chords, num_frames = I_vis.shape[0], I_vis.shape[1]
    half_frame = 0.5 / Fs
    extent = [-half_frame, (num_frames - 1) / Fs + half_frame, -0.5, num_chords - 0.5]
    im = ax[0].imshow(I_vis, origin='lower', aspect='auto', cmap=eval_cmap, norm=eval_norm, extent=extent,
                      interpolation='nearest')

    cbar = None
    if len(ax) == 2:
        # Dedicated colorbar axis supplied by the caller
        cbar = plt.colorbar(im, cax=ax[1], cmap=eval_cmap, norm=eval_norm, boundaries=eval_bounds, ticks=eval_ticks)
    elif len(ax) == 1:
        plt.sca(ax[0])
        cbar = plt.colorbar(im, cmap=eval_cmap, norm=eval_norm, boundaries=eval_bounds, ticks=eval_ticks)
    if cbar is not None:
        cbar.ax.set_yticklabels(['TN', 'FP', 'FN', 'TP'])

    ax[0].set_xlabel(xlabel)
    ax[0].set_ylabel(ylabel)
    ax[0].set_title(title)
    if chord_labels is not None:
        ax[0].set_yticks(np.arange(len(chord_labels)))
        ax[0].set_yticklabels(chord_labels)
    if grid:
        ax[0].grid()
    return fig, ax, im