Source code for libfmp.c7.c7s1_audio_id

"""
Module: libfmp.c7.c7s1_audio_id
Author: Meinard Mueller, Patricio Lopez-Serrano
License: The MIT license, https://opensource.org/licenses/MIT

This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP)
"""
import numpy as np
from scipy import ndimage
from matplotlib import pyplot as plt
from numba import jit


@jit(nopython=True)
def compute_constellation_map_naive(Y, dist_freq=7, dist_time=7, thresh=0.01):
    """Compute constellation map (naive implementation)

    Notebook: C7/C7S1_AudioIdentification.ipynb

    A cell (k, n) is marked as a peak if its magnitude equals the maximum of the
    rectangular neighborhood centered at (k, n) (clipped at the matrix borders)
    and is strictly larger than ``thresh``.

    Args:
        Y (np.ndarray): Spectrogram (magnitude)
        dist_freq (int): Neighborhood parameter for frequency direction (kappa) (Default value = 7)
        dist_time (int): Neighborhood parameter for time direction (tau) (Default value = 7)
        thresh (float): Threshold parameter for minimal peak magnitude (Default value = 0.01)

    Returns:
        Cmap (np.ndarray): Boolean mask for peak structure (same size as Y)
    """
    # spectrogram dimensions
    # NOTE(review): the 1-D branch sets N = 1, but the loop below still indexes
    # Y[k, n] with two indices -- confirm whether 1-D input is actually supported.
    if Y.ndim > 1:
        (K, N) = Y.shape
    else:
        K = Y.shape[0]
        N = 1
    # np.bool_ is the canonical NumPy boolean scalar type; the former alias
    # np.bool8 is deprecated and no longer exists in NumPy 2.0.
    Cmap = np.zeros((K, N), dtype=np.bool_)

    # loop over spectrogram
    for k in range(K):
        # frequency range of the neighborhood, clipped to [0, K)
        f1 = max(k - dist_freq, 0)
        f2 = min(k + dist_freq + 1, K)
        for n in range(N):
            # time range of the neighborhood, clipped to [0, N)
            t1 = max(n - dist_time, 0)
            t2 = min(n + dist_time + 1, N)
            curr_mag = Y[k, n]
            curr_rect = Y[f1:f2, t1:t2]
            c_max = np.max(curr_rect)
            # peak: equals the neighborhood maximum and exceeds the threshold
            if ((curr_mag == c_max) and (curr_mag > thresh)):
                Cmap[k, n] = True
    return Cmap
def plot_constellation_map(Cmap, Y=None, xlim=None, ylim=None, title='', xlabel='Time (sample)',
                           ylabel='Frequency (bins)', s=5, color='r', marker='o', figsize=(7, 3), dpi=72):
    """Plot constellation map

    Notebook: C7/C7S1_AudioIdentification.ipynb

    The peaks of ``Cmap`` are drawn as a scatter plot on top of an image of ``Y``
    (an all-zero image if ``Y`` is not given). A 1-D ``Cmap`` (or ``Y``) is
    treated as a single-frame matrix with one column.

    Args:
        Cmap: Constellation map given as boolean mask for peak structure
        Y: Spectrogram representation (Default value = None)
        xlim: Limits for x-axis (Default value = None)
        ylim: Limits for y-axis (Default value = None)
        title: Title for plot (Default value = '')
        xlabel: Label for x-axis (Default value = 'Time (sample)')
        ylabel: Label for y-axis (Default value = 'Frequency (bins)')
        s: Size of dots in scatter plot (Default value = 5)
        color: Color used for scatter plot (Default value = 'r')
        marker: Marker for peaks (Default value = 'o')
        figsize: Width, height in inches (Default value = (7, 3))
        dpi: Dots per inch (Default value = 72)

    Returns:
        fig: The created matplotlib figure
        ax: The used axes.
        im: The image plot
    """
    # Promote a 1-D map to shape (K, 1); otherwise unpacking the transposed
    # argwhere result into two index vectors below would fail.
    if Cmap.ndim == 1:
        Cmap = Cmap.reshape(-1, 1)
    K, N = Cmap.shape

    if Y is None:
        Y = np.zeros((K, N))
    elif np.ndim(Y) == 1:
        # imshow needs a 2-D image; mirror the treatment of a 1-D Cmap
        Y = np.reshape(Y, (-1, 1))

    fig, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)
    im = ax.imshow(Y, origin='lower', aspect='auto', cmap='gray_r', interpolation='nearest')
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)

    # Default limits enclose all cells, with each cell centered on its integer index
    if xlim is None:
        xlim = [-0.5, N - 0.5]
    if ylim is None:
        ylim = [-0.5, K - 0.5]
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

    # argwhere yields (row, column) pairs: rows go on the y-axis, columns on the x-axis
    row_idx, col_idx = np.argwhere(Cmap == 1).T
    ax.scatter(col_idx, row_idx, color=color, s=s, marker=marker)
    plt.tight_layout()
    return fig, ax, im
def compute_constellation_map(Y, dist_freq=7, dist_time=7, thresh=0.01):
    """Compute constellation map (implementation using image processing)

    Notebook: C7/C7S1_AudioIdentification.ipynb

    A cell is a peak if it equals the maximum over its rectangular neighborhood
    (computed with a 2D maximum filter) and that maximum exceeds ``thresh``.

    Args:
        Y (np.ndarray): Spectrogram (magnitude)
        dist_freq (int): Neighborhood parameter for frequency direction (kappa) (Default value = 7)
        dist_time (int): Neighborhood parameter for time direction (tau) (Default value = 7)
        thresh (float): Threshold parameter for minimal peak magnitude (Default value = 0.01)

    Returns:
        Cmap (np.ndarray): Boolean mask for peak structure (same size as Y)
    """
    # The neighborhood spans dist_* cells on either side of the center cell
    neighborhood = [2 * dist_freq + 1, 2 * dist_time + 1]
    local_max = ndimage.maximum_filter(Y, size=neighborhood, mode='constant')

    is_local_max = (Y == local_max)
    exceeds_thresh = (local_max > thresh)
    return np.logical_and(is_local_max, exceeds_thresh)
def match_binary_matrices_tol(C_ref, C_est, tol_freq=0, tol_time=0):
    """Compare binary matrices with tolerance

    Note: The tolerance parameters should be smaller than the minimum distance of
    peaks (1-entries in C_ref and C_est) to obtain meaningful TP, FN, FP values

    Notebook: C7/C7S1_AudioIdentification.ipynb

    Args:
        C_ref (np.ndarray): Binary matrix used as reference
        C_est (np.ndarray): Binary matrix used as estimation
        tol_freq (int): Tolerance in frequency direction (vertical) (Default value = 0)
        tol_time (int): Tolerance in time direction (horizontal) (Default value = 0)

    Returns:
        TP (int): True positives
        FN (int): False negatives
        FP (int): False positives
        C_AND (np.ndarray): Boolean mask of AND of C_ref and C_est (with tolerance)
    """
    assert C_ref.shape == C_est.shape, "Dimensions need to agree"

    # Spread every estimated entry over its tolerance window (2D max filter),
    # so a reference entry counts as hit if an estimate lies within the window
    window = (2 * tol_freq + 1, 2 * tol_time + 1)
    est_spread = ndimage.maximum_filter(C_est, size=window, mode='constant')
    C_AND = np.logical_and(est_spread, C_ref)

    num_ref = np.sum(C_ref)
    num_est = np.sum(C_est)
    TP = np.sum(C_AND)
    FN = num_ref - TP
    FP = num_est - TP
    return TP, FN, FP, C_AND
def compute_matching_function(C_D, C_Q, tol_freq=1, tol_time=1):
    """Computes matching function for constellation maps

    Notebook: C7/C7S1_AudioIdentification.ipynb

    The query is compared against every equally wide column window of the
    document; the number of true positives for each shift forms the matching
    function. Entries of ``Delta`` for shifts where the query no longer fits
    into the document stay zero.

    Args:
        C_D (np.ndarray): Binary matrix used as database document
        C_Q (np.ndarray): Binary matrix used as query document
        tol_freq (int): Tolerance in frequency direction (vertical) (Default value = 1)
        tol_time (int): Tolerance in time direction (horizontal) (Default value = 1)

    Returns:
        Delta (np.ndarray): Matching function
        shift_max (int): Optimal shift position maximizing Delta
    """
    num_cols_doc = C_D.shape[1]
    num_cols_query = C_Q.shape[1]
    last_shift = num_cols_doc - num_cols_query
    assert last_shift >= 0, "Query must be shorter than document"

    Delta = np.zeros(num_cols_doc)
    for shift in range(last_shift + 1):
        doc_window = C_D[:, shift:shift + num_cols_query]
        # only the true-positive count (first return value) is needed
        Delta[shift] = match_binary_matrices_tol(doc_window, C_Q,
                                                 tol_freq=tol_freq, tol_time=tol_time)[0]
    return Delta, np.argmax(Delta)