"""
Module: libfmp.c7.c7s1_audio_id
Author: Meinard Mueller, Patricio Lopez-Serrano
License: The MIT license, https://opensource.org/licenses/MIT
This file is part of the FMP Notebooks (https://www.audiolabs-erlangen.de/FMP)
"""
import numpy as np
from scipy import ndimage
from matplotlib import pyplot as plt
from numba import jit
[docs]@jit(nopython=True)
def compute_constellation_map_naive(Y, dist_freq=7, dist_time=7, thresh=0.01):
"""Compute constellation map (naive implementation)
Notebook: C7/C7S1_AudioIdentification.ipynb
Args:
Y (np.ndarray): Spectrogram (magnitude)
dist_freq (int): Neighborhood parameter for frequency direction (kappa) (Default value = 7)
dist_time (int): Neighborhood parameter for time direction (tau) (Default value = 7)
thresh (float): Threshold parameter for minimal peak magnitude (Default value = 0.01)
Returns:
Cmap (np.ndarray): Boolean mask for peak structure (same size as Y)
"""
# spectrogram dimensions
if Y.ndim > 1:
(K, N) = Y.shape
else:
K = Y.shape[0]
N = 1
Cmap = np.zeros((K, N), dtype=np.bool8)
# loop over spectrogram
for k in range(K):
f1 = max(k - dist_freq, 0)
f2 = min(k + dist_freq + 1, K)
for n in range(N):
t1 = max(n - dist_time, 0)
t2 = min(n + dist_time + 1, N)
curr_mag = Y[k, n]
curr_rect = Y[f1:f2, t1:t2]
c_max = np.max(curr_rect)
if ((curr_mag == c_max) and (curr_mag > thresh)):
Cmap[k, n] = True
return Cmap
[docs]def plot_constellation_map(Cmap, Y=None, xlim=None, ylim=None, title='',
xlabel='Time (sample)', ylabel='Frequency (bins)',
s=5, color='r', marker='o', figsize=(7, 3), dpi=72):
"""Plot constellation map
Notebook: C7/C7S1_AudioIdentification.ipynb
Args:
Cmap: Constellation map given as boolean mask for peak structure
Y: Spectrogram representation (Default value = None)
xlim: Limits for x-axis (Default value = None)
ylim: Limits for y-axis (Default value = None)
title: Title for plot (Default value = '')
xlabel: Label for x-axis (Default value = 'Time (sample)')
ylabel: Label for y-axis (Default value = 'Frequency (bins)')
s: Size of dots in scatter plot (Default value = 5)
color: Color used for scatter plot (Default value = 'r')
marker: Marker for peaks (Default value = 'o')
figsize: Width, height in inches (Default value = (7, 3))
dpi: Dots per inch (Default value = 72)
Returns:
fig: The created matplotlib figure
ax: The used axes.
im: The image plot
"""
if Cmap.ndim > 1:
(K, N) = Cmap.shape
else:
K = Cmap.shape[0]
N = 1
if Y is None:
Y = np.zeros((K, N))
fig, ax = plt.subplots(1, 1, figsize=figsize, dpi=dpi)
im = ax.imshow(Y, origin='lower', aspect='auto', cmap='gray_r', interpolation='nearest')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_title(title)
Fs = 1
if xlim is None:
xlim = [-0.5/Fs, (N-0.5)/Fs]
if ylim is None:
ylim = [-0.5/Fs, (K-0.5)/Fs]
ax.set_xlim(xlim)
ax.set_ylim(ylim)
n, k = np.argwhere(Cmap == 1).T
ax.scatter(k, n, color=color, s=s, marker=marker)
plt.tight_layout()
return fig, ax, im
[docs]def compute_constellation_map(Y, dist_freq=7, dist_time=7, thresh=0.01):
"""Compute constellation map (implementation using image processing)
Notebook: C7/C7S1_AudioIdentification.ipynb
Args:
Y (np.ndarray): Spectrogram (magnitude)
dist_freq (int): Neighborhood parameter for frequency direction (kappa) (Default value = 7)
dist_time (int): Neighborhood parameter for time direction (tau) (Default value = 7)
thresh (float): Threshold parameter for minimal peak magnitude (Default value = 0.01)
Returns:
Cmap (np.ndarray): Boolean mask for peak structure (same size as Y)
"""
result = ndimage.maximum_filter(Y, size=[2*dist_freq+1, 2*dist_time+1], mode='constant')
Cmap = np.logical_and(Y == result, result > thresh)
return Cmap
[docs]def match_binary_matrices_tol(C_ref, C_est, tol_freq=0, tol_time=0):
"""| Compare binary matrices with tolerance
| Note: The tolerance parameters should be smaller than the minimum distance of
peaks (1-entries in C_ref ad C_est) to obtain meaningful TP, FN, FP values
Notebook: C7/C7S1_AudioIdentification.ipynb
Args:
C_ref (np.ndarray): Binary matrix used as reference
C_est (np.ndarray): Binary matrix used as estimation
tol_freq (int): Tolerance in frequency direction (vertical) (Default value = 0)
tol_time (int): Tolerance in time direction (horizontal) (Default value = 0)
Returns:
TP (int): True positives
FN (int): False negatives
FP (int): False positives
C_AND (np.ndarray): Boolean mask of AND of C_ref and C_est (with tolerance)
"""
assert C_ref.shape == C_est.shape, "Dimensions need to agree"
N = np.sum(C_ref)
M = np.sum(C_est)
# Expand C_est with 2D-max-filter using the tolerance parameters
C_est_max = ndimage.maximum_filter(C_est, size=(2*tol_freq+1, 2*tol_time+1),
mode='constant')
C_AND = np.logical_and(C_est_max, C_ref)
TP = np.sum(C_AND)
FN = N - TP
FP = M - TP
return TP, FN, FP, C_AND
[docs]def compute_matching_function(C_D, C_Q, tol_freq=1, tol_time=1):
"""Computes matching function for constellation maps
Notebook: C7/C7S1_AudioIdentification.ipynb
Args:
C_D (np.ndarray): Binary matrix used as dababase document
C_Q (np.ndarray): Binary matrix used as query document
tol_freq (int): Tolerance in frequency direction (vertical) (Default value = 1)
tol_time (int): Tolerance in time direction (horizontal) (Default value = 1)
Returns:
Delta (np.ndarray): Matching function
shift_max (int): Optimal shift position maximizing Delta
"""
L = C_D.shape[1]
N = C_Q.shape[1]
M = L - N
assert M >= 0, "Query must be shorter than document"
Delta = np.zeros(L)
for m in range(M + 1):
C_D_crop = C_D[:, m:m+N]
TP, FN, FP, C_AND = match_binary_matrices_tol(C_D_crop, C_Q,
tol_freq=tol_freq, tol_time=tol_time)
Delta[m] = TP
shift_max = np.argmax(Delta)
return Delta, shift_max