Source code for dcase_util.processors.features
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, absolute_import
from six import iteritems
import copy
from dcase_util.containers import FeatureContainer, FeatureRepository
from dcase_util.features import MelExtractor, MfccStaticExtractor, MfccDeltaExtractor, \
MfccAccelerationExtractor, ZeroCrossingRateExtractor, RMSEnergyExtractor, SpectralCentroidExtractor, \
OpenL3Extractor, TorchOpenL3Extractor, EdgeL3Extractor
from dcase_util.processors import Processor, ProcessingChainItemType, ProcessingChain
from dcase_util.utils import get_class_inheritors
class FeatureReadingProcessor(Processor):
    """Processor that loads a FeatureContainer from disk and optionally sets a focus segment on it."""

    input_type = ProcessingChainItemType.NONE  #: Input data type
    output_type = ProcessingChainItemType.DATA_CONTAINER  #: Output data type

    def __init__(self, *args, **kwargs):
        """Constructor"""

        # Run super init to call init of mixins too
        super(FeatureReadingProcessor, self).__init__(*args, **kwargs)

    def process(self,
                data=None, filename=None,
                focus_start=None, focus_stop=None, focus_duration=None,
                focus_start_seconds=None, focus_stop_seconds=None, focus_duration_seconds=None,
                store_processing_chain=False,
                **kwargs):
        """Data reading.

        A new FeatureContainer is created, loaded from `filename` when one is given, and a focus
        segment is set when a start value is paired with a duration or a stop value. The pairs are
        checked in this order and only the first complete pair is applied: frame start + duration,
        frame start + stop, seconds start + duration, seconds start + stop.

        Parameters
        ----------
        data : None
            Must be None, this processor does not take input data.
            Default value None

        filename : str
            Filename of the feature container to load.
            Default value None

        focus_start : int, optional
            Segment start, frame index of focus segment start.
            Default value None

        focus_stop : int, optional
            Segment end, frame index of focus segment stop.
            Default value None

        focus_duration : int, optional
            Segment duration, frame count of focus segment.
            Default value None

        focus_start_seconds : float > 0.0
            Segment start, seconds.
            Default value None

        focus_stop_seconds : float > 0.0
            Segment end, seconds.
            Default value None

        focus_duration_seconds : float
            Segment duration, seconds.
            Default value None

        store_processing_chain : bool
            Store processing chain to data container returned.
            Default value False

        Raises
        ------
        ValueError
            If `data` is not None or the processor input type is not NONE.

        Returns
        -------
        FeatureContainer

        """

        if data is not None or self.input_type != ProcessingChainItemType.NONE:
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__,
                input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)

        container = FeatureContainer()

        if filename:
            # Load features from disk
            container.load(
                filename=filename
            )

        if focus_start is not None and focus_duration is not None:
            # Focus given as frame start + frame count
            container.set_focus(
                start=focus_start,
                duration=focus_duration
            )

        elif focus_start is not None and focus_stop is not None:
            # Focus given as frame start + frame stop
            container.set_focus(
                start=focus_start,
                stop=focus_stop
            )

        elif focus_start_seconds is not None and focus_duration_seconds is not None:
            # Focus given as start + duration in seconds
            container.set_focus(
                start_seconds=focus_start_seconds,
                duration_seconds=focus_duration_seconds
            )

        elif focus_start_seconds is not None and focus_stop_seconds is not None:
            # Focus given as start + stop in seconds
            container.set_focus(
                start_seconds=focus_start_seconds,
                stop_seconds=focus_stop_seconds
            )

        if store_processing_chain and not container.processing_chain:
            # Insert Reader processor only if processing chain is empty
            processing_chain_item = self.get_processing_chain_item()

            if 'process_parameters' not in processing_chain_item:
                processing_chain_item['process_parameters'] = {}

            # Record each call parameter exactly once (key order is kept as before)
            process_parameters = processing_chain_item['process_parameters']
            process_parameters['filename'] = filename
            process_parameters['focus_start'] = focus_start
            process_parameters['focus_duration'] = focus_duration
            process_parameters['focus_stop'] = focus_stop
            process_parameters['focus_start_seconds'] = focus_start_seconds
            process_parameters['focus_duration_seconds'] = focus_duration_seconds
            process_parameters['focus_stop_seconds'] = focus_stop_seconds

            container.push_processing_chain_item(**processing_chain_item)

        return container
class FeatureWritingProcessor(Processor):
    """Processor that wraps incoming data into a FeatureContainer and optionally saves it to disk."""

    input_type = ProcessingChainItemType.DATA_CONTAINER  #: Input data type
    output_type = ProcessingChainItemType.NONE  #: Output data type

    def __init__(self, *args, **kwargs):
        """Constructor"""

        # Run super init to call init of mixins too
        super(FeatureWritingProcessor, self).__init__(*args, **kwargs)

    def process(self,
                data=None, output_filename=None, store_processing_chain=False,
                **kwargs):
        """Data writing.

        Parameters
        ----------
        data : FeatureContainer
            Input feature data.
            Default value None

        output_filename : str
            Filename of the feature container to save. Nothing is saved when empty.
            Default value None

        store_processing_chain : bool
            Copy the processing chain of `data` to the container returned.
            Default value False

        Raises
        ------
        ValueError
            If `data` evaluates to False.

        Returns
        -------
        FeatureContainer

        """

        # Reject missing input up front
        if not data:
            message = '{name}: No input data.'.format(
                name=self.__class__.__name__
            )

            self.logger.exception(message)
            raise ValueError(message)

        feature_container = FeatureContainer(data=data)

        if store_processing_chain:
            feature_container.processing_chain = data.processing_chain

        if output_filename:
            # Save features to disk
            feature_container.save(
                filename=output_filename
            )

        return feature_container
class RepositoryFeatureReadingProcessor(Processor):
    """Processor that creates a FeatureRepository and optionally loads it from disk."""

    input_type = ProcessingChainItemType.NONE  #: Input data type
    output_type = ProcessingChainItemType.DATA_REPOSITORY  #: Output data type

    def __init__(self, *args, **kwargs):
        """Constructor"""

        # Run super init to call init of mixins too
        super(RepositoryFeatureReadingProcessor, self).__init__(*args, **kwargs)

    def process(self,
                data=None, filename=None,
                store_processing_chain=False,
                **kwargs):
        """Data repository reading.

        Parameters
        ----------
        data : None
            Must be None, this processor does not take input data.
            Default value None

        filename : str
            Filename of the feature repository to load.
            Default value None

        store_processing_chain : bool
            Store processing chain to data repository returned.
            Default value False

        Raises
        ------
        ValueError
            If `data` is not None or the processor input type is not NONE.

        Returns
        -------
        FeatureRepository

        """

        accepts_call = data is None and self.input_type == ProcessingChainItemType.NONE
        if not accepts_call:
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__,
                input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)

        repository = FeatureRepository()

        if filename:
            # Load repository content from disk
            repository.load(
                filename=filename
            )

        if store_processing_chain:
            chain_item = self.get_processing_chain_item()

            if 'process_parameters' not in chain_item:
                chain_item['process_parameters'] = {}

            chain_item['process_parameters']['filename'] = filename

            repository.push_processing_chain_item(**chain_item)

        return repository
class RepositoryFeatureWritingProcessor(Processor):
    """Processor that wraps incoming data into a FeatureRepository and optionally saves it to disk."""

    input_type = ProcessingChainItemType.DATA_REPOSITORY  #: Input data type
    output_type = ProcessingChainItemType.NONE  #: Output data type

    def __init__(self, *args, **kwargs):
        """Constructor"""

        # Run super init to call init of mixins too
        super(RepositoryFeatureWritingProcessor, self).__init__(*args, **kwargs)

    def process(self,
                data=None, output_filename=None, store_processing_chain=False,
                **kwargs):
        """Data repository writing.

        Parameters
        ----------
        data : FeatureRepository
            Input feature data.
            Default value None

        output_filename : str
            Filename of the feature repository to save. Nothing is saved when empty.
            Default value None

        store_processing_chain : bool
            Copy the processing chain of `data` to the repository returned.
            Default value False

        Raises
        ------
        ValueError
            If `data` evaluates to False.

        Returns
        -------
        FeatureRepository

        """

        # Reject missing input up front
        if not data:
            message = '{name}: No input data.'.format(
                name=self.__class__.__name__
            )

            self.logger.exception(message)
            raise ValueError(message)

        feature_repository = FeatureRepository(data=data)

        if store_processing_chain:
            feature_repository.processing_chain = data.processing_chain

        if output_filename:
            # Save repository to disk
            feature_repository.save(
                filename=output_filename
            )

        return feature_repository
class FeatureExtractorProcessor(Processor):
    """Base processor turning an AudioContainer into a FeatureContainer.

    The class relies on `self.extract` and `self.hop_length_seconds`, which are not defined here;
    the subclasses in this module get them from the extractor class they also inherit from.
    """

    input_type = ProcessingChainItemType.AUDIO  #: Input data type
    output_type = ProcessingChainItemType.DATA_CONTAINER  #: Output data type

    def __init__(self, *args, **kwargs):
        """Constructor"""

        # Run super init to call init of mixins too
        super(FeatureExtractorProcessor, self).__init__(*args, **kwargs)

    def process(self, data=None, store_processing_chain=False, **kwargs):
        """Extract features

        Parameters
        ----------
        data : AudioContainer
            Audio data to extract features

        store_processing_chain : bool
            Store processing chain to data container returned
            Default value False

        Raises
        ------
        ValueError
            If `data` is not an AudioContainer.

        Returns
        -------
        FeatureContainer

        """

        from dcase_util.containers import FeatureContainer, AudioContainer

        if not isinstance(data, AudioContainer):
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__,
                input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)

        chain = None
        if store_processing_chain:
            own_name = 'dcase_util.processors.' + self.__class__.__name__

            if hasattr(data, 'processing_chain') and data.processing_chain.chain_item_exists(
                    processor_name=own_name):
                # This processor is already present in the chain, reuse its item
                chain_item = data.processing_chain.chain_item(
                    processor_name=own_name
                )

            else:
                # No earlier item, create a new one
                chain_item = self.get_processing_chain_item()

            # Store the parameters of this call into the chain item
            chain_item.update({
                'process_parameters': kwargs
            })

            if hasattr(data, 'processing_chain'):
                # Extend the chain carried by the input data
                data.processing_chain.push_processor(**chain_item)
                chain = data.processing_chain

            else:
                chain = ProcessingChain().push_processor(**chain_item)

        features = self.extract(y=data.get_focused())

        return FeatureContainer(
            data=features,
            time_resolution=self.hop_length_seconds,
            processing_chain=chain
        )
class RepositoryFeatureExtractorProcessor(Processor):
    """Processor running several feature extractors on one AudioContainer.

    Results are collected into a FeatureRepository, one container per extractor label (and per
    stream for multi-channel audio).
    """

    input_type = ProcessingChainItemType.AUDIO  #: Input data type
    output_type = ProcessingChainItemType.DATA_REPOSITORY  #: Output data type

    def __init__(self, parameters=None, **kwargs):
        """Constructor

        Parameters
        ----------
        parameters : dict
            Extraction parameters, extractor label as key and parameters as value.
            Default value None (handled as empty dict)

        """

        if parameters is None:
            parameters = {}

        kwargs.update(
            {
                'parameters': parameters
            }
        )

        # Run super init to call init of mixins too
        super(RepositoryFeatureExtractorProcessor, self).__init__(**kwargs)

        self.parameters = kwargs.get('parameters', {})
        self.label_to_class = self._collect_label_to_class()

    @staticmethod
    def _collect_label_to_class():
        """Map the `label` of each class returned by get_class_inheritors(FeatureExtractorProcessor) to that class."""

        label_to_class = {}
        for processor in get_class_inheritors(FeatureExtractorProcessor):
            label_to_class[processor.label] = processor

        return label_to_class

    def __getstate__(self):
        # Only the parameters are stored, the label map is rebuilt in __setstate__
        return {
            'parameters': self.parameters
        }

    def __setstate__(self, d):
        self.parameters = d['parameters']
        self.label_to_class = self._collect_label_to_class()

    def _extract_label(self, audio, label, parameters, stream_id=None):
        """Run the extractor registered under `label` on `audio` and return its output.

        Parameters
        ----------
        audio : AudioContainer
            Working copy of the audio data, modified in place (processing chain reset, focus channel set).

        label : str
            Extractor label, key of `self.label_to_class`.

        parameters : dict
            Keyword arguments for the extractor processor constructor.

        stream_id : int, optional
            When given, the focus channel of `audio` is set to this stream before extraction.
            Default value None

        Raises
        ------
        AssertionError
            If no extractor class is registered under `label`.

        Returns
        -------
        Output of the extractor's process method

        """

        if label not in self.label_to_class:
            message = '{name}: Unknown label [{label}], no corresponding class found.'.format(
                name=self.__class__.__name__,
                label=label)

            self.logger.exception(message)
            raise AssertionError(message)

        # Get processor
        processor = self.label_to_class[label](**parameters)

        # Reset processing chain
        audio.processing_chain = ProcessingChain()

        if stream_id is not None:
            # Set focus to the current stream
            audio.focus_channel = stream_id

        # Extract features
        return processor.process(data=audio)

    def process(self, data=None, store_processing_chain=False, **kwargs):
        """Extract features

        Parameters
        ----------
        data : AudioContainer
            Audio data to extract features

        store_processing_chain : bool
            Store processing chain to data container returned
            Default value False

        Raises
        ------
        ValueError
            If `data` is not an AudioContainer.

        AssertionError
            If `self.parameters` contains a label with no corresponding extractor class.

        Returns
        -------
        FeatureRepository

        """

        from dcase_util.containers import FeatureRepository, AudioContainer

        if not isinstance(data, AudioContainer):
            # Same error handling as FeatureExtractorProcessor.process for wrong input
            message = '{name}: Wrong input data type, type required [{input_type}].'.format(
                name=self.__class__.__name__,
                input_type=self.input_type)

            self.logger.exception(message)
            raise ValueError(message)

        if store_processing_chain:
            if hasattr(data, 'processing_chain') and data.processing_chain.chain_item_exists(
                    processor_name='dcase_util.processors.' + self.__class__.__name__):
                # Current processor is already in the processing chain, get that
                processing_chain_item = data.processing_chain.chain_item(
                    processor_name='dcase_util.processors.' + self.__class__.__name__
                )

            else:
                # Create a new processing chain item
                processing_chain_item = self.get_processing_chain_item()

            # Update current processing parameters into chain item
            processing_chain_item.update({
                'process_parameters': kwargs
            })

            # Create processing chain to be stored in the container, and push chain item into it
            if hasattr(data, 'processing_chain'):
                data.processing_chain.push_processor(**processing_chain_item)
                processing_chain = data.processing_chain

            else:
                processing_chain = ProcessingChain().push_processor(**processing_chain_item)

        else:
            processing_chain = None

        # Create repository container
        repository = FeatureRepository(
            processing_chain=processing_chain
        )

        # Make local copy of data, extraction modifies its processing chain and focus channel
        current_data = copy.deepcopy(data)

        if data.streams == 1:
            # We have single channel audio input
            for label, parameters in iteritems(self.parameters):
                repository.set_container(
                    container=self._extract_label(current_data, label, parameters),
                    label=label
                )

        elif data.streams > 1:
            # We have multi-channel audio input
            for stream_id in range(0, data.streams):
                for label, parameters in iteritems(self.parameters):
                    # Add extracted features to the repository
                    repository.set_container(
                        container=self._extract_label(current_data, label, parameters, stream_id=stream_id),
                        label=label,
                        stream_id=stream_id
                    )

        return repository
class MelExtractorProcessor(FeatureExtractorProcessor, MelExtractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with MelExtractor."""

    def __init__(self,
                 fs=44100,
                 win_length_samples=None, hop_length_samples=None, win_length_seconds=0.04, hop_length_seconds=0.02,
                 spectrogram_type='magnitude', n_fft=2048, window_type='hamming_asymmetric',
                 n_mels=40, fmin=0, fmax=None, normalize_mel_bands=False, htk=False, logarithmic=True,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        win_length_samples : int
            Window length in samples.
            Default value None

        hop_length_samples : int
            Hop length in samples.
            Default value None

        win_length_seconds : float
            Window length in seconds.
            Default value 0.04

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        spectrogram_type : str
            Spectrogram type, magnitude or power spectrogram.
            Default value 'magnitude'

        n_fft : int
            Length of the FFT window.
            Default value 2048

        window_type : str
            Window function type.
            Default value 'hamming_asymmetric'

        n_mels : int
            Number of mel bands to generate
            Default value 40

        fmin : int
            Lowest frequency in mel bands (in Hz)
            Default value 0

        fmax : int
            Highest frequency in mel bands (in Hz), if None, fmax = fs/2.0
            Default value None

        normalize_mel_bands : bool
            Normalize mel band to have peak at 1.0
            Default value False

        htk : bool
            Use HTK formula for mel band creation instead of Slaney
            Default value False

        logarithmic : bool
            Switch for log mel-band energies
            Default value True

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            win_length_samples=win_length_samples,
            hop_length_samples=hop_length_samples,
            win_length_seconds=win_length_seconds,
            hop_length_seconds=hop_length_seconds,
            spectrogram_type=spectrogram_type,
            n_fft=n_fft,
            window_type=window_type,
            n_mels=n_mels,
            fmin=fmin,
            fmax=fmax,
            normalize_mel_bands=normalize_mel_bands,
            htk=htk,
            logarithmic=logarithmic
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run MelExtractor init
        MelExtractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(MelExtractorProcessor, self).__init__(**kwargs)
class MfccStaticExtractorProcessor(FeatureExtractorProcessor, MfccStaticExtractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with MfccStaticExtractor."""

    def __init__(self,
                 fs=44100,
                 win_length_samples=None, hop_length_samples=None, win_length_seconds=0.04, hop_length_seconds=0.02,
                 spectrogram_type='magnitude', n_fft=2048, window_type='hamming_asymmetric',
                 n_mfcc=20, n_mels=40, fmin=0, fmax=None, normalize_mel_bands=False, htk=False,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        win_length_samples : int
            Window length in samples.
            Default value None

        hop_length_samples : int
            Hop length in samples.
            Default value None

        win_length_seconds : float
            Window length in seconds.
            Default value 0.04

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        spectrogram_type : str
            Spectrogram type, magnitude or power spectrogram.
            Default value 'magnitude'

        n_fft : int
            Length of the FFT window.
            Default value 2048

        window_type : str
            Window function type.
            Default value 'hamming_asymmetric'

        n_mfcc : int
            Number of MFCC coefficients
            Default value 20

        n_mels : int
            Number of mel bands to generate
            Default value 40

        fmin : int
            Lowest frequency in mel bands (in Hz)
            Default value 0

        fmax : int
            Highest frequency in mel bands (in Hz), if None, fmax = fs/2.0
            Default value None

        normalize_mel_bands : bool
            Normalize mel band to have peak at 1.0
            Default value False

        htk : bool
            Use HTK formula for mel band creation instead of Slaney
            Default value False

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            win_length_samples=win_length_samples,
            hop_length_samples=hop_length_samples,
            win_length_seconds=win_length_seconds,
            hop_length_seconds=hop_length_seconds,
            spectrogram_type=spectrogram_type,
            n_fft=n_fft,
            window_type=window_type,
            n_mfcc=n_mfcc,
            n_mels=n_mels,
            fmin=fmin,
            fmax=fmax,
            normalize_mel_bands=normalize_mel_bands,
            htk=htk
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run MfccStaticExtractor init
        MfccStaticExtractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(MfccStaticExtractorProcessor, self).__init__(**kwargs)
class MfccDeltaExtractorProcessor(FeatureExtractorProcessor, MfccDeltaExtractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with MfccDeltaExtractor."""

    def __init__(self,
                 fs=44100,
                 win_length_samples=None, hop_length_samples=None, win_length_seconds=0.04, hop_length_seconds=0.02,
                 spectrogram_type='magnitude', n_fft=2048, window_type='hamming_asymmetric',
                 n_mfcc=20, n_mels=40, fmin=0, fmax=None, normalize_mel_bands=False, htk=False,
                 width=9,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        win_length_samples : int
            Window length in samples.
            Default value None

        hop_length_samples : int
            Hop length in samples.
            Default value None

        win_length_seconds : float
            Window length in seconds.
            Default value 0.04

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        spectrogram_type : str
            Spectrogram type, magnitude or power spectrogram.
            Default value 'magnitude'

        n_fft : int
            Length of the FFT window.
            Default value 2048

        window_type : str
            Window function type.
            Default value 'hamming_asymmetric'

        n_mfcc : int
            Number of MFCC coefficients.
            Default value 20

        n_mels : int
            Number of mel bands to generate.
            Default value 40

        fmin : int
            Lowest frequency in mel bands (in Hz).
            Default value 0

        fmax : int
            Highest frequency in mel bands (in Hz), if None, fmax = fs/2.0.
            Default value None

        normalize_mel_bands : bool
            Normalize mel band to have peak at 1.0.
            Default value False

        htk : bool
            Use HTK formula for mel band creation instead of Slaney.
            Default value False

        width : int
            Width of the delta window.
            Default value 9

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            win_length_samples=win_length_samples,
            hop_length_samples=hop_length_samples,
            win_length_seconds=win_length_seconds,
            hop_length_seconds=hop_length_seconds,
            spectrogram_type=spectrogram_type,
            n_fft=n_fft,
            window_type=window_type,
            n_mfcc=n_mfcc,
            n_mels=n_mels,
            fmin=fmin,
            fmax=fmax,
            normalize_mel_bands=normalize_mel_bands,
            htk=htk,
            width=width
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run MfccDeltaExtractor init
        MfccDeltaExtractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(MfccDeltaExtractorProcessor, self).__init__(**kwargs)
class MfccAccelerationExtractorProcessor(FeatureExtractorProcessor, MfccAccelerationExtractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with MfccAccelerationExtractor."""

    def __init__(self,
                 fs=44100,
                 win_length_samples=None, hop_length_samples=None, win_length_seconds=0.04, hop_length_seconds=0.02,
                 spectrogram_type='magnitude', n_fft=2048, window_type='hamming_asymmetric',
                 n_mfcc=20, n_mels=40, fmin=0, fmax=None, normalize_mel_bands=False, htk=False,
                 width=9,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        win_length_samples : int
            Window length in samples.
            Default value None

        hop_length_samples : int
            Hop length in samples.
            Default value None

        win_length_seconds : float
            Window length in seconds.
            Default value 0.04

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        spectrogram_type : str
            Spectrogram type, magnitude or power spectrogram.
            Default value 'magnitude'

        n_fft : int
            Length of the FFT window.
            Default value 2048

        window_type : str
            Window function type.
            Default value 'hamming_asymmetric'

        n_mfcc : int
            Number of MFCC coefficients.
            Default value 20

        n_mels : int
            Number of mel bands to generate.
            Default value 40

        fmin : int
            Lowest frequency in mel bands (in Hz).
            Default value 0

        fmax : int
            Highest frequency in mel bands (in Hz), if None, fmax = fs/2.0.
            Default value None

        normalize_mel_bands : bool
            Normalize mel band to have peak at 1.0.
            Default value False

        htk : bool
            Use HTK formula for mel band creation instead of Slaney.
            Default value False

        width : int
            Width of the delta window.
            Default value 9

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            win_length_samples=win_length_samples,
            hop_length_samples=hop_length_samples,
            win_length_seconds=win_length_seconds,
            hop_length_seconds=hop_length_seconds,
            spectrogram_type=spectrogram_type,
            n_fft=n_fft,
            window_type=window_type,
            n_mfcc=n_mfcc,
            n_mels=n_mels,
            fmin=fmin,
            fmax=fmax,
            normalize_mel_bands=normalize_mel_bands,
            htk=htk,
            width=width
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run MfccAccelerationExtractor init
        MfccAccelerationExtractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(MfccAccelerationExtractorProcessor, self).__init__(**kwargs)
class ZeroCrossingRateExtractorProcessor(FeatureExtractorProcessor, ZeroCrossingRateExtractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with ZeroCrossingRateExtractor."""

    def __init__(self,
                 fs=44100,
                 win_length_samples=None, hop_length_samples=None, win_length_seconds=0.04, hop_length_seconds=0.02,
                 center=True,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        win_length_samples : int
            Window length in samples.
            Default value None

        hop_length_samples : int
            Hop length in samples.
            Default value None

        win_length_seconds : float
            Window length in seconds.
            Default value 0.04

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        center : bool
            If True, frames are centered by padding the edges of signal.
            Default value True

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            win_length_samples=win_length_samples,
            hop_length_samples=hop_length_samples,
            win_length_seconds=win_length_seconds,
            hop_length_seconds=hop_length_seconds,
            center=center
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run ZeroCrossingRateExtractor init
        ZeroCrossingRateExtractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(ZeroCrossingRateExtractorProcessor, self).__init__(**kwargs)
class RMSEnergyExtractorProcessor(FeatureExtractorProcessor, RMSEnergyExtractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with RMSEnergyExtractor."""

    def __init__(self,
                 fs=44100,
                 win_length_samples=None, hop_length_samples=None, win_length_seconds=0.04, hop_length_seconds=0.02,
                 spectrogram_type='magnitude', n_fft=2048, window_type='hamming_asymmetric',
                 center=True,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        win_length_samples : int
            Window length in samples.
            Default value None

        hop_length_samples : int
            Hop length in samples.
            Default value None

        win_length_seconds : float
            Window length in seconds.
            Default value 0.04

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        spectrogram_type : str
            Spectrogram type, magnitude or power spectrogram.
            Default value 'magnitude'

        n_fft : int
            Length of the FFT window.
            Default value 2048

        window_type : str
            Window function type.
            Default value 'hamming_asymmetric'

        center : bool
            If True, frames are centered by padding the edges of signal.
            Default value True

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            win_length_samples=win_length_samples,
            hop_length_samples=hop_length_samples,
            win_length_seconds=win_length_seconds,
            hop_length_seconds=hop_length_seconds,
            spectrogram_type=spectrogram_type,
            n_fft=n_fft,
            window_type=window_type,
            center=center
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run RMSEnergyExtractor init
        RMSEnergyExtractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(RMSEnergyExtractorProcessor, self).__init__(**kwargs)
class SpectralCentroidExtractorProcessor(FeatureExtractorProcessor, SpectralCentroidExtractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with SpectralCentroidExtractor."""

    def __init__(self,
                 fs=44100,
                 win_length_samples=None, hop_length_samples=None, win_length_seconds=0.04, hop_length_seconds=0.02,
                 spectrogram_type='magnitude', n_fft=2048, window_type='hamming_asymmetric',
                 center=True,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        win_length_samples : int
            Window length in samples.
            Default value None

        hop_length_samples : int
            Hop length in samples.
            Default value None

        win_length_seconds : float
            Window length in seconds.
            Default value 0.04

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        spectrogram_type : str
            Spectrogram type, magnitude or power spectrogram.
            Default value 'magnitude'

        n_fft : int
            Length of the FFT window.
            Default value 2048

        window_type : str
            Window function type.
            Default value 'hamming_asymmetric'

        center : bool
            If True, input signal is padded so that the frame is centered at hop length.
            Default value True

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            win_length_samples=win_length_samples,
            hop_length_samples=hop_length_samples,
            win_length_seconds=win_length_seconds,
            hop_length_seconds=hop_length_seconds,
            spectrogram_type=spectrogram_type,
            n_fft=n_fft,
            window_type=window_type,
            center=center
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run SpectralCentroidExtractor init
        SpectralCentroidExtractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(SpectralCentroidExtractorProcessor, self).__init__(**kwargs)
class OpenL3ExtractorProcessor(FeatureExtractorProcessor, OpenL3Extractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with OpenL3Extractor."""

    def __init__(self,
                 fs=44100,
                 hop_length_samples=None, hop_length_seconds=0.02,
                 model=None, input_repr='mel256', content_type="music",
                 embedding_size=6144,
                 center=True, batch_size=32, verbose=False,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        hop_length_samples : int
            Hop length in samples.
            Default value None

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        model : keras.models.Model or None
            Loaded model object. If a model is provided, then `input_repr`, `content_type`, and
            `embedding_size` will be ignored. If None is provided, the model will be loaded using
            the provided values of `input_repr`, `content_type` and `embedding_size`.
            Default value None

        input_repr : "linear", "mel128", or "mel256"
            Spectrogram representation used for model. Ignored if `model` is a valid Keras model.
            Default value "mel256"

        content_type : "music" or "env"
            Type of content used to train the embedding model. Ignored if `model` is a valid Keras model.
            Default value "music"

        embedding_size : 6144 or 512
            Embedding dimensionality. Ignored if `model` is a valid Keras model.
            Default value 6144

        center : bool
            If True, pads beginning of signal so timestamps correspond to center of window.
            Default value True

        batch_size : int
            Batch size used for input to embedding model
            Default value 32

        verbose : bool
            If True, prints verbose messages.
            Default value False

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            hop_length_samples=hop_length_samples,
            hop_length_seconds=hop_length_seconds,
            model=model,
            input_repr=input_repr,
            content_type=content_type,
            embedding_size=embedding_size,
            center=center,
            batch_size=batch_size,
            verbose=verbose
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run OpenL3Extractor init
        OpenL3Extractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(OpenL3ExtractorProcessor, self).__init__(**kwargs)
class TorchOpenL3ExtractorProcessor(FeatureExtractorProcessor, TorchOpenL3Extractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with TorchOpenL3Extractor."""

    def __init__(self,
                 fs=44100,
                 hop_length_samples=None, hop_length_seconds=0.02,
                 model=None, input_repr='mel256', content_type="music",
                 embedding_size=6144,
                 center=True, batch_size=32, verbose=False,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        hop_length_samples : int
            Hop length in samples.
            Default value None

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        model : model object or None
            Loaded model object. If a model is provided, then `input_repr`, `content_type`, and
            `embedding_size` will be ignored. If None is provided, the model will be loaded using
            the provided values of `input_repr`, `content_type` and `embedding_size`.
            NOTE(review): earlier documentation gave the type as keras.models.Model, confirm the
            expected model type for the torch based extractor.
            Default value None

        input_repr : "linear", "mel128", or "mel256"
            Spectrogram representation used for model. Ignored if `model` is given.
            Default value "mel256"

        content_type : "music" or "env"
            Type of content used to train the embedding model. Ignored if `model` is given.
            Default value "music"

        embedding_size : 6144 or 512
            Embedding dimensionality. Ignored if `model` is given.
            Default value 6144

        center : bool
            If True, pads beginning of signal so timestamps correspond to center of window.
            Default value True

        batch_size : int
            Batch size used for input to embedding model
            Default value 32

        verbose : bool
            If True, prints verbose messages.
            Default value False

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            hop_length_samples=hop_length_samples,
            hop_length_seconds=hop_length_seconds,
            model=model,
            input_repr=input_repr,
            content_type=content_type,
            embedding_size=embedding_size,
            center=center,
            batch_size=batch_size,
            verbose=verbose
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run TorchOpenL3Extractor init
        TorchOpenL3Extractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(TorchOpenL3ExtractorProcessor, self).__init__(**kwargs)
class EdgeL3ExtractorProcessor(FeatureExtractorProcessor, EdgeL3Extractor):
    """Processing chain wrapper combining FeatureExtractorProcessor with EdgeL3Extractor."""

    def __init__(self,
                 fs=44100,
                 hop_length_samples=None, hop_length_seconds=0.02,
                 model=None, retrain_type='ft', sparsity=95.45,
                 center=True, verbose=False,
                 **kwargs):
        """Constructor

        Parameters
        ----------
        fs : int
            Sampling rate of the incoming signal.
            Default value 44100

        hop_length_samples : int
            Hop length in samples.
            Default value None

        hop_length_seconds : float
            Hop length in seconds.
            Default value 0.02

        model : keras.models.Model or None
            Loaded model object. If a model is provided, then `sparsity` will be ignored.
            If None is provided, the model will be loaded using the provided `sparsity` value.
            Default value None

        retrain_type : {'ft', 'kd'}
            Type of retraining for the sparsified weights of L3 audio model. 'ft' chooses the
            fine-tuning method and 'kd' returns knowledge distilled model.
            Default value "ft"

        sparsity : {95.45, 53.5, 63.5, 72.3, 73.5, 81.0, 87.0, 90.5}
            The desired sparsity of audio model.
            Default value 95.45

        center : bool
            If True, pads beginning of signal so timestamps correspond to center of window.
            Default value True

        verbose : bool
            If True, prints verbose messages.
            Default value False

        """

        # Merge named arguments into kwargs so that each initializer below gets the same settings
        kwargs.update(
            fs=fs,
            hop_length_samples=hop_length_samples,
            hop_length_seconds=hop_length_seconds,
            model=model,
            retrain_type=retrain_type,
            sparsity=sparsity,
            center=center,
            verbose=verbose
        )

        # Run FeatureExtractorProcessor init
        FeatureExtractorProcessor.__init__(self, **kwargs)

        # Run EdgeL3Extractor init
        EdgeL3Extractor.__init__(self, **kwargs)

        # Run super init to call init of mixins too
        super(EdgeL3ExtractorProcessor, self).__init__(**kwargs)