diff --git a/src/audio/mfcc/mfcc.c b/src/audio/mfcc/mfcc.c index 9874edea4be5..656e3d9b7bf7 100644 --- a/src/audio/mfcc/mfcc.c +++ b/src/audio/mfcc/mfcc.c @@ -38,13 +38,13 @@ SOF_DEFINE_REG_UUID(mfcc); __cold_rodata const struct mfcc_func_map mfcc_fm[] = { #if CONFIG_FORMAT_S16LE - {SOF_IPC_FRAME_S16_LE, mfcc_s16_default}, + {SOF_IPC_FRAME_S16_LE, mfcc_s16_default}, #endif /* CONFIG_FORMAT_S16LE */ #if CONFIG_FORMAT_S24LE - {SOF_IPC_FRAME_S24_4LE, NULL}, + {SOF_IPC_FRAME_S24_4LE, mfcc_s24_default}, #endif /* CONFIG_FORMAT_S24LE */ #if CONFIG_FORMAT_S32LE - {SOF_IPC_FRAME_S32_LE, NULL}, + {SOF_IPC_FRAME_S32_LE, mfcc_s32_default}, #endif /* CONFIG_FORMAT_S32LE */ }; diff --git a/src/audio/mfcc/mfcc_common.c b/src/audio/mfcc/mfcc_common.c index 688c7afac9b2..bba1253f9740 100644 --- a/src/audio/mfcc/mfcc_common.c +++ b/src/audio/mfcc/mfcc_common.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2023 Intel Corporation. All rights reserved. +// Copyright(c) 2023-2026 Intel Corporation. // // Author: Andrula Song @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -36,8 +37,10 @@ LOG_MODULE_REGISTER(mfcc_common, CONFIG_SOF_LOG_LEVEL); * The main processing function for MFCC */ -static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *state) +static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_comp_data *cd) { + struct sof_mfcc_config *config = cd->config; + struct mfcc_state *state = &cd->state; struct mfcc_buffer *buf = &state->buf; struct mfcc_fft *fft = &state->fft; int mel_scale_shift; @@ -45,6 +48,10 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *stat int i; int m; int cc_count = 0; + int32_t s; + int16_t mel_value; + int16_t peak; + int16_t clamp_value; /* Phase 1, wait until whole fft_size is filled with valid data. This way * first output cepstral coefficients originate from streamed data and not @@ -119,16 +126,59 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *stat state->mel_spectra->data, mel_scale_shift); #endif - /* Multiply Mel spectra with DCT matrix to get cepstral coefficients */ - mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q8.7 */ - mat_multiply(state->mel_spectra, state->dct.matrix, state->cepstral_coef); + if (state->mel_only) { + /* In Mel-only mode output Mel log spectra directly */ + cc_count += state->dct.num_in; + + /* Find peak mel value and track state->mmax */ + if (config->dynamic_mmax) { + peak = state->mel_spectra->data[0]; + for (i = 1; i < state->dct.num_in; i++) { + if (state->mel_spectra->data[i] > peak) + peak = state->mel_spectra->data[i]; + } + + /* Jump to peak immediately if higher, decay otherwise */ + if (peak > state->mmax) { + state->mmax = peak; + } else { + /* Q8.7 * Q1.15, result Q8.7. The coefficient is small so + * no need for saturation. + */ + s = (int32_t)peak - state->mmax; + state->mmax += + Q_MULTSR_32X32(s, config->mmax_coef, 7, 15, 7); + } + } + + /* Clamp Mel values lower than mmax - top_db, add offset, and scale */ + clamp_value = state->mmax - config->top_db; + for (i = 0; i < state->dct.num_in; i++) { + mel_value = state->mel_spectra->data[i]; + if (mel_value < clamp_value) + mel_value = clamp_value; + + /* Q8.7 * Q4.12, result 8.7 */ + s = (int32_t)mel_value + config->mel_offset; + state->mel_spectra->data[i] = + sat_int16(Q_MULTSR_32X32(s, config->mel_scale, 7, 12, 7)); + } + + /* Enable this to check mmax decay */ + comp_dbg(dev, "state->mmax = %d", state->mmax); + } else { + /* Multiply Mel spectra with DCT matrix to get cepstral coefficients */ + mat_init_16b(state->cepstral_coef, 1, state->dct.num_out, 7); /* Q8.7 */ + mat_multiply(state->mel_spectra, state->dct.matrix, state->cepstral_coef); - /* Apply cepstral lifter */ - if (state->lifter.cepstral_lifter != 0) - mat_multiply_elementwise(state->cepstral_coef, state->lifter.matrix, - state->cepstral_coef); + /* Apply cepstral lifter */ + if (state->lifter.cepstral_lifter != 0) { + mat_multiply_elementwise(state->cepstral_coef, state->lifter.matrix, + state->cepstral_coef); + } - cc_count += state->dct.num_out; + cc_count += state->dct.num_out; + } /* Output to sink buffer */ } @@ -140,6 +190,44 @@ static int mfcc_stft_process(const struct comp_dev *dev, struct mfcc_state *stat } #if CONFIG_FORMAT_S16LE +static int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, int16_t *w_ptr, + int samples) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied < samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + n = MIN(n, nmax); + memset(w_ptr, 0, n * sizeof(int16_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + } + + return w_ptr; +} + +static int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, + int samples, int16_t *r_ptr) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied < samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + n = MIN(n, nmax); + /* Not using memcpy_s() due to speed need */ + memcpy(w_ptr, r_ptr, n * sizeof(int16_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + r_ptr += n; + } + + return w_ptr; +} + void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer *bsource, struct output_stream_buffer *bsink, int frames) { @@ -149,35 +237,212 @@ void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer struct mfcc_buffer *buf = &cd->state.buf; uint32_t magic = MFCC_MAGIC; int16_t *w_ptr = audio_stream_get_wptr(sink); - // int num_magic = sizeof(magic) / sizeof(int16_t); const int num_magic = 2; int num_ceps; - int zero_samples; + int sink_samples; + int to_copy; /* Get samples from source buffer */ mfcc_source_copy_s16(bsource, buf, &state->emph, frames, state->source_channel); - /* Run STFT and processing after FFT: Mel auditory filter and DCT. The sink - * buffer is updated during STDF processing. - */ - num_ceps = mfcc_stft_process(mod->dev, state); + /* Run STFT and processing after FFT: Mel auditory filter and DCT. */ + num_ceps = mfcc_stft_process(mod->dev, cd); - /* Done, copy data to sink. This works only if the period has room for magic (2) - * plus num_ceps int16_t samples. TODO: split ceps over multiple periods. - */ - zero_samples = frames * audio_stream_get_channels(sink); + /* If new output produced, set up pointer into scratch data and mark magic pending */ if (num_ceps > 0) { - zero_samples -= num_ceps + num_magic; + if (state->mel_only) + state->out_data_ptr = state->mel_spectra->data; + else + state->out_data_ptr = state->cepstral_coef->data; + + state->out_remain = num_ceps; + state->magic_pending = true; + } + + /* Write to sink, limited by period size */ + sink_samples = frames * audio_stream_get_channels(sink); + + /* Write magic word first if pending */ + if (state->magic_pending && sink_samples >= num_magic) { w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_magic, (int16_t *)&magic); - w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, num_ceps, state->cepstral_coef->data); + sink_samples -= num_magic; + state->magic_pending = false; } - w_ptr = mfcc_sink_copy_zero_s16(sink, w_ptr, zero_samples); + /* Write cepstral/mel data from scratch buffer */ + to_copy = MIN(state->out_remain, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s16(sink, w_ptr, to_copy, state->out_data_ptr); + state->out_data_ptr += to_copy; + state->out_remain -= to_copy; + sink_samples -= to_copy; + } + + /* Zero-fill remaining sink samples */ + w_ptr = mfcc_sink_copy_zero_s16(sink, w_ptr, sink_samples); } #endif /* CONFIG_FORMAT_S16LE */ +#if CONFIG_FORMAT_S24LE || CONFIG_FORMAT_S32LE +static int32_t *mfcc_sink_copy_zero_s32(const struct audio_stream *sink, int32_t *w_ptr, + int samples) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied < samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s32(sink, w_ptr); + n = MIN(n, nmax); + memset(w_ptr, 0, n * sizeof(int32_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + } + + return w_ptr; +} + +static int32_t *mfcc_sink_copy_data_s32(const struct audio_stream *sink, int32_t *w_ptr, + int samples, int32_t *r_ptr) +{ + int copied; + int nmax; + int n; + + for (copied = 0; copied < samples; copied += n) { + nmax = samples - copied; + n = audio_stream_samples_without_wrap_s32(sink, w_ptr); + n = MIN(n, nmax); + /* Not using memcpy_s() due to speed need */ + memcpy(w_ptr, r_ptr, n * sizeof(int32_t)); + w_ptr = audio_stream_wrap(sink, w_ptr + n); + r_ptr += n; + } + + return w_ptr; +} +#endif /* CONFIG_FORMAT_S24LE || CONFIG_FORMAT_S32LE */ + #if CONFIG_FORMAT_S24LE +void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames) +{ + struct audio_stream *sink = bsink->data; + struct mfcc_comp_data *cd = module_get_private_data(mod); + struct mfcc_state *state = &cd->state; + struct mfcc_buffer *buf = &cd->state.buf; + uint32_t magic = MFCC_MAGIC; + int32_t *w_ptr = audio_stream_get_wptr(sink); + const int num_magic = 1; /* one int32_t word for magic */ + int num_ceps; + int sink_samples; + int remain_s32; + int to_copy; + + /* Get samples from source buffer */ + mfcc_source_copy_s24(bsource, buf, &state->emph, frames, state->source_channel); + + /* Run STFT and processing after FFT */ + num_ceps = mfcc_stft_process(mod->dev, cd); + + /* If new output produced, set up pointer into scratch data */ + if (num_ceps > 0) { + if (state->mel_only) + state->out_data_ptr = state->mel_spectra->data; + else + state->out_data_ptr = state->cepstral_coef->data; + + state->out_remain = num_ceps; + state->magic_pending = true; + } + + /* Write to sink, limited by period size */ + sink_samples = frames * audio_stream_get_channels(sink); + + /* Write magic word first if pending */ + if (state->magic_pending && sink_samples >= num_magic) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic); + sink_samples -= num_magic; + state->magic_pending = false; + } + + /* Write cepstral/mel data packed as int32_t from scratch buffer */ + remain_s32 = (state->out_remain + 1) / 2; + to_copy = MIN(remain_s32, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy, + (int32_t *)state->out_data_ptr); + state->out_data_ptr += to_copy * 2; + state->out_remain -= to_copy * 2; + if (state->out_remain < 0) + state->out_remain = 0; + + sink_samples -= to_copy; + } + + /* Zero-fill remaining sink samples */ + w_ptr = mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples); +} #endif /* CONFIG_FORMAT_S24LE */ #if CONFIG_FORMAT_S32LE +void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames) +{ + struct audio_stream *sink = bsink->data; + struct mfcc_comp_data *cd = module_get_private_data(mod); + struct mfcc_state *state = &cd->state; + struct mfcc_buffer *buf = &cd->state.buf; + uint32_t magic = MFCC_MAGIC; + int32_t *w_ptr = audio_stream_get_wptr(sink); + const int num_magic = 1; /* one int32_t word for magic */ + int num_ceps; + int sink_samples; + int remain_s32; + int to_copy; + + /* Get samples from source buffer */ + mfcc_source_copy_s32(bsource, buf, &state->emph, frames, state->source_channel); + + /* Run STFT and processing after FFT */ + num_ceps = mfcc_stft_process(mod->dev, cd); + + /* If new output produced, set up pointer into scratch data */ + if (num_ceps > 0) { + if (state->mel_only) + state->out_data_ptr = state->mel_spectra->data; + else + state->out_data_ptr = state->cepstral_coef->data; + + state->out_remain = num_ceps; + state->magic_pending = true; + } + + /* Write to sink, limited by period size */ + sink_samples = frames * audio_stream_get_channels(sink); + + /* Write magic word first if pending */ + if (state->magic_pending && sink_samples >= num_magic) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, num_magic, (int32_t *)&magic); + sink_samples -= num_magic; + state->magic_pending = false; + } + + /* Write cepstral/mel data packed as int32_t from scratch buffer */ + remain_s32 = (state->out_remain + 1) / 2; + to_copy = MIN(remain_s32, sink_samples); + if (to_copy > 0) { + w_ptr = mfcc_sink_copy_data_s32(sink, w_ptr, to_copy, + (int32_t *)state->out_data_ptr); + state->out_data_ptr += to_copy * 2; + state->out_remain -= to_copy * 2; + if (state->out_remain < 0) + state->out_remain = 0; + + sink_samples -= to_copy; + } + + /* Zero-fill remaining sink samples */ + w_ptr = mfcc_sink_copy_zero_s32(sink, w_ptr, sink_samples); +} #endif /* CONFIG_FORMAT_S32LE */ diff --git a/src/audio/mfcc/mfcc_generic.c b/src/audio/mfcc/mfcc_generic.c index ecc95474326b..48d2b2e88997 100644 --- a/src/audio/mfcc/mfcc_generic.c +++ b/src/audio/mfcc/mfcc_generic.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2022 Intel Corporation. All rights reserved. +// Copyright(c) 2022-2026 Intel Corporation. // // Author: Seppo Ingalsuo @@ -26,53 +26,6 @@ * MFCC algorithm code */ -void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, - struct mfcc_pre_emph *emph, int frames, int source_channel) -{ - struct audio_stream *source = bsource->data; - int32_t s; - int16_t *x0; - int16_t *x = audio_stream_get_rptr(source); - int16_t *w = buf->w_ptr; - int copied; - int nmax; - int n1; - int n2; - int n; - int i; - int num_channels = audio_stream_get_channels(source); - - /* Copy from source to pre-buffer for FFT. - * The pre-emphasis filter is done in this step. - */ - for (copied = 0; copied < frames; copied += n) { - nmax = frames - copied; - n1 = audio_stream_frames_without_wrap(source, x); - n2 = mfcc_buffer_samples_without_wrap(buf, w); - n = MIN(n1, n2); - n = MIN(n, nmax); - x0 = x + source_channel; - for (i = 0; i < n; i++) { - if (emph->enable) { - /* Q1.15 x Q1.15 -> Q2.30 */ - s = (int32_t)emph->delay * emph->coef + Q_SHIFT_LEFT(*x0, 15, 30); - *w = sat_int16(Q_SHIFT_RND(s, 30, 15)); - emph->delay = *x0; - } else { - *w = *x0; - } - x0 += num_channels; - w++; - } - - x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); - w = mfcc_buffer_wrap(buf, w); - } - buf->s_avail += copied; - buf->s_free -= copied; - buf->w_ptr = w; -} - void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length) { @@ -189,53 +142,160 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) } #if CONFIG_FORMAT_S16LE - -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples) +void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int32_t s; + int16_t *x0; + int16_t *x = audio_stream_get_rptr(source); + int16_t *w = buf->w_ptr; int copied; int nmax; - int i; + int n1; + int n2; int n; + int i; + int num_channels = audio_stream_get_channels(source); - for (copied = 0; copied < samples; copied += n) { - nmax = samples - copied; - n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + /* Copy from source to pre-buffer for FFT. + * The pre-emphasis filter is done in this step. + */ + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n1 = audio_stream_frames_without_wrap(source, x); + n2 = mfcc_buffer_samples_without_wrap(buf, w); + n = MIN(n1, n2); n = MIN(n, nmax); + x0 = x + source_channel; for (i = 0; i < n; i++) { - *w_ptr = 0; - w_ptr++; + if (emph->enable) { + /* Q1.15 x Q1.15 -> Q2.30 */ + s = (int32_t)emph->delay * emph->coef + Q_SHIFT_LEFT(*x0, 15, 30); + *w = sat_int16(Q_SHIFT_RND(s, 30, 15)); + emph->delay = *x0; + } else { + *w = *x0; + } + x0 += num_channels; + w++; } - w_ptr = audio_stream_wrap(sink, w_ptr); + x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); + w = mfcc_buffer_wrap(buf, w); } - - return w_ptr; + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = w; } +#endif /* CONFIG_FORMAT_S16LE */ + +#if CONFIG_FORMAT_S24LE -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t *r_ptr) +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int32_t tmp, s; + int32_t *x0; + int32_t *x = audio_stream_get_rptr(source); + int16_t *w = buf->w_ptr; int copied; int nmax; - int i; + int n1; + int n2; int n; + int i; + int num_channels = audio_stream_get_channels(source); - for (copied = 0; copied < samples; copied += n) { - nmax = samples - copied; - n = audio_stream_samples_without_wrap_s16(sink, w_ptr); + /* Copy from source to pre-buffer for FFT. + * The pre-emphasis filter is done in this step. + * S24_4LE data is in 32-bit container, shift left by 8 to Q1.31, + * then convert to Q1.15 with rounding. + */ + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n1 = audio_stream_frames_without_wrap(source, x); + n2 = mfcc_buffer_samples_without_wrap(buf, w); + n = MIN(n1, n2); n = MIN(n, nmax); + x0 = x + source_channel; for (i = 0; i < n; i++) { - *w_ptr = *r_ptr; - r_ptr++; - w_ptr++; + if (emph->enable) { + /* Convert to Q1.31, ignore highest byte */ + s = (int32_t)((uint32_t)*x0 << 8); + /* Q1.15 x Q1.15 -> Q2.30 */ + tmp = (int32_t)emph->delay * emph->coef + Q_SHIFT(s, 31, 30); + *w = sat_int16(Q_SHIFT_RND(tmp, 30, 15)); + emph->delay = sat_int16(Q_SHIFT_RND(s, 31, 15)); + } else { + /* Convert to Q1.31, ignore highest byte */ + s = (int32_t)((uint32_t)*x0 << 8); + *w = sat_int16(Q_SHIFT_RND(s, 31, 15)); + } + x0 += num_channels; + w++; } - w_ptr = audio_stream_wrap(sink, w_ptr); + x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); + w = mfcc_buffer_wrap(buf, w); } + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = w; +} + +#endif /* CONFIG_FORMAT_S24LE */ + +#if CONFIG_FORMAT_S32LE - return w_ptr; +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) +{ + struct audio_stream *source = bsource->data; + int32_t s; + int32_t *x0; + int32_t *x = audio_stream_get_rptr(source); + int16_t *w = buf->w_ptr; + int copied; + int nmax; + int n1; + int n2; + int n; + int i; + int num_channels = audio_stream_get_channels(source); + + /* Copy from source to pre-buffer for FFT. + * The pre-emphasis filter is done in this step. + * S32 data is in 32-bit container, shift right by 16 to get 16-bit. + */ + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n1 = audio_stream_frames_without_wrap(source, x); + n2 = mfcc_buffer_samples_without_wrap(buf, w); + n = MIN(n1, n2); + n = MIN(n, nmax); + x0 = x + source_channel; + for (i = 0; i < n; i++) { + if (emph->enable) { + /* Q1.15 x Q1.15 -> Q2.30 */ + s = (int32_t)emph->delay * emph->coef + Q_SHIFT(*x0, 31, 30); + *w = sat_int16(Q_SHIFT_RND(s, 30, 15)); + emph->delay = sat_int16(Q_SHIFT_RND(*x0, 31, 15)); + } else { + *w = sat_int16(Q_SHIFT_RND(*x0, 31, 15)); + } + x0 += num_channels; + w++; + } + + x = audio_stream_wrap(source, x + n * audio_stream_get_channels(source)); + w = mfcc_buffer_wrap(buf, w); + } + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = w; } +#endif /* CONFIG_FORMAT_S32LE */ -#endif /* CONFIG_FORMAT_S16LE */ -#endif +#endif /* MFCC_GENERIC */ diff --git a/src/audio/mfcc/mfcc_hifi3.c b/src/audio/mfcc/mfcc_hifi3.c index b3b5d99967db..153048d67bf7 100644 --- a/src/audio/mfcc/mfcc_hifi3.c +++ b/src/audio/mfcc/mfcc_hifi3.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2023 Intel Corporation. All rights reserved. +// Copyright(c) 2023-2026 Intel Corporation. // // Author: Andrula Song @@ -35,6 +35,7 @@ static inline void set_circular_buf0(const void *start, const void *end) * MFCC algorithm code */ +#if CONFIG_FORMAT_S16LE void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, struct mfcc_pre_emph *emph, int frames, int source_channel) { @@ -92,6 +93,7 @@ void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffe buf->s_free -= copied; buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S16LE */ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length) @@ -192,6 +194,7 @@ int mfcc_normalize_fft_buffer(struct mfcc_state *state) return shift; } #endif + void mfcc_apply_window(struct mfcc_state *state, int input_shift) { struct mfcc_fft *fft = &state->fft; @@ -229,65 +232,124 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) #endif } -#if CONFIG_FORMAT_S16LE - -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples) +#if CONFIG_FORMAT_S24LE +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int copied; + int nmax; + int n; int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - const int inc = sizeof(ae_int16); - ae_valign outu = AE_ZALIGN64(); - ae_int16x4 zero = AE_ZERO16(); - - set_circular_buf0(sink->addr, sink->end_addr); - - for (i = 0; i < n; i++) - AE_SA16X4_IC(zero, outu, out); + int num_channels = audio_stream_get_channels(source); + ae_int32 *in; + ae_int32 *x = (ae_int32 *)audio_stream_get_rptr(source); + ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef = emph->coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) - AE_S16_0_XC(zero, (ae_int16 *)out, inc); + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n = audio_stream_frames_without_wrap(source, x); + n = MIN(n, nmax); + nmax = mfcc_buffer_samples_without_wrap(buf, (int16_t *)out); + n = MIN(n, nmax); + in = x + source_channel; + if (emph->enable) { + delay = emph->delay; + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + /* S24_4LE: shift right by 8 to get 16-bit, then convert */ + sample32 = AE_SRAI32(sample32, 8); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_IP(sample, out, 2); + } + emph->delay = delay; + } else { + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + sample32 = AE_SRAI32(sample32, 8); + sample = AE_SAT16X4(sample32, sample32); + AE_S16_0_IP(sample, out, 2); + } + } - return (int16_t *)out; + x = audio_stream_wrap(source, x + n * num_channels); + out = (ae_int16 *)mfcc_buffer_wrap(buf, (int16_t *)out); + } + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S24LE */ -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t *r_ptr) +#if CONFIG_FORMAT_S32LE +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int copied; + int nmax; + int n; int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - ae_int16x4 *in = (ae_int16x4 *)r_ptr; - ae_valign outu = AE_ZALIGN64(); - ae_valign inu = AE_ZALIGN64(); - const int inc = sizeof(ae_int16); - ae_int16x4 in_sample; + int num_channels = audio_stream_get_channels(source); + ae_int32 *in; + ae_int32 *x = (ae_int32 *)audio_stream_get_rptr(source); + ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef = emph->coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; - set_circular_buf0(sink->addr, sink->end_addr); + for (copied = 0; copied < frames; copied += n) { + nmax = frames - copied; + n = audio_stream_frames_without_wrap(source, x); + n = MIN(n, nmax); + nmax = mfcc_buffer_samples_without_wrap(buf, (int16_t *)out); + n = MIN(n, nmax); + in = x + source_channel; + if (emph->enable) { + delay = emph->delay; + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + /* S32: shift right by 16 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_IP(sample, out, 2); + } + emph->delay = delay; + } else { + for (i = 0; i < n; i++) { + AE_L32_XP(sample32, in, in_inc); + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + AE_S16_0_IP(sample, out, 2); + } + } - inu = AE_LA64_PP(in); - for (i = 0; i < n; i++) { - AE_LA16X4_IP(in_sample, inu, in); - AE_SA16X4_IC(in_sample, outu, out); - } - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) { - AE_L16_XP(in_sample, (ae_int16 *)in, inc); - AE_S16_0_XC(in_sample, (ae_int16 *)out, inc); + x = audio_stream_wrap(source, x + n * num_channels); + out = (ae_int16 *)mfcc_buffer_wrap(buf, (int16_t *)out); } - - return (int16_t *)out; + buf->s_avail += copied; + buf->s_free -= copied; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S32LE */ -#endif /* CONFIG_FORMAT_S16LE */ -#endif +#endif /* MFCC_HIFI3 */ diff --git a/src/audio/mfcc/mfcc_hifi4.c b/src/audio/mfcc/mfcc_hifi4.c index 60a4de62ec23..c9bd59ada18b 100644 --- a/src/audio/mfcc/mfcc_hifi4.c +++ b/src/audio/mfcc/mfcc_hifi4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: BSD-3-Clause // -// Copyright(c) 2023 Intel Corporation. All rights reserved. +// Copyright(c) 2023-2026 Intel Corporation. // // Author: Andrula Song @@ -41,6 +41,8 @@ static inline void set_circular_buf1(const void *start, const void *end) /* * MFCC algorithm code */ + +#if CONFIG_FORMAT_S16LE void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, struct mfcc_pre_emph *emph, int frames, int source_channel) { @@ -87,6 +89,7 @@ void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffe buf->s_free -= frames; buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S16LE */ void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length) @@ -225,65 +228,106 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift) #endif } -#if CONFIG_FORMAT_S16LE - -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples) +#if CONFIG_FORMAT_S24LE +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int num_channels = audio_stream_get_channels(source); + ae_int32 *in = (ae_int32 *)source->r_ptr + source_channel; + ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; + const int out_inc = sizeof(ae_int16); int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - const int inc = sizeof(ae_int16); - ae_valign outu = AE_ZALIGN64(); - ae_int16x4 zero = AE_ZERO16(); - - set_circular_buf0(sink->addr, sink->end_addr); - for (i = 0; i < n; i++) - AE_SA16X4_IC(zero, outu, out); + set_circular_buf1(buf->addr, buf->end_addr); + set_circular_buf0(source->addr, source->end_addr); - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) - AE_S16_0_XC(zero, (ae_int16 *)out, inc); + if (emph->enable) { + delay = emph->delay; + coef = emph->coef; + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + /* S24_4LE: shift right by 8 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 8); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_XC1(sample, out, out_inc); + } + emph->delay = delay; + } else { + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + sample32 = AE_SRAI32(sample32, 8); + sample = AE_SAT16X4(sample32, sample32); + AE_S16_0_XC1(sample, out, out_inc); + } + } - return (int16_t *)out; + buf->s_avail += frames; + buf->s_free -= frames; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S24LE */ -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t *r_ptr) +#if CONFIG_FORMAT_S32LE +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel) { + struct audio_stream *source = bsource->data; + int num_channels = audio_stream_get_channels(source); + ae_int32 *in = (ae_int32 *)source->r_ptr + source_channel; + ae_int16 *out = (ae_int16 *)buf->w_ptr; + ae_int32x2 sample32; + ae_int16x4 sample; + ae_int32x2 temp; + ae_int16x4 coef; + ae_int16x4 delay; + const int in_inc = sizeof(ae_int32) * num_channels; + const int out_inc = sizeof(ae_int16); int i; - int n = samples >> 2; - int m = samples & 0x03; - ae_int16x4 *out = (ae_int16x4 *)w_ptr; - ae_int16x4 *in = (ae_int16x4 *)r_ptr; - ae_valign outu = AE_ZALIGN64(); - ae_valign inu = AE_ZALIGN64(); - const int inc = sizeof(ae_int16); - ae_int16x4 in_sample; - - set_circular_buf0(sink->addr, sink->end_addr); - - inu = AE_LA64_PP(in); - for (i = 0; i < n; i++) { - AE_LA16X4_IP(in_sample, inu, in); - AE_SA16X4_IC(in_sample, outu, out); - } - AE_SA64POS_FP(outu, out); - /* process the left samples that less than 4 - * one by one to avoid memory access overrun - */ - for (i = 0; i < m ; i++) { - AE_L16_XP(in_sample, (ae_int16 *)in, inc); - AE_S16_0_XC(in_sample, (ae_int16 *)out, inc); + + set_circular_buf1(buf->addr, buf->end_addr); + set_circular_buf0(source->addr, source->end_addr); + + if (emph->enable) { + delay = emph->delay; + coef = emph->coef; + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + /* S32: shift right by 16 to get 16-bit */ + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + /* Q1.15 -> Q1.31 */ + temp = AE_CVT32X2F16_10(sample); + AE_MULAF16SS_00(temp, delay, coef); + delay = sample; + sample = AE_ROUND16X4F32SSYM(temp, temp); + AE_S16_0_XC1(sample, out, out_inc); + } + emph->delay = delay; + } else { + for (i = 0; i < frames; i++) { + AE_L32_XC(sample32, in, in_inc); + sample32 = AE_SRAI32(sample32, 16); + sample = AE_SAT16X4(sample32, sample32); + AE_S16_0_XC1(sample, out, out_inc); + } } - return (int16_t *)out; + buf->s_avail += frames; + buf->s_free -= frames; + buf->w_ptr = (int16_t *)out; } +#endif /* CONFIG_FORMAT_S32LE */ -#endif /* CONFIG_FORMAT_S16LE */ -#endif +#endif /* MFCC_HIFI4 */ diff --git a/src/audio/mfcc/mfcc_setup.c b/src/audio/mfcc/mfcc_setup.c index dded450673ad..0a9fc19f0f53 100644 --- a/src/audio/mfcc/mfcc_setup.c +++ b/src/audio/mfcc/mfcc_setup.c @@ -50,10 +50,12 @@ static int mfcc_get_window(struct mfcc_state *state, enum sof_mfcc_fft_window_ty case MFCC_HAMMING_WINDOW: win_hamming_16b(state->window, fft->fft_size); return 0; + case MFCC_HANN_WINDOW: + win_hann_16b(state->window, fft->fft_size); + return 0; case MFCC_POVEY_WINDOW: win_povey_16b(state->window, fft->fft_size); return 0; - default: return -EINVAL; } @@ -139,10 +141,9 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i return -EINVAL; } - comp_info(dev, "source_channel = %d, stream_channels = %d", - config->channel, channels); - if (config->channel >= channels) { - comp_err(dev, "Illegal channel"); + if (config->channel >= channels || (config->channel < 0 && channels != 1)) { + comp_err(dev, "Illegal source_channel %d for stream channels %d", config->channel, + channels); return -EINVAL; } @@ -151,6 +152,7 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i else state->source_channel = config->channel; + state->mmax = config->mmax_init; state->emph.enable = config->preemphasis_coefficient > 0; state->emph.coef = -config->preemphasis_coefficient; /* Negate config parameter */ fft->fft_size = config->frame_length; @@ -249,23 +251,37 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i goto free_fft_out; } - /* Setup DCT */ - dct->num_in = config->num_mel_bins; - dct->num_out = config->num_ceps; - dct->type = (enum dct_type)config->dct; - dct->ortho = true; - ret = mod_dct_initialize_16(mod, dct); - if (ret < 0) { - comp_err(dev, "Failed DCT init"); - goto free_melfb_data; - } - - state->lifter.num_ceps = config->num_ceps; - state->lifter.cepstral_lifter = config->cepstral_lifter; /* Q7.9 max 64.0*/ - ret = mfcc_get_cepstral_lifter(mod, &state->lifter); - if (ret < 0) { - comp_err(dev, "Failed cepstral lifter"); - goto free_dct_matrix; + /* Setup DCT and cepstral lifter only when num_ceps > 0. + * When num_ceps is zero, skip DCT/lifter and output Mel + * log spectra directly. + */ + if (config->num_ceps > 0) { + dct->num_in = config->num_mel_bins; + dct->num_out = config->num_ceps; + dct->type = (enum dct_type)config->dct; + dct->ortho = true; + ret = mod_dct_initialize_16(mod, dct); + if (ret < 0) { + comp_err(dev, "Failed DCT init"); + goto free_melfb_data; + } + + state->lifter.num_ceps = config->num_ceps; + state->lifter.cepstral_lifter = config->cepstral_lifter; /* Q7.9 max 64.0*/ + ret = mfcc_get_cepstral_lifter(mod, &state->lifter); + if (ret < 0) { + comp_err(dev, "Failed cepstral lifter"); + goto free_dct_matrix; + } + + state->mel_only = false; + } else { + comp_info(dev, "num_ceps is 0, Mel log spectra output mode"); + dct->num_in = config->num_mel_bins; + dct->num_out = 0; + dct->matrix = NULL; + state->lifter.matrix = NULL; + state->mel_only = true; } /* Scratch overlay during runtime @@ -289,12 +305,40 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int sample_rate, i /* Use FFT buffer as scratch for later computed data */ state->power_spectra = (int32_t *)&fft->fft_buf[0]; state->mel_spectra = (struct mat_matrix_16b *)&fft->fft_out[0]; - state->cepstral_coef = (struct mat_matrix_16b *) - &state->mel_spectra->data[state->dct.num_in]; + if (!state->mel_only) { + state->cepstral_coef = + (struct mat_matrix_16b *)&state->mel_spectra->data[state->dct.num_in]; + } else { + state->cepstral_coef = NULL; + } + + /* Allocate output buffer for multi-period output. Size allows for + * current output data plus leftover from previous period. + */ + int max_out_per_hop = state->mel_only ? dct->num_in : dct->num_out; + + /* Check that output data can be drained within the periods spanned by one + * FFT hop. Each hop consumes fft_hop_size input samples and produces + * max_out_per_hop + 2 (magic) int16_t output values. The sink provides at + * least fft_hop_size * channels int16_t samples per hop (worst case s16). + * If output exceeds this, data accumulates and will eventually overflow. + */ + int out_per_hop = max_out_per_hop + 2; + int sink_per_hop = fft->fft_hop_size * channels; + + if (out_per_hop > sink_per_hop) { + comp_err(dev, "Output %d int16 per hop exceeds sink capacity %d (hop %d x ch %d)", + out_per_hop, sink_per_hop, fft->fft_hop_size, channels); + ret = -EINVAL; + goto free_dct_matrix; + } /* Set initial state for STFT */ state->waiting_fill = true; state->prev_samples_valid = false; + state->magic_pending = false; + state->out_data_ptr = NULL; + state->out_remain = 0; comp_dbg(dev, "done"); return 0; diff --git a/src/audio/mfcc/tune/README.txt b/src/audio/mfcc/tune/README.txt index fb8208992ed4..7ea6618896b9 100644 --- a/src/audio/mfcc/tune/README.txt +++ b/src/audio/mfcc/tune/README.txt @@ -16,7 +16,7 @@ The output file is hard-coded to mfcc.raw. The output can be plotted and retrieved with Matlab or Octave command: -[ceps, t, n] = decode_ceps('mfcc.raw', 13); +[ceps, t, n] = decode_ceps('mfcc_s16.raw', 13); In the above it's known from configuration script that MFCC was set up to output 13 cepstral coefficients from each FFT -> Mel -> DCT -> Cepstral @@ -27,3 +27,9 @@ e.g. other sound files found in computer. ./run_mfcc.sh /usr/share/sounds/gnome/default/alerts/bark.ogg ./run_mfcc.sh /usr/share/sounds/gnome/default/alerts/sonar.ogg + +The script runs the same input sample with s16/24/32 formats for +cepstral coefficients data output and Mel frequency spectrogram +output. The 80 bands Mel output can be visualized with command: + +[ceps, t, n] = decode_mel('mel_s16.raw', 80); diff --git a/src/audio/mfcc/tune/decode_mel.m b/src/audio/mfcc/tune/decode_mel.m new file mode 100644 index 000000000000..c52ad4b9f6d9 --- /dev/null +++ b/src/audio/mfcc/tune/decode_mel.m @@ -0,0 +1,101 @@ +% [mel, t, n] = decode_mel(fn, num_mel, num_channels) +% +% Input +% fn - File with MFCC data in .raw or .wav format +% num_mel - number of Mel coefficients per frame +% num_channels - needed for .raw format, omit for .wav +% +% Outputs +% mel - Mel coefficients +% t - time vector for plotting +% n - mel 1..num_mel vector for plotting + +% SPDX-License-Identifier: BSD-3-Clause +% Copyright(c) 2026 Intel Corporation. + +function [mel, t, n] = decode_mel(fn, num_mel, num_channels) + +if nargin < 3 + num_channels = 1; +end + +% MFCC stream +fs = 16e3; +qformat = 7; +magic = [25443 28006]; % ASCII 'mfcc' as int16 + +% Load output data +[data, num_channels] = get_file(fn, num_channels); + +idx1 = find(data == magic(1)); +idx = []; +for i = 1:length(idx1) + if data(idx1(i) + 1) == magic(2) + idx = [idx idx1(i)]; + end +end + +if isempty(idx) + error('No magic value markers found from stream'); +end + +period_mel = idx(2)-idx(1); +num_frames = length(idx); + +% Last frame can be incomplete due to span over multiple periods +last = idx(end) + num_mel - 1; +if (last > length(data)) + num_frames = num_frames - 1; +end + +t_mel = period_mel / num_channels / fs; +t = (0:num_frames -1) * t_mel; +n = 1:num_mel; + +mel = zeros(num_mel, num_frames); +for i = 1:num_frames + i1 = idx(i) + 2; + i2 = i1 + num_mel - 1; + mel(:,i) = data(i1:i2) / 2^qformat; +end + +figure; +imagesc(t, n, mel); +axis xy; +colormap(jet); +colorbar; +tstr = sprintf('SOF MFCC Mel coefficients (%s)', fn); +title(tstr, 'Interpreter', 'None'); +xlabel('Time (s)'); +ylabel('Mel coef #'); + +end + +function [data, num_channels] = get_file(fn, num_channels) + +[~, ~, ext] = fileparts(fn); + +switch lower(ext) + case '.raw' + fh = fopen(fn, 'r'); + data = fread(fh, 'int16'); + fclose(fh); + case '.wav' + tmp = audioread(fn, 'native'); + t = whos('tmp'); + if ~strcmp(t.class, 'int16') + error('Only 16-bit wav file format is supported'); + end + s = size(tmp); + num_channels = s(2); + if num_channels > 1 + data = int16(zeros(prod(s), 1)); + for i = 1:num_channels + data(i:num_channels:end) = tmp(:, i); + end + end + otherwise + error('Unknown audio format'); +end + +end diff --git a/src/audio/mfcc/tune/run_mfcc.sh b/src/audio/mfcc/tune/run_mfcc.sh index d531e4519755..a1b8030a6063 100755 --- a/src/audio/mfcc/tune/run_mfcc.sh +++ b/src/audio/mfcc/tune/run_mfcc.sh @@ -4,19 +4,53 @@ set -e -RAW_INPUT=in.raw -RAW_OUTPUT=mfcc.raw +RAW_INPUT_S16=in_s16.raw +RAW_INPUT_S24=in_s24.raw +RAW_INPUT_S32=in_s32.raw +RAW_OUTPUT_S16=mfcc_s16.raw +RAW_OUTPUT_S24=mfcc_s24.raw +RAW_OUTPUT_S32=mfcc_s32.raw +VALGRIND="valgrind --leak-check=full" TESTBENCH=$SOF_WORKSPACE/sof/tools/testbench/build_testbench/install/bin/sof-testbench4 -TOPOLOGY=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc16.tplg -OPT="-r 16000 -c 2 -b S16_LE -p 3,4 -t $TOPOLOGY -i $RAW_INPUT -o $RAW_OUTPUT" +TOPOLOGY_S16=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc16.tplg +TOPOLOGY_S24=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc24.tplg +TOPOLOGY_S32=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfcc32.tplg +OPT_S16="-r 16000 -c 2 -b S16_LE -p 3,4 -t $TOPOLOGY_S16" +OPT_S24="-r 16000 -c 2 -b S24_LE -p 3,4 -t $TOPOLOGY_S24" +OPT_S32="-r 16000 -c 2 -b S32_LE -p 3,4 -t $TOPOLOGY_S32" -# Convert input audio file raw 16 kHz 1 channel 16 bit -sox --encoding signed-integer "$1" -L -r 16000 -c 1 -b 16 "$RAW_INPUT" +# Convert input audio file raw 16 kHz 2 channel 16 bit +sox -R --encoding signed-integer "$1" -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16" +sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S32" +sox -R --no-dither --encoding signed-integer -L -r 16000 -c 2 -b 16 "$RAW_INPUT_S16" -b 32 "$RAW_INPUT_S24" vol 0.003906250000 # Run testbench -$TESTBENCH $OPT -i "$RAW_INPUT" -o "$RAW_OUTPUT" +$VALGRIND $TESTBENCH $OPT_S16 -i "$RAW_INPUT_S16" -o "$RAW_OUTPUT_S16" +$VALGRIND $TESTBENCH $OPT_S24 -i "$RAW_INPUT_S24" -o "$RAW_OUTPUT_S24" +$VALGRIND $TESTBENCH $OPT_S32 -i "$RAW_INPUT_S32" -o "$RAW_OUTPUT_S32" -echo ----------------------------------------------- -echo The MFCC data was output to file $RAW_OUTPUT -echo ----------------------------------------------- +echo ---------------------------------------------------------------------------------- +echo The MFCC data was output to file $RAW_OUTPUT_S16, $RAW_OUTPUT_S24, $RAW_OUTPUT_S32 +echo ---------------------------------------------------------------------------------- + +RAW_OUTPUT_S16=mel_s16.raw +RAW_OUTPUT_S24=mel_s24.raw +RAW_OUTPUT_S32=mel_s32.raw + +TESTBENCH=$SOF_WORKSPACE/sof/tools/testbench/build_testbench/install/bin/sof-testbench4 +TOPOLOGY_S16=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfccmel16.tplg +TOPOLOGY_S24=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfccmel24.tplg +TOPOLOGY_S32=$SOF_WORKSPACE/sof/tools/build_tools/topology/topology2/development/sof-hda-benchmark-mfccmel32.tplg +OPT_S16="-r 16000 -c 2 -b S16_LE -p 3,4 -t $TOPOLOGY_S16" +OPT_S24="-r 16000 -c 2 -b S24_LE -p 3,4 -t $TOPOLOGY_S24" +OPT_S32="-r 16000 -c 2 -b S32_LE -p 3,4 -t $TOPOLOGY_S32" + +# Run testbench +$VALGRIND $TESTBENCH $OPT_S16 -i "$RAW_INPUT_S16" -o "$RAW_OUTPUT_S16" +$VALGRIND $TESTBENCH $OPT_S24 -i "$RAW_INPUT_S24" -o "$RAW_OUTPUT_S24" +$VALGRIND $TESTBENCH $OPT_S32 -i "$RAW_INPUT_S32" -o "$RAW_OUTPUT_S32" + +echo ---------------------------------------------------------------------------------- +echo The MFCC Mel data was output to file $RAW_OUTPUT_S16, $RAW_OUTPUT_S24, $RAW_OUTPUT_S32 +echo ---------------------------------------------------------------------------------- diff --git a/src/audio/mfcc/tune/setup_mfcc.m b/src/audio/mfcc/tune/setup_mfcc.m index e0d42e1e034d..bd2b3f11e60b 100644 --- a/src/audio/mfcc/tune/setup_mfcc.m +++ b/src/audio/mfcc/tune/setup_mfcc.m @@ -1,23 +1,36 @@ -% setup_mfcc(cfg) +% setup_mfcc() % -% Input -% cfg - optional MFCC configuration parameters struct, see -% below from code -% -% Create binary configuration blob for MFCC component. The hex data -% is written to tools/topology/topology2/include/components/mfcc and -% tools/topology/topology1/m4/mfcc. +% Create binary configuration blobs for the MFCC component. +% The hex data is written to files in directory +% tools/topology/topology2/include/components/mfcc. % SPDX-License-Identifier: BSD-3-Clause % -% Copyright (c) 2018-2026, Intel Corporation. All rights reserved. +% Copyright (c) 2018-2026, Intel Corporation. + +function setup_mfcc() + + gen_cfg.tplg_ver = 2; + gen_cfg.ipc_ver = 4; + gen_cfg.tools_path = '../../../../tools/'; + gen_cfg.mfcc_conf_path = [gen_cfg.tools_path 'topology/topology2/include/components/mfcc/']; + + % Default blob + setup = get_mfcc_default_config(); + setup.tplg_fn = 'default.conf'; + export_mfcc_setup(gen_cfg, setup); -function setup_mfcc(cfg) + % Blob for mel spectrogram data + setup = get_mel_spectrogram_config(); + setup.tplg_fn = 'mel80.conf'; + export_mfcc_setup(gen_cfg, setup); -if nargin < 1 +end + +function cfg = get_mfcc_default_config() cfg.blackman_coef = 0.42; cfg.cepstral_lifter = 22.0; - cfg.channel = -1; % -1 expect mono, 0 left, 1 right ... + cfg.channel = 0; % -1 expect mono, 0 left, 1 right ... cfg.dither = 0.0; % no support cfg.energy_floor = 1.0; cfg.frame_length = 25.0; % ms @@ -44,26 +57,54 @@ function setup_mfcc(cfg) cfg.mel_log = 'log'; % Set to 'db' for librosa, set to 'log10' for matlab cfg.pmin = 5e-10; % Set to 1e-10 for librosa cfg.top_db = 200; % Set to 80 for librosa + cfg.mel_offset = 0; % For mel_only mode, no impact with num_ceps > 0 + cfg.mel_scale = 0; % same + cfg.mmax_init = 0; % same + cfg.mmax_coef = 0; % same + cfg.dynamic_mmax = false; % same end -cfg.tools = '../../../../tools/'; - -cfg.tplg_fn = [cfg.tools 'topology/topology1/m4/mfcc/mfcc_config.m4']; -cfg.tplg_ver = 1; -cfg.ipc_ver = 3; -export_mfcc_setup(cfg); - -cfg.tplg_fn = [cfg.tools 'topology/topology2/include/components/mfcc/default.conf']; -cfg.tplg_ver = 2; -cfg.ipc_ver = 4; -export_mfcc_setup(cfg); - +function cfg = get_mel_spectrogram_config() + cfg.blackman_coef = 0; + cfg.cepstral_lifter = 0; + cfg.channel = 0; + cfg.dither = 0; + cfg.energy_floor = 1.0; + cfg.frame_length = 25.0; % 400 samples at 16 kHz + cfg.frame_shift = 10.0; % 160 samples at 16 kHz + cfg.high_freq = 8000; + cfg.htk_compat = false; + cfg.low_freq = 0; + cfg.num_ceps = 0; % Mel-only mode, no DCT + cfg.min_duration = 0; + cfg.norm = 'slaney'; + cfg.num_mel_bins = 80; + cfg.preemphasis_coefficient = 0; + cfg.raw_energy = false; + cfg.remove_dc_offset = false; + cfg.round_to_power_of_two = true; + cfg.sample_frequency = 16000; + cfg.snip_edges = true; + cfg.subtract_mean = false; + cfg.use_energy = false; + cfg.vtln_high = 0; + cfg.vtln_low = 0; + cfg.vtln_warp = 1.0; + cfg.window_type = 'hann'; + cfg.mel_log = 'log10'; + cfg.pmin = 1e-10; + cfg.top_db = 8; % applied for log10, would be 80 dB clamp for decibels as 10*log10() + cfg.mel_offset = 4.0; % For whisper like Mel scale and normalize + cfg.mel_scale = 0.25; % For whisper like Mel scale and normalize + cfg.mmax_init = 0; % Initial value max Mel value, data clamp is mmax - top_db + cfg.mmax_coef = 0; % Dynamic max Mel value decay coefficient (zero lock to found max) + cfg.dynamic_mmax = true; end -function export_mfcc_setup(cfg) +function export_mfcc_setup(gen_cfg, cfg) %% Use blob tool from EQ -addpath([cfg.tools 'tune/common']); +addpath([gen_cfg.tools_path 'tune/common']); %% Blob size, size plus reserved(8) + current parameters nbytes_data = 104; @@ -73,7 +114,7 @@ function export_mfcc_setup(cfg) sh16 = [0 -8]; %% Get ABI information -[abi_bytes, nbytes_abi] = sof_get_abi(nbytes_data, cfg.ipc_ver); +[abi_bytes, nbytes_abi] = sof_get_abi(nbytes_data, gen_cfg.ipc_ver); %% Initialize correct size uint8 array nbytes = nbytes_abi + nbytes_data; @@ -86,14 +127,21 @@ function export_mfcc_setup(cfg) %% Apply default MFCC configuration, first struct header and reserved, then data [b8, j] = add_w32b(nbytes_data, b8, j); -for i = 1:8 + +v = q_convert(cfg.mel_offset, 7); [b8, j] = add_w16b(v, b8, j); +v = q_convert(cfg.mel_scale, 12); [b8, j] = add_w16b(v, b8, j); +v = q_convert(cfg.mmax_init, 7); [b8, j] = add_w16b(v, b8, j); +v = q_convert(cfg.mmax_coef, 15); [b8, j] = add_w16b(v, b8, j); + +% Reserved +for i = 1:6 [b8, j] = add_w32b(0, b8, j); end v = q_convert(cfg.sample_frequency, 0); [b8, j] = add_w32b(v, b8, j); v = q_convert(cfg.pmin, 31); [b8, j] = add_w32b(v, b8, j); -v = 0; [b8, j] = add_w32b(v, b8, j); % enum mel_log -v = 0; [b8, j] = add_w32b(v, b8, j); % enum norm +v = get_mel_log_value(cfg.mel_log); [b8, j] = add_w32b(v, b8, j); % enum mel_log +v = get_norm_value(cfg.norm); [b8, j] = add_w32b(v, b8, j); % enum norm v = 0; [b8, j] = add_w32b(v, b8, j); % enum pad v = get_window(cfg); [b8, j] = add_w32b(v, b8, j); % enum window v = 1; [b8, j] = add_w32b(v, b8, j); % enum dct type @@ -119,22 +167,24 @@ function export_mfcc_setup(cfg) v = cfg.snip_edges; [b8, j] = add_w8b(v, b8, j); % bool v = cfg.subtract_mean; [b8, j] = add_w8b(v, b8, j); % bool v = cfg.use_energy; [b8, j] = add_w8b(v, b8, j); % bool +v = cfg.dynamic_mmax; [b8, j] = add_w8b(v, b8, j); % bool %% Export -switch cfg.tplg_ver +tplg_fn = [gen_cfg.mfcc_conf_path cfg.tplg_fn]; +switch gen_cfg.tplg_ver case 1 - sof_tplg_write(cfg.tplg_fn, b8, "DEF_MFCC_PRIV", ... + sof_tplg_write(tplg_fn, b8, "DEF_MFCC_PRIV", ... "Exported with script setup_mfcc.m", ... "cd src/audio/mfcc/tune; octave setup_mfcc.m"); case 2 - sof_tplg2_write(cfg.tplg_fn, b8, "mfcc_config", ... + sof_tplg2_write(tplg_fn, b8, "mfcc_config", ... "Exported MFCC configuration", ... "cd src/audio/mfcc/tune; octave setup_mfcc.m"); otherwise - error("Illegal cfg.tplg_ver, use 1 for topology v1 or 2 topology v2."); + error("Illegal tplg_ver, use 1 for topology v1 or 2 topology v2."); end -rmpath([cfg.tools 'tune/common']); +rmpath([gen_cfg.tools_path 'tune/common']); end @@ -157,6 +207,30 @@ function export_mfcc_setup(cfg) end end +function n = get_mel_log_value(mel_log) + switch lower(mel_log) + case 'log' + n = 0; + case 'log10' + n = 1; + case 'db' + n = 2; + otherwise + error('Unknown mel_log type'); + end +end + +function n = get_norm_value(norm) + switch lower(norm) + case 'none' + n = 0; + case 'slaney' + n = 1; + otherwise + error('Unknown norm type'); + end +end + function bytes = w8b(word) bytes = uint8(zeros(1,1)); bytes(1) = bitand(word, 255); diff --git a/src/include/sof/audio/mfcc/mfcc_comp.h b/src/include/sof/audio/mfcc/mfcc_comp.h index 7323428ec37d..accf45868cbd 100644 --- a/src/include/sof/audio/mfcc/mfcc_comp.h +++ b/src/include/sof/audio/mfcc/mfcc_comp.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: BSD-3-Clause * - * Copyright(c) 2022 Intel Corporation. All rights reserved. + * Copyright(c) 2022-2026 Intel Corporation. * * Author: Seppo Ingalsuo */ @@ -114,6 +114,7 @@ struct mfcc_state { struct mat_matrix_16b *mel_spectra; /**< Pointer to scratch */ struct mat_matrix_16b *cepstral_coef; /**< Pointer to scratch */ int32_t *power_spectra; /**< Pointer to scratch */ + int16_t mmax; /**< Maximum Mel value in Q9.7 */ int16_t buf_avail; int16_t *buffers; int16_t *prev_data; /**< prev_data_size */ @@ -125,9 +126,13 @@ struct mfcc_state { int low_freq; int high_freq; int sample_rate; + bool mel_only; /**< When true, output Mel spectra instead of cepstral coefficients */ bool waiting_fill; /**< booleans */ bool prev_samples_valid; + bool magic_pending; /**< True when magic word not yet written for current output */ size_t sample_buffers_size; /**< bytes */ + int16_t *out_data_ptr; /**< Read pointer into scratch data for multi-period output */ + int out_remain; /**< Remaining int16_t samples to write to sink from scratch */ }; /* MFCC component private data */ @@ -156,12 +161,6 @@ int mfcc_setup(struct processing_module *mod, int max_frames, int rate, int chan void mfcc_free_buffers(struct processing_module *mod); -void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer *bsource, - struct output_stream_buffer *bsink, int frames); - -void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, - struct mfcc_pre_emph *emph, int frames, int source_channel); - void mfcc_fill_prev_samples(struct mfcc_buffer *buf, int16_t *prev_data, int prev_data_length); @@ -175,16 +174,31 @@ void mfcc_apply_window(struct mfcc_state *state, int input_shift); #if CONFIG_FORMAT_S16LE -int16_t *mfcc_sink_copy_zero_s16(const struct audio_stream *sink, - int16_t *w_ptr, int samples); - -int16_t *mfcc_sink_copy_data_s16(const struct audio_stream *sink, int16_t *w_ptr, - int samples, int16_t *r_ptr); +void mfcc_source_copy_s16(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel); void mfcc_s16_default(struct processing_module *mod, struct input_stream_buffer *bsource, struct output_stream_buffer *bsink, int frames); #endif +#if CONFIG_FORMAT_S24LE + +void mfcc_source_copy_s24(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel); + +void mfcc_s24_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames); +#endif + +#if CONFIG_FORMAT_S32LE + +void mfcc_source_copy_s32(struct input_stream_buffer *bsource, struct mfcc_buffer *buf, + struct mfcc_pre_emph *emph, int frames, int source_channel); + +void mfcc_s32_default(struct processing_module *mod, struct input_stream_buffer *bsource, + struct output_stream_buffer *bsink, int frames); +#endif + #ifdef UNIT_TEST void sys_comp_module_mfcc_interface_init(void); #endif diff --git a/src/include/user/mfcc.h b/src/include/user/mfcc.h index 7a5b7fcca98e..8a0defcd9883 100644 --- a/src/include/user/mfcc.h +++ b/src/include/user/mfcc.h @@ -50,7 +50,11 @@ enum sof_mfcc_dct_type { */ struct sof_mfcc_config { uint32_t size; /**< Size of this struct in bytes */ - uint32_t reserved[8]; + int16_t mel_offset; /**< Q8.7 default 0, use 4.0 for Whisper */ + int16_t mel_scale; /**< Q4.12 default 1.0, use 0.25 for Whisper */ + int16_t mmax_init; /**< Q8.7 default 0, with dynamic_mmax false, can sim. Whisper mmax */ + int16_t mmax_coef; /**< Q1.15 decay coefficient for dynamic mmax, a small value for slow */ + uint32_t reserved[6]; int32_t sample_frequency; /**< Hz. e.g. 16000 */ int32_t pmin; /**< Q1.31 linear power, limit minimum Mel energy, e.g. 1e-9 */ enum sof_mfcc_mel_log_type mel_log; /**< Use MEL_LOG_IS_LOG, LOG10 or DB*/ @@ -69,7 +73,7 @@ struct sof_mfcc_config { int16_t num_ceps; /**< Number of cepstral coefficients, e.g. 13 */ int16_t num_mel_bins; /**< Number of internal Mel bands, e.g. 23 */ int16_t preemphasis_coefficient; /**< Q1.15, e.g. 0.97, or 0 for disable */ - int16_t top_db; /**< Q8.7 dB, limit Mel energies to this value e.g. 200 */ + int16_t top_db; /**< Q8.7 dB, limit min. Mel energies to chunk max - top_dB, e.g. 80 */ int16_t vtln_high; /**< Reserved, no support */ int16_t vtln_low; /**< Reserved, no support */ int16_t vtln_warp; /**< Reserved, no support */ @@ -80,7 +84,7 @@ struct sof_mfcc_config { bool snip_edges; /**< Must be true (1) */ bool subtract_mean; /**< Must be false (0) */ bool use_energy; /**< Must be false (0) */ - bool reserved_bool1; + bool dynamic_mmax; /**< Track max Mel value for clamp with top_db value */ bool reserved_bool2; bool reserved_bool3; } __attribute__((packed)); diff --git a/tools/topology/topology2/cavs-benchmark-hda.conf b/tools/topology/topology2/cavs-benchmark-hda.conf index 62c0ad4f4fbc..95ab67431812 100644 --- a/tools/topology/topology2/cavs-benchmark-hda.conf +++ b/tools/topology/topology2/cavs-benchmark-hda.conf @@ -834,6 +834,16 @@ IncludeByKey.BENCH_CONFIG { } + "mfccmel16" { + + } + "mfccmel24" { + + } + "mfccmel32" { + + } + # # Micsel component # diff --git a/tools/topology/topology2/development/tplg-targets-bench.cmake b/tools/topology/topology2/development/tplg-targets-bench.cmake index eff707d49aa9..5c0f82dc7dfc 100644 --- a/tools/topology/topology2/development/tplg-targets-bench.cmake +++ b/tools/topology/topology2/development/tplg-targets-bench.cmake @@ -19,6 +19,7 @@ set(components "igo_nr" "level_multiplier" "mfcc" + "mfccmel" "micsel" "rtnr" "sound_dose" @@ -45,6 +46,7 @@ set(component_parameters "BENCH_IGO_NR_PARAMS=default" "BENCH_LEVEL_MULTIPLIER_PARAMS=default" "BENCH_MFCC_PARAMS=default" + "BENCH_MFCC_PARAMS=mel80" "BENCH_MICSEL_PARAMS=passthrough" "BENCH_RTNR_PARAMS=default" "BENCH_SOUND_DOSE_PARAMS=default" diff --git a/tools/topology/topology2/include/bench/mfcc_controls_capture.conf b/tools/topology/topology2/include/bench/mfcc_controls_capture.conf index 56a731b86687..d45baec1ee8f 100644 --- a/tools/topology/topology2/include/bench/mfcc_controls_capture.conf +++ b/tools/topology/topology2/include/bench/mfcc_controls_capture.conf @@ -6,6 +6,7 @@ name '$ANALOG_CAPTURE_PCM MFCC bytes' IncludeByKey.BENCH_MFCC_PARAMS { "default" "include/components/mfcc/default.conf" + "mel80" "include/components/mfcc/mel80.conf" } } #mixer."1" { diff --git a/tools/topology/topology2/include/bench/mfcc_controls_playback.conf b/tools/topology/topology2/include/bench/mfcc_controls_playback.conf index 7649678c8468..cc2ada04b8d7 100644 --- a/tools/topology/topology2/include/bench/mfcc_controls_playback.conf +++ b/tools/topology/topology2/include/bench/mfcc_controls_playback.conf @@ -6,6 +6,7 @@ name '$ANALOG_PLAYBACK_PCM MFCC bytes' IncludeByKey.BENCH_MFCC_PARAMS { "default" "include/components/mfcc/default.conf" + "mel80" "include/components/mfcc/mel80.conf" } } #mixer."1" { diff --git a/tools/topology/topology2/include/bench/mfccmel_s16.conf b/tools/topology/topology2/include/bench/mfccmel_s16.conf new file mode 100644 index 000000000000..ec89bffb90a1 --- /dev/null +++ b/tools/topology/topology2/include/bench/mfccmel_s16.conf @@ -0,0 +1,13 @@ + # Created with script "./bench_comp_generate.sh mfcc" + Object.Widget.mfcc.1 { + index $BENCH_PLAYBACK_HOST_PIPELINE + + + } + Object.Widget.mfcc.2 { + index $BENCH_CAPTURE_HOST_PIPELINE + + + } + + diff --git a/tools/topology/topology2/include/bench/mfccmel_s24.conf b/tools/topology/topology2/include/bench/mfccmel_s24.conf new file mode 100644 index 000000000000..73571fabe5f2 --- /dev/null +++ b/tools/topology/topology2/include/bench/mfccmel_s24.conf @@ -0,0 +1,13 @@ + # Created with script "./bench_comp_generate.sh mfcc" + Object.Widget.mfcc.1 { + index $BENCH_PLAYBACK_HOST_PIPELINE + + + } + Object.Widget.mfcc.2 { + index $BENCH_CAPTURE_HOST_PIPELINE + + + } + + diff --git a/tools/topology/topology2/include/bench/mfccmel_s32.conf b/tools/topology/topology2/include/bench/mfccmel_s32.conf new file mode 100644 index 000000000000..75c01eaf4a43 --- /dev/null +++ b/tools/topology/topology2/include/bench/mfccmel_s32.conf @@ -0,0 +1,13 @@ + # Created with script "./bench_comp_generate.sh mfcc" + Object.Widget.mfcc.1 { + index $BENCH_PLAYBACK_HOST_PIPELINE + + + } + Object.Widget.mfcc.2 { + index $BENCH_CAPTURE_HOST_PIPELINE + + + } + + diff --git a/tools/topology/topology2/include/components/mfcc/default.conf b/tools/topology/topology2/include/components/mfcc/default.conf index 1f9141886de9..42a6d6608b8b 100644 --- a/tools/topology/topology2/include/components/mfcc/default.conf +++ b/tools/topology/topology2/include/components/mfcc/default.conf @@ -1,9 +1,9 @@ -# Exported MFCC configuration 24-Jul-2024 -# cd tools/tune/mfcc; octave setup_mfcc.m +# Exported MFCC configuration 05-May-2026 +# cd src/audio/mfcc/tune; octave setup_mfcc.m Object.Base.data."mfcc_config" { bytes " 0x53,0x4f,0x46,0x34,0x00,0x00,0x00,0x00, - 0x68,0x00,0x00,0x00,0x00,0xa0,0x01,0x03, + 0x68,0x00,0x00,0x00,0x01,0xd0,0x01,0x03, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x68,0x00,0x00,0x00,0x00,0x00,0x00,0x00, @@ -14,7 +14,7 @@ Object.Base.data."mfcc_config" { 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, 0x02,0x00,0x00,0x00,0x01,0x00,0x00,0x00, - 0xc3,0x35,0x00,0x2c,0xff,0xff,0x00,0x00, + 0xc3,0x35,0x00,0x2c,0x00,0x00,0x00,0x00, 0x90,0x01,0xa0,0x00,0x00,0x00,0x14,0x00, 0x0d,0x00,0x17,0x00,0x00,0x00,0x00,0x64, 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x01, diff --git a/tools/topology/topology2/include/components/mfcc/mel80.conf b/tools/topology/topology2/include/components/mfcc/mel80.conf new file mode 100644 index 000000000000..04aa2a15c660 --- /dev/null +++ b/tools/topology/topology2/include/components/mfcc/mel80.conf @@ -0,0 +1,22 @@ +# Exported MFCC configuration 05-May-2026 +# cd src/audio/mfcc/tune; octave setup_mfcc.m +Object.Base.data."mfcc_config" { + bytes " + 0x53,0x4f,0x46,0x34,0x00,0x00,0x00,0x00, + 0x68,0x00,0x00,0x00,0x01,0xd0,0x01,0x03, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x68,0x00,0x00,0x00,0x00,0x02,0x00,0x04, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x80,0x3e,0x00,0x00, + 0x00,0x00,0x00,0x00,0x01,0x00,0x00,0x00, + 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x03,0x00,0x00,0x00,0x01,0x00,0x00,0x00, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x90,0x01,0xa0,0x00,0x40,0x1f,0x00,0x00, + 0x00,0x00,0x50,0x00,0x00,0x00,0x00,0x04, + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, + 0x00,0x01,0x01,0x00,0x00,0x01,0x00,0x00" +}