Prvni ulozeni z chegewara githubu

This commit is contained in:
2023-02-25 16:13:53 +01:00
commit 01eb80dfe2
3279 changed files with 638407 additions and 0 deletions

View File

@@ -0,0 +1,135 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_TTS_H_
#define _ESP_TTS_H_
#include "stdlib.h"
#include "stdio.h"
#include "esp_tts_voice.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
NONE_MODE = 0, //do not play any word before playing a specific number
ALI_PAY_MODE, //play zhi fu bao shou kuan before playing a specific number
WEIXIN_PAY_MODE //play wei xin shou kuan before playing a specific number
} pay_mode_t;
typedef void * esp_tts_handle_t;
/**
* @brief Init an instance of the TTS voice set structure.
*
* @param template The const esp_tts_voice_template.
* @param data The customize voice data
* @return
* - NULL: Init failed
* - Others: The instance of voice set
*/
esp_tts_voice_t *esp_tts_voice_set_init(const esp_tts_voice_t *template, void *data);
/**
* @brief Init an instance of the TTS voice set structure.
*
* @param template The const esp_tts_voice_template.
* @param data The customize voice data
* @return
* - NULL: Init failed
* - Others: The instance of voice set
*/
void esp_tts_voice_set_free(esp_tts_voice_t *voice);
/**
* @brief Creates an instance of the TTS structure.
*
* @param voice Voice set containing all basic phonemes.
* @return
* - NULL: Create failed
* - Others: The instance of TTS structure
*/
esp_tts_handle_t esp_tts_create(esp_tts_voice_t *voice);
/**
* @brief parse money pronuciation.
*
* @param tts_handle Instance of TTS
* @param yuan The number of yuan
* @param jiao The number of jiao
* @param fen The number of fen
* @param mode The pay mode: please refer to pay_mode_t
* @return
* - 0: failed
* - 1: succeeded
*/
int esp_tts_parse_money(esp_tts_handle_t tts_handle, int yuan, int jiao, int fen, pay_mode_t mode);
/**
* @brief parse Chinese PinYin pronuciation.
*
* @param tts_handle Instance of TTS
* @param pinyin PinYin string, like this "da4 jia1 hao3"
* @return
* - 0: failed
* - 1: succeeded
*/
int esp_tts_parse_pinyin(esp_tts_handle_t tts_handle, const char *pinyin);
/**
* @brief parse Chinese string.
*
* @param tts_handle Instance of TTS
* @param str Chinese string, like this "大家好"
* @return
* - 0: failed
* - 1: succeeded
*/
int esp_tts_parse_chinese(esp_tts_handle_t tts_handle, const char *str);
/**
* @brief output TTS voice data by stream.
*
* @Warning The output data should not be freed.
Once the output length is 0, the all voice data has been output.
*
* @param tts_handle Instance of TTS
* @param len The length of output data
* @param speed The speech speed speed of synthesized speech,
range:0~5, 0: the slowest speed, 5: the fastest speech
* @return
* - voice raw data
*/
short* esp_tts_stream_play(esp_tts_handle_t tts_handle, int *len, unsigned int speed);
/**
* @brief reset tts stream and clean all cache of TTS instance.
*
* @param tts_handle Instance of TTS
*/
void esp_tts_stream_reset(esp_tts_handle_t tts_handle);
/**
* @brief Free the TTS instance
*
* @param tts_handle The instance of TTS.
*/
void esp_tts_destroy(esp_tts_handle_t tts_handle);
#ifdef __cplusplus
extern "C" {
#endif
#endif

View File

@@ -0,0 +1,25 @@
#ifndef _ESP_TTS_PARSER_H_
#define _ESP_TTS_PARSER_H_
#include "stdlib.h"
#include "esp_tts_voice.h"
typedef struct {
int *syll_idx;
int syll_num;
int total_num;
esp_tts_voice_t *voice;
}esp_tts_utt_t;
esp_tts_utt_t* esp_tts_parser_chinese (const char* str, esp_tts_voice_t *voice);
esp_tts_utt_t* esp_tts_parser_money(char *play_tag, int yuan, int jiao, int fen, esp_tts_voice_t *voice);
esp_tts_utt_t* esp_tts_parser_pinyin(char* pinyin, esp_tts_voice_t *voice);
esp_tts_utt_t* esp_tts_utt_alloc(int syll_num, esp_tts_voice_t *voice);
void esp_tts_utt_free(esp_tts_utt_t *utt);
#endif

View File

@@ -0,0 +1,67 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_TTS_PLAYER_H_
#define _ESP_TTS_PLAYER_H_
#include "stdlib.h"
#include "stdio.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef void * esp_tts_player_handle_t;
/**
* @brief Creates an instance of the TTS Player structure.
*
* @param mode mode of player, default:0
* @return
* - NULL: Create failed
* - Others: The instance of TTS Player
*/
esp_tts_player_handle_t esp_tts_player_create(int mode);
/**
* @brief Concatenate audio files.
*
* @Warning Just support mono audio data.
*
* @param player The handle of TTS player
* @param file_list The dir of files
* @param file_num The number of file
* @param len The length of return audio buffer
* @param sample_rate The sample rate of input audio file
* @param sample_width The sample width of input audio file, sample_width=1:8-bit, sample_width=2:16-bit,...
* @return
* - audio data buffer
*/
unsigned char* esp_tts_stream_play_by_concat(esp_tts_player_handle_t player, const char **file_list, int file_num, int *len, int *sample_rate, int *sample_width);
/**
* @brief Free the TTS Player instance
*
* @param player The instance of TTS Player.
*/
void esp_tts_player_destroy(esp_tts_player_handle_t player);
#ifdef __cplusplus
extern "C" {
#endif
#endif

View File

@@ -0,0 +1,48 @@
////////////////////////////////////////////////////////////////////////////
// **** AUDIO-STRETCH **** //
// Time Domain Harmonic Scaler //
// Copyright (c) 2019 David Bryant //
// All Rights Reserved. //
// Distributed under the BSD Software License (see license.txt) //
////////////////////////////////////////////////////////////////////////////
// stretch.h
// Time Domain Harmonic Compression and Expansion
//
// This library performs time domain harmonic scaling with pitch detection
// to stretch the timing of a 16-bit PCM signal (either mono or stereo) from
// 1/2 to 2 times its original length. This is done without altering any of
// its tonal characteristics.
#ifndef STRETCH_H
#define STRETCH_H
#ifdef __cplusplus
extern "C" {
#endif
typedef void *StretchHandle;
/* extern function */
StretchHandle stretch_init (int shortest_period, int longest_period, int num_chans, int fast_mode);
int stretch_samples (StretchHandle handle, short *samples, int num_samples, short *output, float ratio);
int stretch_flush (StretchHandle handle, short *output);
void stretch_deinit (StretchHandle handle);
/* internel function */
StretchHandle stretcher_init_internal(int shortest_period, int longest_period, int buff_len);
void stretcher_deinit (StretchHandle handle);
int stretcher_is_empty(StretchHandle handle);
int stretcher_is_full(StretchHandle handle, int num_samples);
int stretcher_push_data(StretchHandle handle, short *samples, int num_samples);
int stretcher_stretch_samples(StretchHandle handle, short *output, float ratio);
int stretcher_stretch_samples_flash(StretchHandle handle, short *output, float ratio, const short *period_data,
int *start_idx, int end_idx);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,20 @@
#ifndef _ESP_TTS_VOICE_H_
#define _ESP_TTS_VOICE_H_
typedef struct {
char *voice_name; // voice set name
char *format; // the format of voice data, currently support pcm and amrwb
int sample_rate; // the sample rate of voice data, just for pcm format
int bit_width; // the bit width of voice data, just for pcm format
int syll_num; // the syllable mumber
char **sylls; // the syllable names
int *syll_pos; // the position of syllable in syllable audio data array
short *pinyin_idx; // the index of pinyin
short *phrase_dict; // the pinyin dictionary of common phrase
short *extern_idx; // the idx of extern phrases
short *extern_dict; // the extern phrase dictionary
unsigned char *data; // the audio data of all syllables
} esp_tts_voice_t;
#endif

View File

@@ -0,0 +1,5 @@
#pragma once
#include "esp_tts.h"
extern const esp_tts_voice_t esp_tts_voice_template;

View File

@@ -0,0 +1,5 @@
#pragma once
#include "esp_tts.h"
extern const esp_tts_voice_t esp_tts_voice_xiaole;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_customized_word_wn5;

View File

@@ -0,0 +1,411 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_H
#define DL_LIB_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#include "esp_heap_caps.h"
#include "sdkconfig.h"
#define DL_SPIRAM_SUPPORT 1
#endif
#ifdef CONFIG_IDF_TARGET_ESP32S3
#include "esp32s3/rom/cache.h"
#endif
typedef int padding_state;
// /**
// * @brief Allocate a chunk of memory which has the given capabilities.
// * Equivalent semantics to libc malloc(), for capability-aware memory.
// * In IDF, malloc(p) is equivalent to heap_caps_malloc(p, MALLOC_CAP_8BIT).
// *
// * @param size In bytes, of the amount of memory to allocate
// * @param caps Bitwise OR of MALLOC_CAP_* flags indicating the type of memory to be returned
// * MALLOC_CAP_SPIRAM: Memory must be in SPI RAM
// * MALLOC_CAP_INTERNAL: Memory must be internal; specifically it should not disappear when flash/spiram cache is switched off
// * MALLOC_CAP_DMA: Memory must be able to accessed by DMA
// * MALLOC_CAP_DEFAULT: Memory can be returned in a non-capability-specific memory allocation
// * @return Pointer to currently allocated heap memory
// **/
// void *heap_caps_malloc(size_t size, uint32_t caps);
/**
* @brief Allocate aligned memory from internal memory or external memory.
* if cnt*size > CONFIG_SPIRAM_MALLOC_ALWAYSINTERNAL, allocate memory from internal RAM
* else, allocate memory from PSRAM
*
* @param cnt Number of continuing chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits
* @return Pointer to currently allocated heap memory
*/
void *dl_lib_calloc(int cnt, int size, int align);
/**
* @brief Always allocate aligned memory from external memory.
*
* @param cnt Number of continuing chunks of memory to allocate
* @param size Size, in bytes, of a chunk of memory to allocate
* @param align Aligned size, in bits
* @return Pointer to currently aligned heap memory
*/
void *dl_lib_calloc_psram(int cnt, int size, int align);
/**
* @brief Free aligned memory allocated by `dl_lib_calloc` or `dl_lib_calloc_psram`
*
* @param prt Pointer to free
*/
void dl_lib_free(void *ptr);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* As described in https://codingforspeed.com/using-faster-exponential-approximation/
* Should be good til an input of 5 or so with a steps factor of 8.
*
* @param in Floating point input
* @param steps Approximation steps. More is more precise. 8 or 10 should be good enough for most purposes.
* @return Exp()'ed output
*/
fptp_t fast_exp(double x, int steps);
/**
* @brief Does a fast version of the exp() operation on a floating point number.
*
* @param in Floating point input
* @return Exp()'ed output
*/
double fast_exp_pro(double x);
/**
* @brief Does a softmax operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a softmax operation on a quantized matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_softmax_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a floating point number
*
* @param in Floating point input
* @return Sigmoid output
*/
fptp_t dl_sigmoid_op(fptp_t in);
/**
* @brief Does a sigmoid operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a tanh operation on a floating point number
*
* @param in Floating point input number
* @return Tanh value
*/
fptp_t dl_tanh_op(fptp_t v);
/**
* @brief Does a tanh operation on a matrix.
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh(const dl_matrix2d_t *in, dl_matrix2d_t *out);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a floating point number
*
* @param in Floating point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
fptp_t dl_relu_op(fptp_t in, fptp_t clip);
/**
* @brief Does a ReLu operation on a matrix.
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Fully connected layer operation
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Biases for the neurons. Can be NULL if a bias of 0 is required.
* @param out Output array. Outputs are placed here. Needs to be an initialized, weight->w by in->h in size, matrix.
*/
void dl_fully_connect_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, dl_matrix2d_t *out);
/**
* @brief Pre-calculate the sqrtvari variable for the batch_normalize function.
* The sqrtvari matrix depends on the variance and epsilon values, which normally are constant. Hence,
* this matrix only needs to be calculated once. This function does that.
*
* @param
* @return
*/
void dl_batch_normalize_get_sqrtvar(const dl_matrix2d_t *variance, fptp_t epsilon, dl_matrix2d_t *out);
/**
* @brief Batch-normalize a matrix
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
* @return
*/
void dl_batch_normalize(dl_matrix2d_t *m, const dl_matrix2d_t *offset, const dl_matrix2d_t *scale,
const dl_matrix2d_t *mean, const dl_matrix2d_t *sqrtvari);
/**
* @brief Do a basic LSTM layer pass.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weights Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2d_t *dl_basic_lstm_layer(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a basic LSTM layer pass, partial quantized version.
* This LSTM function accepts 16-bit fixed-point weights and 32-bit float-point bias.
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weights Weights for the neurons, need to be quantised
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_quantised_weights(const dl_matrix2d_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Do a fully-connected layer pass, fully-quantized version.
*
* @param in Input vector
* @param weight Weights of the neurons
* @param bias Bias values of the neurons. Can be NULL if no bias is needed.
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return Output values of the neurons
*/
void dl_fully_connect_layer_q(const dl_matrix2dq_t *in, const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, dl_matrix2dq_t *out, int shift);
/**
* @brief Do a basic LSTM layer pass, fully-quantized version
*
* @warning Returns state_h pointer, so do not free result.
* @param in Input vector
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param weights Weights for the neurons
* @param bias Bias for the neurons. Can be NULL if no bias is required
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return Output values of the neurons
*/
dl_matrix2dq_t *dl_basic_lstm_layer_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int shift);
/**
* @brief Batch-normalize a matrix, fully-quantized version
*
* @param m The matrix to normalize
* @param offset Offset matrix
* @param scale Scale matrix
* @param mean Mean matrix
* @param sqrtvari Matrix precalculated using dl_batch_normalize_get_sqrtvar
* @param shift Number of bits to shift the result back by. See dl_lib_matrixq.h for more info
* @return
*/
void dl_batch_normalize_q(dl_matrix2dq_t *m, const dl_matrix2dq_t *offset, const dl_matrix2dq_t *scale,
const dl_matrix2dq_t *mean, const dl_matrix2dq_t *sqrtvari, int shift);
/**
* @brief Does a relu (Rectifier Linear Unit) operation on a fixed-point number
* This accepts and returns fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @param clip If value is higher than this, it will be clipped to this value
* @return Relu output
*/
qtp_t dl_relu_q_op(qtp_t in, qtp_t clip);
/**
* @brief Does a ReLu operation on a matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_relu_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Does a sigmoid operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return Sigmoid output
*/
int dl_sigmoid_op_q(const int in);
int16_t dl_sigmoid_op_q8(const int16_t in);
/**
* @brief Does a sigmoid operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a matrix, quantized version
*
* @param in Input matrix
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Does a tanh operation on a fixed-point number.
* This accepts and returns a fixed-point 32-bit number with the last 15 bits being the bits after the decimal
* point. (Equivalent to a mantissa in a quantized matrix with exponent -15.)
*
* @param in Fixed-point input
* @return tanh output
*/
int dl_tanh_op_q(int v);
int16_t dl_tanh_op_q8(int16_t v);
void load_mat_psram_mn4(void);
void load_mat_psram_mn3(void);
void free_mat_psram_mn4(void);
void free_mat_psram_mn3(void);
qtp_t dl_hard_sigmoid_op(qtp_t in, int exponent);
qtp_t dl_hard_tanh_op(qtp_t in, int exponent);
int16_t dl_table_tanh_op(int16_t in, int exponent);
int16_t dl_table_sigmoid_op(int16_t in, int exponent);
void dl_hard_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_hard_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_sigmoid_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
void dl_table_tanh_q(const dl_matrix2dq_t *in, dl_matrix2dq_t *out);
/**
* @brief Filter out the number greater than clip in the matrix, quantized version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum(const dl_matrix2d_t *in, fptp_t clip, dl_matrix2d_t *out);
/**
* @brief Filter out the number greater than clip in the matrix, float version
*
* @param in Input matrix
* @param clip If values are higher than this, they will be clipped to this value
* @param out Output matrix. Can be the same as the input matrix; if so, output results overwrite the input.
*/
void dl_minimum_q(const dl_matrix2dq_t *in, fptp_t clip, dl_matrix2dq_t *out);
/**
* @brief Do a basic CNN layer pass.
*
* @Warning This just supports the single channel input image, and the output is single row matrix.
That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height
* @param bias Bias for the CNN layer.
* @param filter_height The height of convolution kernel
* @param filter_width The width of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1.
* @return The result of CNN layer.
*/
dl_matrix2d_t *dl_basic_conv_layer(const dl_matrix2d_t *in, const dl_matrix2d_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
/**
* @brief Do a basic CNN layer pass, quantised wersion.
*
* @Warning This just supports the single channel input image, and the output is single row matrix.
That is to say, the height of output is 1, and the weight of output is out_channels*out_image_width*out_image_height
*
* @param in Input single channel image
* @param weight Weights of the neurons, weight->w = out_channels, weight->h = filter_width*filter_height,
* @param bias Bias of the neurons.
* @param filter_height The height of convolution kernel
* @param filter_width The width of convolution kernel
* @param out_channels The number of output channels of convolution kernel
* @param stride_x The step length of the convolution window in x(width) direction
* @param stride_y The step length of the convolution window in y(height) direction
* @param pad One of `"VALID"` or `"SAME"`, 0 is "VALID" and the other is "SAME"
* @param out The result of CNN layer, out->h=1
* @return The result of CNN layer
*/
dl_matrix2d_t *dl_basic_conv_layer_quantised_weight(const dl_matrix2d_t *in, const dl_matrix2dq_t *weight, const dl_matrix2d_t *bias, int filter_width, int filter_height,
const int out_channels, const int stride_x, const int stride_y, padding_state pad, const dl_matrix2d_t* out);
#endif

View File

@@ -0,0 +1,72 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_COEFGETTER_IF_H
#define DL_LIB_COEFGETTER_IF_H
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "cJSON.h"
//Set this if the coefficient requested is a batch-normalization popvar matrix which needs to be preprocessed by
//dl_batch_normalize_get_sqrtvar first.
#define COEF_GETTER_HINT_BNVAR (1<<0)
/*
This struct describes the basic information of model data:
word_num: the number of wake words or speech commands
word_list: the name list of wake words or speech commands
thres_list: the threshold list of wake words or speech commands
info_str: the string used to reflect the version and information of model data
which consist of the architecture of network, the version of model data, wake words and their threshold
*/
typedef struct {
int word_num;
char **word_list;
int *win_list;
float *thresh_list;
char *info_str;
} model_info_t;
/*
Alphabet struct describes the basic grapheme or phoneme.
item_num: the number of baisc item(grapheme or phonemr)
items: the list of basic item
*/
typedef struct {
int item_num;
char **items;
}alphabet_t;
/*
This struct describes a generic coefficient getter: a way to get the constant coefficients needed for a neural network.
For the two getters, the name describes the name of the coefficient matrix, usually the same as the Numpy filename the
coefficient was originally stored in. The arg argument can be used to optionally pass an additional user-defined argument
to the getter (e.g. the directory to look for files in the case of the Numpy file loader getter). The hint argument
is a bitwise OR of the COEF_GETTER_HINT_* flags or 0 when none is needed. Use the free_f/free_q functions to release the
memory for the returned matrices, when applicable.
*/
typedef struct {
const dl_matrix2d_t* (*getter_f)(const char *name, void *arg, int hint);
const dl_matrix2dq_t* (*getter_q)(const char *name, void *arg, int hint);
const dl_matrix2dq8_t* (*getter_q8)(const char *name, void *arg, int hint);
void (*free_f)(const dl_matrix2d_t *m);
void (*free_q)(const dl_matrix2dq_t *m);
void (*free_q8)(const dl_matrix2dq8_t *m);
const model_info_t* (*getter_info)(void *arg);
const alphabet_t* (*getter_alphabet)(void *arg);
const cJSON* (*getter_config)(void *arg);
} model_coeff_getter_t;
#endif

View File

@@ -0,0 +1,164 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONV_QUEUE_H
#define DL_LIB_CONV_QUEUE_H
#include "dl_lib_matrix.h"
typedef float fptp_t;
//Flags for matrices
#define DL_MF_FOREIGNDATA (1<<0) /*< Matrix *item data actually points to another matrix and should not be freed */
//Float convolution FIFO queue.
typedef struct {
int n; /*< the length of queue */
int c; /*< the channel number of queue element*/
int front; /*< the front(top) position of queue */
int flag; /*< not used*/
fptp_t *item; /*< Pointer to item array */
} dl_conv_queue_t;
/**
* @brief Allocate a convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_conv_queue_t *dl_conv_queue_alloc(int n, int c);
/**
* @brief Free a convolution queue
*
* @param cq The convolution queue to free
*/
void dl_conv_queue_free(dl_conv_queue_t *cq);
void dl_conv_to_matrix2d(dl_conv_queue_t *cq, dl_matrix2d_t* out);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input convolution queue
* @return Pointer of oldest element
*/
fptp_t *dl_conv_queue_pop(dl_conv_queue_t *cq);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input convolution queue
* @param item The new element
*/
void dl_conv_queue_push(dl_conv_queue_t *cq, fptp_t* item);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_get_queue_item(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a sigmoid operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a sigmoid operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_sigmoid_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a tanh operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_tanh_step(dl_conv_queue_t *cq, int offset);
/**
* @brief Does a softmax operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a softmax operation
* by this pointer, then return the pointer
*
* @param cq Input convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
fptp_t *dl_softmax_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_step(dl_conv_queue_t *cq, int offset);
fptp_t *dl_relu_look(dl_matrix2d_t *cq, int offset);
dl_matrix2d_t *dl_matrix_concat1(const dl_conv_queue_t *a, const dl_matrix2d_t *b);
dl_matrix2d_t *dl_basic_lstm_layer1(const dl_conv_queue_t *in, dl_matrix2d_t *state_c, dl_matrix2d_t *state_h,
const dl_matrix2d_t *weight, const dl_matrix2d_t *bias);
/**
* @brief Fast implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @return The result of atrous convolution
*/
fptp_t *dl_atrous_conv1d_step(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
fptp_t *dl_look_conv_step(dl_conv_queue_t *in, dl_matrix2d_t *out, int rate, int size,
dl_matrix2d_t* kernel, dl_matrix2d_t* bias);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is first element of output queue and should not be freed separately.
*
* @param in Input convolution queue
* @param out Output convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @return The result of dilation layer
*/
fptp_t *dl_dilation_layer(dl_conv_queue_t *in, dl_conv_queue_t *out, int rate, int size,
dl_matrix2d_t* filter_kernel, dl_matrix2d_t* filter_bias,
dl_matrix2d_t* gate_kernel, dl_matrix2d_t* gate_bias);
void test_atrous_conv(int size, int rate, int in_channel, int out_channel);
#endif

View File

@@ -0,0 +1,273 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ8_QUEUE_H
#define DL_LIB_CONVQ8_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib_convq_queue.h"
//[nch, n, c]
typedef struct {
int n; /*< the length of queue */
int c; /*< the number of queue element*/
int front; /*< the front(top) position of queue */
int nch; /*< the channel of queue */
int exponent; /*< The values in items should be multiplied by pow(2,exponent)
to get the real values */
q8tp_t *itemq; /*< Pointer to item array */
} dl_convq8_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param c The channel of queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t *dl_convq8_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq8_queue_free(dl_convq8_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq8_queue_bzero(dl_convq8_queue_t *cqm);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq8_queue_push_by_qmf(dl_convq8_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8(dl_convq8_queue_t *cq, int offset);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of queue
* @return Pointer of the element
*/
q8tp_t *dl_get_queue_itemq8_mc(dl_convq8_queue_t *cq, int offset, int ch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel Kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
* @return The result of atrous convolution
*/
void dl_atrous_conv1dq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
* @return The result of dilation layer
*/
void dl_dilation_layerq8_steps(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
dl_conv_queue_t *dl_convq8_queue_add(dl_convq8_queue_t *cq1, dl_convq8_queue_t *cq2);
int8_t dl_sigmoid_lutq8(int in);
/**
* @brief Allocate a 8-bit fixed-point Multi-Channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch  The channel number
* @return The convolution queue, or NULL if out of memory
*/
dl_convq8_queue_t **dl_convq8_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a 8-bit fixed-point Multi-Channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number
*/
void dl_convq8_queue_mc_free(dl_convq8_queue_t **cqm, int nch);
/**
* @brief Tanh activation function for 8-bit fixed-point Multi-Channel convolution queue input
*
* @param cqm Input 8-bit fixed-point Multi-Channel convolution queue
* @param offset Offset used to calculate the beginning of input conv queue
* @param nch The channel number
*/
void dl_tanh_convq8_mc(dl_convq8_queue_t **cqm, int offset, int nch);
/**
* @brief Fast and quantised 16-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* Usually, this layer is used as first layer for 8-bit network.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* Input is a 16-bit queue point, Output is an 8-bit queue point.
*
* @param in Input 16bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_16in_mc_steps(dl_convq_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int out_exponent, int offset, int prenum);
/**
* @brief Fast and quantised 8-bit implement for Multi-channel 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input 8bit fixed-point convolution queue array
* @param out Output 8bit fixed-point convolution queue array
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param out_exponent Exponent of output
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_atrous_conv1dq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out,
int nch, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq8_t* bias,
int out_exponent, int offset, int prenum);
/**
* @brief Fast implement of 8-bit dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input 8-bit fixed-point convolution queue
* @param out Output 8-bit fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param offset Offset used to calculate the beginning of input conv queue
* @param prenum The num to control the parameter size of preload operation
*/
void dl_dilation_layerq8_mc_steps(dl_convq8_queue_t **in, dl_convq8_queue_t **out, int nch, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq8_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq8_t* gate_bias,
int offset, int prenum);
void dl_convq8_queue_mc_bzero(dl_convq8_queue_t **cqm, int nch);
dl_convq8_queue_t *dl_convq8_queue_alloc_from_psram(int n, int c);
qtp_t *dl_dilation_layerq16_8(dl_convq_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
qtp_t *dl_dilation_layerq8(dl_convq8_queue_t *in, dl_convq8_queue_t *out, int rate, int size,
dl_matrix2dq8_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq8_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
dl_matrix2dq8_t *dl_convq8_lstm_layer(const dl_convq8_queue_t *in, dl_convq8_queue_t *out, dl_matrix2dq8_t *state_c,
dl_matrix2dq8_t *state_h, const dl_matrix2dq8_t *in_weight, const dl_matrix2dq8_t *h_weight,
const dl_matrix2dq_t *bias, int prenum);
qtp_t *dl_atrous_conv1dq8_16_s3(dl_convq8_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq8_t* kernel, dl_matrix2dq_t* bias, int prenum);
void print_convq8(dl_convq8_queue_t *cq, int offset);
void print_convq(dl_convq_queue_t *cq, int offset);
void lstmq8_free(void);
#endif

View File

@@ -0,0 +1,375 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_CONVQ_QUEUE_H
#define DL_LIB_CONVQ_QUEUE_H
#include "dl_lib_matrixq.h"
#include "dl_lib_conv_queue.h"
#include "dl_lib.h"
//fixed-point convolution FIFO queue.
//[nch, n, c]
typedef struct {
int n; /*< the length of queue */
int c; /*< the number of queue element*/
int front; /*< the front(top) position of queue */
int nch; /*< the multiple of queue*/
int exponent; /*< The values in items should be multiplied by pow(2,exponent)
to get the real values */
qtp_t *itemq; /*< Pointer to item array */
} dl_convq_queue_t;
/**
* @brief Allocate a fixed-point convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc(int n, int c);
/**
* @brief Allocate a fixed-point convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_from_psram(int n, int c);
/**
* @brief Allocate a fixed-point multi-channel convolution queue
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc(int n, int c, int nch);
/**
* @brief Allocate a fixed-point multi-channel convolution queue from PSRAM
*
* @param n The length of queue
* @param c The number of elements in the queue
* @param nch The channel of conv queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t *dl_convq_queue_alloc_mc_from_psram(int n, int c, int nch);
void dl_convq_to_matrix2dq(dl_convq_queue_t *cq, dl_matrix2dq_t* out, int row);
/**
* @brief Free a fixed-point convolution queue
*
* @param cq The fixed-point convolution queue to free
*/
void dl_convq_queue_free(dl_convq_queue_t *cq);
/**
* @brief Set itemq of convolution queue to 0
*
* @param cq The fixed-point convolution queue point
*/
void dl_convq_queue_bzero(dl_convq_queue_t *cq);
/**
* @brief Move the front pointer of queue forward,
the First(oldest) element become the last(newest) element,
*
* @param cq Input fixed-point convolution queue
* @return Pointer of oldest element
*/
inline qtp_t *dl_convq_queue_pop(dl_convq_queue_t *cq);
inline qtp_t *dl_convq_queue_popn(dl_convq_queue_t *cq, int n);
/**
* @brief Remove the oldest element, then insert the input element at the end of queue
*
* @param cq Input fixed-point convolution queue
* @param item The new element
*/
void dl_convq_queue_push(dl_convq_queue_t *cq, dl_matrix2dq_t *a, int shift);
/**
* @brief Insert the float-point element at the end of queue.
* The precision of fixed-point numbers is described by the Qm.f notation,
*
* @param cq Input fixed-point convolution queue
* @param item The float-point element
* @param m_bit The number of integer bits including the sign bits
* @param f_bit The number of fractional bits
*/
void dl_convq_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
void dl_convq16_queue_push_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit);
dl_conv_queue_t *dl_queue_from_convq(dl_convq_queue_t *cq1);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param last_num Offset from the front of the queue
* @return Pointer of the element
*/
inline qtp_t *dl_get_queue_itemq(dl_convq_queue_t *cq, int last_num);
/**
* @brief Get the pointer of element in the queue by offset
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param ch Channel index of convolution queue
* @return Pointer of the element
*/
qtp_t *dl_get_queue_itemq_mc(dl_convq_queue_t *cq, int offset, int ch);
/**
* @brief Does a tanh operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a
* tanh operation by this pointer, then return the pointer
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
void dl_tanh_convq(dl_convq_queue_t *cq, int offset);
/**
* @brief Does a tanh operation on the one of element in multi channel convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a
* tanh operation by this pointer, then return the pointer
*
* @param cq Input fixed-point multi channnel convolution queue
* @param offset Offset from the front of the queue
* @param nch The channel number of cqm
* @return Pointer of the element
*/
void dl_tanh_convq_mc(dl_convq_queue_t **cqm, int offset, int nch);
/**
* @brief Does a relu operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, and does a
* relu operation by this pointer, then return the pointer
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @return Pointer of the element
*/
void dl_relu_convq(dl_convq_queue_t *cq, fptp_t clip, int last_num);
/**
* @brief Does a softmax operation on the one of element in the convolution queue.
* Gets the pointer of element in the convolution queue by offset, input data
stay as it is. Results are saved into the *out* array.
*
* @param cq Input fixed-point convolution queue
* @param offset Offset from the front of the queue
* @param out Old array to re-use. Passing NULL will allocate a new matrix.
* @return softmax results
*/
fptp_t * dl_softmax_step_q(dl_convq_queue_t *cq, int offset, fptp_t *out);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @return The result of atrous convolution
*/
qtp_t * dl_atrous_conv1dq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int prenum);
/**
* @brief Fast implement of dilation layer as follows
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int offset, int prenum);
qtp_t *dl_dilation_layerq(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias,
int filter_shift, int gate_shift, int prenum);
qtp_t *dl_dilation_layerq16(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* filter_kernel, dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel, dl_matrix2dq_t* gate_bias, int prenum);
qtp_t *dl_atrous_conv1dq_steps(dl_convq_queue_t *in, dl_convq_queue_t *out, int rate, int size,
dl_matrix2dq_t* kernel, dl_matrix2dq_t* bias, int shift, int offset, int prenum);
/**
* @brief Add a pair of fixed-point convolution queue item-by-item, and return float-point convolution queue
*
* @param cq1 First fixed-point convolution queue
* @param cq2 Seconf fixed-point convolution queue
* @return The result of float-point convolution queue
*/
dl_conv_queue_t *dl_convq_queue_add(dl_convq_queue_t *cq1, dl_convq_queue_t *cq2);
/**
* @brief Fast implement of LSTM layer by dl_atrous_conv1dq function
*
* @Warning LSTM kernel is split into two part, the first part input is the last layer output,
* and kernel is parameter *in_weight*. The second part input is the last frame LSTM output,
* the kernel is parameters *h_weight*.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param state_c Internal state of the LSTM network
* @param state_h Internal state (previous output values) of the LSTM network
* @param in_weight the LSTM kernel needed by first part
* @param h_weight the LSTM kernel needed by second part
* @param bias The bias matrix of LSTM. Can be NULL if a bias of 0 is required.
* @in_shift Shift ratio used in first part
* @h_shift Shift ratio used in second part
* @return The result of LSTM layer
*/
dl_matrix2dq_t *dl_convq_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
const dl_matrix2dq_t *bias, int in_shift, int h_shift, int prenum);
dl_matrix2dq_t *dl_basic_lstm_layer1_q(const dl_convq_queue_t *in, dl_matrix2dq_t *state_c, dl_matrix2dq_t *state_h,
const dl_matrix2dq_t *weight, const dl_matrix2dq_t *bias, int step, int shift);
dl_matrix2dq_t *dl_convq16_lstm_layer(const dl_convq_queue_t *in, dl_convq_queue_t *out, dl_matrix2dq_t *state_c,
dl_matrix2dq_t *state_h, const dl_matrix2dq_t *in_weight, const dl_matrix2dq_t *h_weight,
const dl_matrix2dq_t *bias, int prenum);
/**
* @brief Allocate a fixed-point multi channel convolution queue
*
* @param n The length of queue
* @param c The channel number of elements in the queue
* @param nch the channel numbet of convolution queue
* @return The convolution queue, or NULL if out of memory
*/
dl_convq_queue_t **dl_convq_queue_mc_alloc(int n, int c, int nch);
/**
* @brief Free a fixed-point multi channel convolution queue
*
* @param cqm The fixed-point convolution queue to free
* @param nch The channel number of cqm
*/
void dl_convq_queue_mc_free(dl_convq_queue_t **cqm, int nch);
/**
* @brief Fast and quantised implement for 1D atrous convolution (a.k.a. convolution with holes or dilated convolution)
* based on convolution queue.
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param kernel The kernel matrix of filter
* @param bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param shift Shift ratio used in dot operation between two 16-bit fixed point vector
* @param offset the offset to calculate input convq
* @param prenum the preload size, 0: do not use preload function
* @return The result of atrous convolution
*/
qtp_t *dl_atrous_conv1dq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* kernel,
dl_matrix2dq_t* bias,
int shift,
int offset,
int prenum);
/**
* @brief Fast implement of dilation layer as follows for multi channel input
*
* |-> [gate(sigmoid)] -|
* input - | |-> (*) - output
* |-> [filter(tanh)] -|
*
* @Warning All input and output convolution queue and matrix should be allocated. The return pointer
* is last element of output queue and should not be freed separately.
*
* @param in Input fixed-point convolution queue
* @param out Output fixed-point convolution queue
* @param nch The channel number of input
* @param rate A positive int, the stride with which we sample input value
* @param size A positive int, the size of 1D-filter
* @param filter_kernel The kernel matrix of filter
* @param filter_bias The bias matrix of filter. Can be NULL if a bias of 0 is required.
* @param gate_kernel The kernel matrix of gate
* @param gate_bias The bias matrix of gate. Can be NULL if a bias of 0 is required.
* @param filter_shift Shift ratio used in filter operation between two 16-bit fixed point vector
* @param gate_shift Shift ratio used in gate operation between two 16-bit fixed point vector
* @param offset The offset to calculate input convq
* @param prenum The preload size, 0: do not use preload function
* @return The result of dilation layer
*/
qtp_t *dl_dilation_layerq_mc_steps( dl_convq_queue_t **in,
dl_convq_queue_t **out,
int nch,
int rate,
int size,
dl_matrix2dq_t* filter_kernel,
dl_matrix2dq_t* filter_bias,
dl_matrix2dq_t* gate_kernel,
dl_matrix2dq_t* gate_bias,
int filter_shift,
int gate_shift,
int offset,
int prenum);
void test_atrous_convq(int size, int rate, int in_channel, int out_channel);
void test_lstm_convq(int size, int in_dim, int lstm_cell);
void dl_nn_tanh_i162(dl_convq_queue_t **cqm, int offset, int nch);
void dl_copy_queue_item_by_qmf(dl_convq_queue_t *cq, fptp_t* item, int m_bit, int f_bit, int offset, int ch);
void dl_convq_queue_mc_bzero(dl_convq_queue_t **cqm, int nch);
#endif

View File

@@ -0,0 +1,252 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIX_H
#define DL_LIB_MATRIX_H
#ifdef ESP_PLATFORM
#include "freertos/FreeRTOS.h"
#include "freertos/task.h"
#include "freertos/queue.h"
#include "esp_system.h"
#endif
// #ifdef CONFIG_IDF_TARGET_ESP32S3
// #include "dl_tie728_bzero.h"
// #endif
typedef float fptp_t;
#if CONFIG_BT_SHARE_MEM_REUSE
extern multi_heap_handle_t gst_heap;
#endif
//Flags for matrices
#define DL_MF_FOREIGNDATA (1<<0) /*< Matrix *item data actually points to another matrix and should not be freed */
//'Normal' float matrix
typedef struct {
int w; /*< Width */
int h; /*< Height */
int stride; /*< Row stride, essentially how many items to skip to get to the same position in the next row */
int flags; /*< Flags. OR of DL_MF_* values */
fptp_t *item; /*< Pointer to item array */
} dl_matrix2d_t;
//Macro to quickly access the raw items in a matrix
#define DL_ITM(m, x, y) m->item[(x)+(y)*m->stride]
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_alloc(int w, int h);
/**
* @brief Free a matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
*
* @param m Matrix to free
*/
void dl_matrix_free(dl_matrix2d_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrix_zero(dl_matrix2d_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to zero
*/
dl_matrix2d_t *dl_matrix_copy_to_psram(const dl_matrix2d_t *m);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_slice(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief select a range of items from an existing matrix and flatten them into one dimension.
*
* @Warning The results are flattened in row-major order.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flatten matrix, or NULL if out of memory
*/
dl_matrix2d_t *dl_matrix_flatten(const dl_matrix2d_t *src, int x, int y, int w, int h, dl_matrix2d_t *in);
/**
* @brief Generate a matrix from existing floating-point data
*
* @param w Width of resulting matrix
* @param h Height of resulting matrix
* @param data Data to populate matrix with
* @return A newaly allocated matrix populated with the given input data, or NULL if out of memory.
*/
dl_matrix2d_t *dl_matrix_from_data(int w, int h, int stride, const void *data);
/**
* @brief Multiply a pair of matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_mul(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two matrices : res=a.b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_dot(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *res);
/**
* @brief Add a pair of matrices item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Added data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_add(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Divide a pair of matrices item-by-item: res=a/b
*
* @param a First matrix
* @param b Second matrix
* @param res Divided data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_div(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Subtract a matrix from another, item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Subtracted data. Can be equal to a or b to overwrite that.
*/
void dl_matrix_sub(const dl_matrix2d_t *a, const dl_matrix2d_t *b, dl_matrix2d_t *out);
/**
* @brief Add a constant to every item of the matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
*/
void dl_matrix_add_const(dl_matrix2d_t *subj, const fptp_t add);
/**
* @brief Concatenate the rows of two matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated array with as avlues a|b
*/
dl_matrix2d_t *dl_matrix_concat(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
dl_matrix2d_t *dl_matrix_concat_h( dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Print the contents of a matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrix(const dl_matrix2d_t *a);
/**
* @brief Return the average square error given a correct and a test matrix.
*
* ...Well, more or less. If anything, it gives an indication of the error between
* the two. Check the code for the exact implementation.
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return value indicating the relative difference between matrices
*/
float dl_matrix_get_avg_sq_err(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Check if two matrices have the same shape, that is, the same amount of rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrix_same_shape(const dl_matrix2d_t *a, const dl_matrix2d_t *b);
/**
* @brief Get a specific item from the matrix
*
* Please use these for external matrix access instead of DL_ITM
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
inline static fptp_t dl_matrix_get(const dl_matrix2d_t *m, const int x, const int y) {
return DL_ITM(m, x, y);
}
/**
* @brief Set a specific item in the matrix to the given value
*
* Please use these for external matrix access instead of DL_ITM
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @param val Value to write to that position
*/
inline static void dl_matrix_set(dl_matrix2d_t *m, const int x, const int y, fptp_t val) {
DL_ITM(m, x, y)=val;
}
void matrix_get_range(const dl_matrix2d_t *m, fptp_t *rmin, fptp_t *rmax);
#endif

View File

@@ -0,0 +1,378 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ_H
#define DL_LIB_MATRIXQ_H
#include <stdint.h>
#include "dl_lib_matrix.h"
typedef int16_t qtp_t;
//Quantized matrix. Uses fixed numbers and has the storage for the rows/columns inverted
//for easy use as a multiplicand without stressing out the flash cache too much.
typedef struct {
int w;
int h;
int stride; //Normally equals h, not w!
int flags;
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
qtp_t *itemq;
} dl_matrix2dq_t;
#define DL_QTP_SHIFT 15
#define DL_QTP_RANGE ((1<<DL_QTP_SHIFT)-1)
#define DL_ITMQ(m, x, y) m->itemq[(y)+(x)*m->stride]
#define DL_QTP_EXP_NA 255 //non-applicable exponent because matrix is null
#define DL_SHIFT_AUTO 32
/**
* @info About quantized matrices and shift values
*
* Grab a coffee (or tea, or hot water) and sit down when you read this for the first
* time. Quantized matrices can speed up your operations, but come with some quirks, and
* it's good to understand how they work before using them.
*
* The data in the quantized matrix type is stored similarily to floating-point types:
* when storing a real value, the value is stored as a mantissa (base number) and an
* exponent. The 'real' value that can be re-derived from those two numbers is something
* similar to mantissa*2^exponent. Up to this point, there's not that much difference from
* the standard floating point implementations like e.g. IEEE-754.
*
* The difference with respect to quantized matrices is that for a quantized matrix, it is
* assumed all values stored have more-or-less the same order of magnitude. This allows the
* matrix to only store all the mantissas, while the exponents are shared; there is only one
* exponent for the entire matrix. This makes it quicker to handle matrix operations - the
* logic to fix the exponents only needs to happen once, while the rest can be done in simple
* integer arithmetic. It also nets us some memory savings - while normally a floating point
* number is 32-bit, storing only 16-bit mantissas as the matrix items almost halves the
* memory requirements.
*
* While most of the details of handling the intricacies of the quantized matrixes are done
* transparently by the code in dl_lib_matrixq.c, some implementation details leak out,
* specifically in places where addition/subtraction/division happens.
*
* The problem is that the routines do not know what the size of the resulting operation is. For
* instance, when adding two matrices of numbers, the resulting numbers *could* be large enough
* to overflow the mantissa of the result if the exponent is the same. However, if by default we
* assume the mantissas needs to be scaled back, we may lose precision.
*
* In order to counter this, all operations that have this issue have a ``shift`` argument. If
* the argument is zero, the routine will be conservative, that is, increase the exponent of
* the result to such an extent it's mathematically impossible a value in the result will exceed
* the maximum value that can be stored. However, when this argument is larger than zero, the
* algorithm will hold back on this scaling by the indicated amount of bits, preserving precision
* but increasing the chance of some of the calculated values not fitting in the mantissa anymore.
* If this happens, the value will be clipped to the largest (or, for negative values, smallest)
* value possible. (Neural networks usually are okay with this happening for a limited amount
* of matrix indices).
*
* For deciding on these shift values, it is recommended to start with a shift value of one, then
* use dl_matrixq_check_sanity on the result. If this indicates clipping, lower the shift value.
* If it indicates bits are under-used, increase it. Note that for adding and subtraction, only
* shift values of 0 or 1 make sense; these routines will error out if you try to do something
* else.
*
* For neural networks and other noise-tolerant applications, note that even when
* dl_matrixq_check_sanity does not indicate any problems, twiddling with the shift value may lead
* to slightly improved precision. Feel free to experiment.
**/
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_alloc(int w, int h);
dl_matrix2dq_t *dl_matrixq_alloc_psram(int w, int h);
/**
* @brief Convert a floating-point matrix to a quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @Return The quantized version of the floating-point matrix
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq_t *out);
/**
* TODO: DESCRIBE THIS FUNCTION
*/
dl_matrix2dq_t *dl_matrixq_from_matrix2d_by_qmf(const dl_matrix2d_t *m, dl_matrix2dq_t *out, int m_bit, int f_bit);
/**
* @brief Convert a quantized matrix to a floating-point one.
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @Return The quantized version of the floating-point matrix
**/
dl_matrix2d_t *dl_matrix2d_from_matrixq(const dl_matrix2dq_t *m, dl_matrix2d_t *out);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
*
* @param m Matrix to free
*/
void dl_matrixq_free(dl_matrix2dq_t *m);
/**
* @brief Zero out the matrix
* Sets all entries in the matrix to 0.
*
* @param m Matrix to zero
*/
void dl_matrixq_zero(dl_matrix2dq_t *m);
/**
* @brief Copy the matrix into psram
* Copy the matrix from flash or iram/psram into psram
*
* @param m Matrix to copy
*/
dl_matrix2dq_t *dl_matrixq_copy_to_psram(const dl_matrix2dq_t *m);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b, Result is a fixed-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices: res=a.b, Result is a floating-point matrix.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a fixed-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot calls; this function can be
* much slower than dl_matrixq_dot .
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
* @param shift Shift ratio
*/
void dl_matrixq_dot_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Do a dotproduct of two quantized matrices : res=a.b. This always uses the simple & stupid C algo for the dot product.
*
* Result is a floating-point matrix.
*
* Use this only if you expect something is wrong with the accelerated routines that dl_matrixq_dot_matrix_out calls; this function can be
* much slower than dl_matrixq_dot_matrix_out.
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Dotproduct data. *Must* be a *different* matrix from a or b!
*/
void dl_matrixq_dot_matrix_out_c_impl(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Do a dotproduct of a floating point and a quantized matrix. Result is a floating-point matrix.
*
* @param a First multiplicand; float matrix
* @param b Second multiplicand; quantized matrix
* @param res Dotproduct data; float matrix. *Must* be a *different* matrix from a or b!
*/
void dl_matrix_matrixq_dot(const dl_matrix2d_t *a, const dl_matrix2dq_t *b, dl_matrix2d_t *res);
/**
* @brief Print the contents of a quantized matrix to stdout. Used for debugging.
*
* @param a The matrix to print.
*/
void dl_printmatrixq(const dl_matrix2dq_t *a);
/**
* @brief Add a pair of quantizedmatrices item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Added data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_add(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Generate a new matrix using a range of items from an existing matrix.
* When using this, the data of the new matrix is not allocated/copied but it re-uses a pointer
* to the existing data. Changing the data in the resulting matrix, as a result, will also change
* the data in the existing matrix that has been sliced.
*
* @Warning In contrast to the floating point equivalent of this function, the fixed-point version
* of this has the issue that as soon as the output exponent of one of the slices changes, the data
* in the sliced matrix gets corrupted (because the exponent of that matrix is still the same.) If you
* use this function, either treat the slices as read-only, or assume the sliced matrix contains
* garbage after modifying the data in one of the slices.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix (with foreign data) to re-use. Passing NULL will allocate a new matrix.
* @return The resulting slice matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_slice(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief select a range of items from an existing matrix and flatten them into one dimension.
*
* @Warning The results are flattened in row-major order.
*
* @param x X-offset of the origin of the returned matrix within the sliced matrix
* @param y Y-offset of the origin of the returned matrix within the sliced matrix
* @param w Width of the resulting matrix
* @param h Height of the resulting matrix
* @param in Old matrix to re-use. Passing NULL will allocate a new matrix.
* @return The resulting flatten matrix, or NULL if out of memory
*/
dl_matrix2dq_t *dl_matrixq_flatten(const dl_matrix2dq_t *src, int x, int y, int w, int h, dl_matrix2dq_t *in);
/**
* @brief Subtract a quantized matrix from another, item-by-item: res=a-b
*
* @param a First matrix
* @param b Second matrix
* @param res Subtracted data. Can be equal to a or b to overwrite that.
* @param shift Shift value. Only 0 or 1 makes sense here. <ToDo: check>
*/
void dl_matrixq_sub(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *res, int shift);
/**
* @brief Multiply a pair of quantized matrices item-by-item: res=a*b
*
* @param a First multiplicand
* @param b Second multiplicand
* @param res Multiplicated data. Can be equal to a or b to overwrite that matrix.
*/
void dl_matrixq_mul( dl_matrix2dq_t *a, dl_matrix2dq_t *b, dl_matrix2dq_t *res);
/**
* @brief Divide a pair of quantized matrices item-by-item: res=a/b
*
* @param a First matrix
* @param b Second matrix
* @param res Divided data. Can be equal to a or b to overwrite that.
*/
void dl_matrixq_div(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b, dl_matrix2dq_t *out, int shift);
/**
* @brief Check if two quantized matrices have the same shape, that is, the same amount of
* rows and columns
*
* @param a First of the two matrices to compare
* @param b Second of the two matrices to compare
* @return true if the two matrices are shaped the same, false otherwise.
*/
int dl_matrixq_same_shape(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Concatenate the rows of two quantized matrices into a new matrix
*
* @param a First matrix
* @param b Second matrix
* @return A newly allocated quantized matrix with as values a|b
*/
dl_matrix2dq_t *dl_matrixq_concat(const dl_matrix2dq_t *a, const dl_matrix2dq_t *b);
/**
* @brief Add a constant to every item of the quantized matrix
*
* @param subj Matrix to add the constant to
* @param add The constant
*/
void dl_matrixq_add_const(dl_matrix2dq_t *subj, const fptp_t add, int shift);
/**
* @brief Check the sanity of a quantized matrix
*
* Due to the nature of quantized matrices, depending on the calculations a quantized
* matrix is the result of and the shift values chosen in those calculations, a quantized
* matrix may have an exponent and mantissas that lead to a loss of precision, either because
* most significant mantissa bits are unused, or because a fair amount of mantissas are
* clipped. This function checks if this is the case and will report a message to stdout
* if significant loss of precision is detected.
*
* @param m The quantized matrix to check
* @param name A string to be displayed in the message if the sanity check fails
* @return True if matrix is sane, false otherwise
**/
int dl_matrixq_check_sanity(dl_matrix2dq_t *m, const char *name);
/**
* @brief re-adjust the exponent of the matrix to fit the mantissa better
*
* This function will shift up all the data in the mantissas so there are no
* most-significant bits that are unused in all mantissas. It will also adjust
* the exponent to keep the actua values in the matrix the same.
*
* Some operations done on a matrix, especially operations that re-use the
* result of earlier operations done in the same way, can lead to the loss of
* data because the exponent of the quantized matrix is never re-adjusted. You
* can do that implicitely by calling this function.
*
* @param m The matrix to re-adjust
**/
void dl_matrixq_readjust_exp(dl_matrix2dq_t *m);
/**
* @brief Get the floating-point value of a specific item from the quantized matrix
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @return Value in that position
*/
fptp_t dl_matrixq_get(const dl_matrix2dq_t *m, const int x, const int y);
/**
* @brief Set a specific item in the quantized matrix to the given
* floating-point value
*
* @warning If the given value is more than the exponent in the quantized matrix
* allows for, all mantissas in the matrix will be shifted down to make the value
* 'fit'. If, however, the exponent is such that the value would result in a
* quantized mantissa of 0, nothing is done.
*
* @param m Matrix to access
* @param x Column address
* @param y Row address
* @param val Value to write to that position
*/
void dl_matrixq_set(dl_matrix2dq_t *m, const int x, const int y, fptp_t val);
#endif

View File

@@ -0,0 +1,71 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DL_LIB_MATRIXQ8_H
#define DL_LIB_MATRIXQ8_H
#include <stdint.h>
#include "dl_lib_matrix.h"
#include "dl_lib.h"
#include "dl_lib_matrixq.h"
typedef int8_t q8tp_t;
typedef struct {
int w;
int h;
int stride; //Normally equals h, not w!
int flags;
int exponent; //The values in items should be multiplied by pow(2,exponent) to get the real values.
q8tp_t *itemq;
} dl_matrix2dq8_t;
#define DL_Q8TP_SHIFT 7
#define DL_Q8TP_RANGE ((1<<DL_Q8TP_SHIFT)-1)
#define DL_ITMQ8(m, x, y) m->itemq[(y)+(x)*m->stride]
/**
* @brief Allocate a matrix
*
* @param w Width of the matrix
* @param h Height of the matrix
* @return The matrix, or NULL if out of memory
*/
dl_matrix2dq8_t *dl_matrixq8_alloc(int w, int h);
/**
* @brief Free a quantized matrix
* Frees the matrix structure and (if it doesn't have the DL_MF_FOREIGNDATA flag set) the m->items space as well.
*
* @param m Matrix to free
*/
void dl_matrixq8_free(dl_matrix2dq8_t *m);
/**
* @brief Copy a quantized matrix
* Copy a quantized matrix from flash or iram/psram
*
* @param m Matrix to copy
*/
dl_matrix2dq8_t *dl_matrixq8_copy_to_psram(const dl_matrix2dq8_t *m);
/**
* @brief Convert a floating-point matrix to a quantized matrix
*
* @param m Floating-point matrix to convert
* @param out Quantized matrix to re-use. If NULL, allocate a new one.
* @Return The quantized version of the floating-point matrix
*/
dl_matrix2dq8_t *dl_matrixq8_from_matrix2d(const dl_matrix2d_t *m, dl_matrix2dq8_t *out);
#endif

View File

@@ -0,0 +1,112 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AEC_H_
#define _ESP_AEC_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define USE_AEC_FFT // Not kiss_fft
#define AEC_USE_SPIRAM 0
#define AEC_SAMPLE_RATE 16000 // Only Support 16000Hz
#define AEC_FRAME_LENGTH_MS 16
#define AEC_FILTER_LENGTH 1200 // Number of samples of echo to cancel
typedef void* aec_handle_t;
/**
* @brief Creates an instance to the AEC structure.
*
* @deprecated This API will be deprecated after version 1.0, please use aec_pro_create
*
* @param sample_rate The Sampling frequency (Hz) must be 16000.
*
* @param frame_length The length of the audio processing must be 16ms.
*
* @param filter_length Number of samples of echo to cancel.
*
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t aec_create(int sample_rate, int frame_length, int filter_length);
/**
* @brief Creates an instance to the AEC structure.
*
* @deprecated This API will be deprecated after version 1.0, please use aec_pro_create
*
* @param sample_rate The Sampling frequency (Hz) must be 16000.
*
* @param frame_length The length of the audio processing must be 16ms.
*
* @param filter_length Number of samples of echo to cancel.
*
* @param nch Number of input signal channel.
*
* @return
* - NULL: Create failed
* - Others: The instance of AEC
*/
aec_handle_t aec_create_multimic(int sample_rate, int frame_length, int filter_length, int nch);
/**
* @brief Creates an instance of more powerful AEC.
*
* @param frame_length Length of input signal. Must be 16ms if mode is 0; otherwise could be 16ms or 32ms. Length of input signal to aec_process must be modified accordingly.
*
* @param nch Number of microphones.
*
* @param mode Mode of AEC (0 to 5), indicating aggressiveness and RAM allocation. 0: mild; 1 or 2: medium (1: internal RAM, 2: SPIRAM); 3 and 4: aggressive (3: internal RAM, 4: SPIRAM); 5: agressive, accelerated for ESP32-S3.
*
* @return
* - NULL: Create failed
* - Others: An Instance of AEC
*/
aec_handle_t aec_pro_create(int frame_length, int nch, int mode);
/**
* @brief Performs echo cancellation a frame, based on the audio sent to the speaker and frame from mic.
*
* @param inst The instance of AEC.
*
* @param indata An array of 16-bit signed audio samples from mic.
*
* @param refdata An array of 16-bit signed audio samples sent to the speaker.
*
* @param outdata Returns near-end signal with echo removed.
*
* @return None
*
*/
void aec_process(const aec_handle_t inst, int16_t *indata, int16_t *refdata, int16_t *outdata);
/**
* @brief Free the AEC instance
*
* @param inst The instance of AEC.
*
* @return None
*
*/
void aec_destroy(aec_handle_t inst);
#ifdef __cplusplus
}
#endif
#endif //_ESP_AEC_H_

View File

@@ -0,0 +1,104 @@
#pragma once
#include "stdint.h"
#include "esp_wn_iface.h"
#include "esp_wn_models.h"
#include "esp_vad.h"
//AFE: Audio Front-End
//SR: Speech Recognition
//afe_sr/AFE_SR: the audio front-end for speech recognition
//Set AFE_SR mode
typedef enum {
SR_MODE_LOW_COST = 0,
SR_MODE_HIGH_PERF = 1
} afe_sr_mode_t;
typedef enum {
AFE_MEMORY_ALLOC_MORE_INTERNAL = 1, // malloc with more internal ram
AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE = 2, // malloc with internal ram and psram in balance
AFE_MEMORY_ALLOC_MORE_PSRAM = 3 // malloc with more psram
} afe_memory_alloc_mode_t;
typedef enum {
AFE_MN_PEAK_AGC_MODE_1 = -5, // The peak amplitude of audio fed to multinet is -5dB
AFE_MN_PEAK_AGC_MODE_2 = -4, // The peak amplitude of audio fed to multinet is -4dB
AFE_MN_PEAK_AGC_MODE_3 = -3, // The peak amplitude of audio fed to multinet is -3dB
AFE_MN_PEAK_NO_AGC = 0, // There is no agc gain
} afe_mn_peak_agc_mode_t;
typedef struct {
int total_ch_num; // total channel num. It must be: total_ch_num = mic_num + ref_num
int mic_num; // mic channel num
int ref_num; // reference channel num
int sample_rate; // sample rate of audio
} afe_pcm_config_t;
typedef struct {
bool aec_init;
bool se_init;
bool vad_init;
bool wakenet_init;
bool voice_communication_init;
bool voice_communication_agc_init; // AGC swich for voice communication
int voice_communication_agc_gain; // AGC gain(dB) for voice communication
vad_mode_t vad_mode; // The value can be: VAD_MODE_0, VAD_MODE_1, VAD_MODE_2, VAD_MODE_3, VAD_MODE_4
char *wakenet_model_name; // The model name of wakenet
det_mode_t wakenet_mode;
afe_sr_mode_t afe_mode;
int afe_perferred_core;
int afe_perferred_priority;
int afe_ringbuf_size;
afe_memory_alloc_mode_t memory_alloc_mode;
afe_mn_peak_agc_mode_t agc_mode; // The agc mode for ASR
afe_pcm_config_t pcm_config; // Config the channel num of original data which is fed to the afe feed function.
} afe_config_t;
#if CONFIG_IDF_TARGET_ESP32
#define AFE_CONFIG_DEFAULT() { \
.aec_init = true, \
.se_init = true, \
.vad_init = true, \
.wakenet_init = true, \
.voice_communication_init = false, \
.voice_communication_agc_init = false, \
.voice_communication_agc_gain = 15, \
.vad_mode = VAD_MODE_3, \
.wakenet_model_name = NULL, \
.wakenet_mode = DET_MODE_90, \
.afe_mode = SR_MODE_HIGH_PERF, \
.afe_perferred_core = 0, \
.afe_perferred_priority = 5, \
.afe_ringbuf_size = 50, \
.memory_alloc_mode = AFE_MEMORY_ALLOC_INTERNAL_PSRAM_BALANCE, \
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
.pcm_config.total_ch_num = 2, \
.pcm_config.mic_num = 1, \
.pcm_config.ref_num = 1, \
.pcm_config.sample_rate = 16000, \
}
#elif CONFIG_IDF_TARGET_ESP32S3
#define AFE_CONFIG_DEFAULT() { \
.aec_init = true, \
.se_init = true, \
.vad_init = true, \
.wakenet_init = true, \
.voice_communication_init = false, \
.voice_communication_agc_init = false, \
.voice_communication_agc_gain = 15, \
.vad_mode = VAD_MODE_3, \
.wakenet_model_name = NULL, \
.wakenet_mode = DET_MODE_2CH_90, \
.afe_mode = SR_MODE_LOW_COST, \
.afe_perferred_core = 0, \
.afe_perferred_priority = 5, \
.afe_ringbuf_size = 50, \
.memory_alloc_mode = AFE_MEMORY_ALLOC_MORE_PSRAM, \
.agc_mode = AFE_MN_PEAK_AGC_MODE_2, \
.pcm_config.total_ch_num = 3, \
.pcm_config.mic_num = 2, \
.pcm_config.ref_num = 1, \
.pcm_config.sample_rate = 16000, \
}
#endif

View File

@@ -0,0 +1,190 @@
#pragma once
#include "stdint.h"
#include "esp_afe_config.h"
//AFE: Audio Front-End
//SR: Speech Recognition
//afe_sr/AFE_SR: the audio front-end for speech recognition
//Opaque AFE_SR data container
typedef struct esp_afe_sr_data_t esp_afe_sr_data_t;
/**
* @brief The state of vad
*/
typedef enum
{
AFE_VAD_SILENCE = 0, // noise or silence
AFE_VAD_SPEECH // speech
} afe_vad_state_t;
/**
* @brief The result of fetch function
*/
typedef struct afe_fetch_result_t
{
int16_t *data; // the data of audio.
int data_size; // the size of data. The unit is byte.
int wakeup_state; // the value is afe_wakeup_state_t
int wake_word_index; // if the wake word is detected. It will store the wake word index which start from 1.
int vad_state; // the value is afe_vad_state_t
int trigger_channel_id; // the channel index of output
int wake_word_length; // the length of wake word. It's unit is the number of samples.
int ret_value; // the return state of fetch function
void* reserved; // reserved for future use
} afe_fetch_result_t;
/**
* @brief Function to initialze a AFE_SR instance
*
* @param afe_config The config of AFE_SR
* @returns Handle to the AFE_SR data
*/
typedef esp_afe_sr_data_t* (*esp_afe_sr_iface_op_create_from_config_t)(afe_config_t *afe_config);
/**
* @brief Get the amount of each channel samples per frame that need to be passed to the function
*
* Every speech enhancement AFE_SR processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param afe The AFE_SR object to query
* @return The amount of samples to feed the fetch function
*/
typedef int (*esp_afe_sr_iface_op_get_samp_chunksize_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the total channel number which be config
*
* @param afe The AFE_SR object to query
* @return The amount of total channels
*/
typedef int (*esp_afe_sr_iface_op_get_total_channel_num_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the mic channel number which be config
*
* @param afe The AFE_SR object to query
* @return The amount of mic channels
*/
typedef int (*esp_afe_sr_iface_op_get_channel_num_t)(esp_afe_sr_data_t *afe);
/**
* @brief Get the sample rate of the samples to feed to the function
*
* @param afe The AFE_SR object to query
* @return The sample rate, in hz
*/
typedef int (*esp_afe_sr_iface_op_get_samp_rate_t)(esp_afe_sr_data_t *afe);
/**
* @brief Feed samples of an audio stream to the AFE_SR
*
* @Warning The input data should be arranged in the format of channel interleaving.
* The last channel is reference signal if it has reference data.
*
* @param afe The AFE_SR object to query
*
* @param in The input microphone signal, only support signed 16-bit @ 16 KHZ. The frame size can be queried by the
* `get_feed_chunksize`.
* @return The size of input
*/
typedef int (*esp_afe_sr_iface_op_feed_t)(esp_afe_sr_data_t *afe, const int16_t* in);
/**
* @brief fetch enhanced samples of an audio stream from the AFE_SR
*
* @Warning The output is single channel data, no matter how many channels the input is.
*
* @param afe The AFE_SR object to query
* @return The result of output, please refer to the definition of `afe_fetch_result_t`. (The frame size of output audio can be queried by the `get_fetch_chunksize`.)
*/
typedef afe_fetch_result_t* (*esp_afe_sr_iface_op_fetch_t)(esp_afe_sr_data_t *afe);
/**
* @brief Initial wakenet and wake words coefficient, or reset wakenet and wake words coefficient
* when wakenet has been initialized.
*
* @param afe The AFE_SR object to query
* @param wakenet_word The wakenet word, should be DEFAULT_WAKE_WORD or EXTRA_WAKE_WORD
* @return 0: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_set_wakenet_t)(esp_afe_sr_data_t *afe, char* model_name);
/**
* @brief Disable wakenet model.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_disable_wakenet_t)(esp_afe_sr_data_t *afe);
/**
* @brief Enable wakenet model.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_enable_wakenet_t)(esp_afe_sr_data_t *afe);
/**
* @brief Disable AEC algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_disable_aec_t)(esp_afe_sr_data_t *afe);
/**
* @brief Enable AEC algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_enable_aec_t)(esp_afe_sr_data_t *afe);
/**
* @brief Disable SE algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_disable_se_t)(esp_afe_sr_data_t *afe);
/**
* @brief Enable SE algorithm.
*
* @param afe The AFE_SR object to query
* @return 0: fail, 1: success
*/
typedef int (*esp_afe_sr_iface_op_enable_se_t)(esp_afe_sr_data_t *afe);
/**
* @brief Destroy a AFE_SR instance
*
* @param afe AFE_SR object to destroy
*/
typedef void (*esp_afe_sr_iface_op_destroy_t)(esp_afe_sr_data_t *afe);
/**
* This structure contains the functions used to do operations on a AFE_SR.
*/
typedef struct {
esp_afe_sr_iface_op_create_from_config_t create_from_config;
esp_afe_sr_iface_op_feed_t feed;
esp_afe_sr_iface_op_fetch_t fetch;
esp_afe_sr_iface_op_get_samp_chunksize_t get_feed_chunksize;
esp_afe_sr_iface_op_get_samp_chunksize_t get_fetch_chunksize;
esp_afe_sr_iface_op_get_total_channel_num_t get_total_channel_num;
esp_afe_sr_iface_op_get_channel_num_t get_channel_num;
esp_afe_sr_iface_op_get_samp_rate_t get_samp_rate;
esp_afe_sr_iface_op_set_wakenet_t set_wakenet;
esp_afe_sr_iface_op_disable_wakenet_t disable_wakenet;
esp_afe_sr_iface_op_enable_wakenet_t enable_wakenet;
esp_afe_sr_iface_op_disable_aec_t disable_aec;
esp_afe_sr_iface_op_enable_aec_t enable_aec;
esp_afe_sr_iface_op_disable_se_t disable_se;
esp_afe_sr_iface_op_enable_se_t enable_se;
esp_afe_sr_iface_op_destroy_t destroy;
} esp_afe_sr_iface_t;

View File

@@ -0,0 +1,27 @@
#pragma once
#if defined CONFIG_USE_AFE
#include "esp_afe_sr_iface.h"
#if CONFIG_AFE_INTERFACE_V1
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
#else
#error No valid afe selected.
#endif
#else
#include "esp_afe_sr_iface.h"
extern const esp_afe_sr_iface_t esp_afe_sr_v1;
extern const esp_afe_sr_iface_t esp_afe_vc_v1;
#define ESP_AFE_SR_HANDLE esp_afe_sr_v1
#define ESP_AFE_VC_HANDLE esp_afe_vc_v1
#endif

View File

@@ -0,0 +1,31 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_AGC_H_
#define _ESP_AGC_H_
////all positive value is valid, negective is error
typedef enum {
ESP_AGC_SUCCESS = 0, ////success
ESP_AGC_FAIL = -1, ////agc fail
ESP_AGC_SAMPLE_RATE_ERROR = -2, ///sample rate can be only 8khz, 16khz, 32khz
ESP_AGC_FRAME_SIZE_ERROR = -3, ////the input frame size should be only 10ms, so should together with sample-rate to get the frame size
} ESP_AGE_ERR;
void *esp_agc_open(int agc_mode, int sample_rate);
void set_agc_config(void *agc_handle, int gain_dB, int limiter_enable, int target_level_dbfs);
int esp_agc_process(void *agc_handle, short *in_pcm, short *out_pcm, int frame_size, int sample_rate);
void esp_agc_close(void *agc_handle);
#endif // _ESP_AGC_H_

View File

@@ -0,0 +1,81 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#define MASE_SAMPLE_RATE 16000 // Supports 16kHz only
#define MASE_FRAME_SIZE 16 // Supports 16ms only
#define MASE_MIC_DISTANCE 65 // According to physical design of mic-array
/**
* @brief Sets mic-array type, currently 2-mic line array and 3-mic circular array
* are supported.
*/
typedef enum {
TWO_MIC_LINE = 0,
THREE_MIC_CIRCLE = 1
} mase_mic_array_type_t;
/**
* @brief Sets operating mode, supporting normal mode and wake-up enhancement mode
*/
typedef enum {
NORMAL_ENHANCEMENT_MODE = 0,
WAKE_UP_ENHANCEMENT_MODE = 1
} mase_op_mode_t;
typedef void* mase_handle_t;
/**
* @brief Creates an instance to the MASE structure.
*
* @param sample_rate The sampling frequency (Hz) must be 16000.
*
* @param frame_size The length of the audio processing must be 16ms.
*
* @param array_type '0' for 2-mic line array and '1' for 3-mic circular array.
*
* @param mic_distance The distance between neiboring microphones in mm.
*
* @param operating_mode '0' for normal mode and '1' for wake-up enhanced mode.
*
* @param filter_strength Strengh of the mic-array speech enhancement, must be 0, 1, 2 or 3.
*
* @return
* - NULL: Create failed
* - Others: An instance of MASE
*/
mase_handle_t mase_create(int fs, int frame_size, int array_type, float mic_distance, int operating_mode, int filter_strength);
/**
* @brief Performs mic array processing for one frame.
*
* @param inst The instance of MASE.
*
* @param in An array of 16-bit signed audio samples from mic.
*
* @param dsp_out Returns enhanced signal.
*
* @return None
*
*/
void mase_process(mase_handle_t st, int16_t *in, int16_t *dsp_out);
/**
* @brief Free the MASE instance
*
* @param inst The instance of MASE.
*
* @return None
*
*/
void mase_destory(mase_handle_t st);

View File

@@ -0,0 +1,153 @@
#pragma once
#include "stdint.h"
#include "esp_wn_iface.h"
#define ESP_MN_RESULT_MAX_NUM 5
#define ESP_MN_MAX_PHRASE_NUM 200
#define ESP_MN_MAX_PHRASE_LEN 63
#define ESP_MN_MIN_PHRASE_LEN 2
#define ESP_MN_PREFIX "mn"
#define ESP_MN_ENGLISH "en"
#define ESP_MN_CHINESE "cn"
typedef enum {
ESP_MN_STATE_DETECTING = 0, // detecting
ESP_MN_STATE_DETECTED = 1, // detected
ESP_MN_STATE_TIMEOUT = 2, // time out
} esp_mn_state_t;
// Return all possible recognition results
typedef struct{
esp_mn_state_t state;
int num; // The number of phrase in list, num<=5. When num=0, no phrase is recognized.
int command_id[ESP_MN_RESULT_MAX_NUM]; // The list of command id.
int phrase_id[ESP_MN_RESULT_MAX_NUM]; // The list of phrase id.
float prob[ESP_MN_RESULT_MAX_NUM]; // The list of probability.
} esp_mn_results_t;
typedef struct{
int16_t num; // The number of error phrases, which can not added into model
int16_t phrase_idx[ESP_MN_MAX_PHRASE_NUM]; // The error phrase index in singly linked list
} esp_mn_error_t;
typedef struct {
char phoneme_string[ESP_MN_MAX_PHRASE_LEN + 1]; // phoneme string
int16_t command_id; // the command id
float threshold; // trigger threshold, default: 0
int16_t *wave; // prompt wave data of the phrase
} esp_mn_phrase_t;
typedef struct _mn_node_ {
esp_mn_phrase_t *phrase;
struct _mn_node_ *next;
} esp_mn_node_t;
/**
* @brief Initialze a model instance with specified model name.
*
* @param model_name The wakenet model name.
* @param duration The duration (ms) to trigger the timeout
*
* @returns Handle to the model data.
*/
typedef model_iface_data_t* (*esp_mn_iface_op_create_t)(const char *model_name, int duration);
/**
* @brief Callback function type to fetch the amount of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_mn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
* @brief Callback function type to fetch the number of frames recognized by the command word
*
* @param model The model object to query
* @return The number of the frames recognized by the command word
*/
typedef int (*esp_mn_iface_op_get_samp_chunknum_t)(model_iface_data_t *model);
/**
* @brief Set the detection threshold to manually abjust the probability
*
* @param model The model object to query
* @param det_treshold The threshold to trigger speech commands, the range of det_threshold is 0.0~0.9999
*/
typedef int (*esp_mn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold);
/**
* @brief Get the sample rate of the samples to feed to the detect function
*
* @param model The model object to query
* @return The sample rate, in hz
*/
typedef int (*esp_mn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
* @brief Get the language of model
*
* @param model The language name
* @return Language name string defined in esp_mn_models.h, eg: ESP_MN_CHINESE, ESP_MN_ENGLISH
*/
typedef char * (*esp_mn_iface_op_get_language_t)(model_iface_data_t *model);
/**
* @brief Feed samples of an audio stream to the speech recognition model and detect if there is a speech command found.
*
* @param model The model object to query.
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
* get_samp_chunksize function.
* @return The state of multinet
*/
typedef esp_mn_state_t (*esp_mn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
* @brief Destroy a speech commands recognition model
*
* @param model The Model object to destroy
*/
typedef void (*esp_mn_iface_op_destroy_t)(model_iface_data_t *model);
/**
* @brief Get recognition results
*
* @param model The Model object to query
*
* @return The current results.
*/
typedef esp_mn_results_t* (*esp_mn_iface_op_get_results_t)(model_iface_data_t *model);
/**
* @brief Open the log print
*
* @param model_data The model object to query.
*
*/
typedef void (*esp_mn_iface_op_open_log_t)(model_iface_data_t *model_data);
/**
* @brief Set the speech commands by mn_command_root
*
* @param model_data The model object to query.
* @param mn_command_root The speech commands link.
* @return The error phrase id info.
*/
typedef esp_mn_error_t* (*esp_wn_iface_op_set_speech_commands)(model_iface_data_t *model_data, esp_mn_node_t *mn_command_root);
typedef struct {
esp_mn_iface_op_create_t create;
esp_mn_iface_op_get_samp_rate_t get_samp_rate;
esp_mn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_mn_iface_op_get_samp_chunknum_t get_samp_chunknum;
esp_mn_iface_op_set_det_threshold_t set_det_threshold;
esp_mn_iface_op_detect_t detect;
esp_mn_iface_op_destroy_t destroy;
esp_mn_iface_op_get_results_t get_results;
esp_mn_iface_op_open_log_t open_log;
esp_wn_iface_op_set_speech_commands set_speech_commands;
} esp_mn_iface_t;

View File

@@ -0,0 +1,61 @@
#pragma once
#include "esp_mn_iface.h"
//Contains declarations of all available speech recognion models. Pair this up with the right coefficients and you have a model that can recognize
//a specific phrase or word.
/**
* @brief Get the multinet handle from model name
*
* @param model_name The name of model
* @returns The handle of multinet
*/
esp_mn_iface_t *esp_mn_handle_from_name(char *model_name);
/**
* @brief Get the multinet language from model name
*
* @param model_name The name of model
* @returns The language of multinet
*/
char *esp_mn_language_from_name(char *model_name);
/*
Configure wake word to use based on what's selected in menuconfig.
*/
#ifdef CONFIG_SR_MN_CN_MULTINET2_SINGLE_RECOGNITION
#include "multinet2_ch.h"
#define MULTINET_COEFF get_coeff_multinet2_ch
#define MULTINET_MODEL_NAME "mn2_cn"
#else
#define MULTINET_COEFF "COEFF_NULL"
#define MULTINET_MODEL_NAME "NULL"
#endif
/* example
static const esp_mn_iface_t *multinet = &MULTINET_MODEL;
//Initialize MultiNet model data
model_iface_data_t *model_data = multinet->create(&MULTINET_COEFF);
add_speech_commands(multinet, model_data);
//Set parameters of buffer
int audio_chunksize=model->get_samp_chunksize(model_data);
int frequency = model->get_samp_rate(model_data);
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
//Detect
int r=model->detect(model_data, buffer);
if (r>0) {
printf("Detection triggered output %d.\n", r);
}
//Destroy model
model->destroy(model_data)
*/

View File

@@ -0,0 +1,86 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_NS_H_
#define _ESP_NS_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define NS_USE_SPIARM 0
#define NS_FRAME_LENGTH_MS 10 //Supports 10ms, 20ms, 30ms
/**
* The Sampling frequency (Hz) must be 16000Hz
*/
typedef void* ns_handle_t;
/**
* @brief Creates an instance to the NS structure.
*
* @param frame_length The length of the audio processing can be 10ms, 20ms, 30ms.
*
* @return
* - NULL: Create failed
* - Others: The instance of NS
*/
ns_handle_t ns_create(int frame_length);
/**
* @brief Creates an instance of the more powerful noise suppression algorithm.
*
* @warning frame_length only supports be 10 ms.
*
* @param frame_length The length of the audio processing can only be 10ms.
* @param mode 0: Mild, 1: Medium, 2: Aggressive
* @param sample_rate The sample rate of the audio.
*
* @return
* - NULL: Create failed
* - Others: The instance of NS
*/
ns_handle_t ns_pro_create(int frame_length, int mode, int sample_rate);
/**
* @brief Feed samples of an audio stream to the NS and get the audio stream after Noise suppression.
*
* @param inst The instance of NS.
*
* @param indata An array of 16-bit signed audio samples.
*
* @param outdata An array of 16-bit signed audio samples after noise suppression.
*
* @return None
*
*/
void ns_process(ns_handle_t inst, int16_t *indata, int16_t *outdata);
/**
* @brief Free the NS instance
*
* @param inst The instance of NS.
*
* @return None
*
*/
void ns_destroy(ns_handle_t inst);
#ifdef __cplusplus
}
#endif
#endif //_ESP_NS_H_

View File

@@ -0,0 +1,104 @@
// Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License
#ifndef _ESP_VAD_H_
#define _ESP_VAD_H_
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
#define SAMPLE_RATE_HZ 16000 //Supports 32000, 16000, 8000
#define VAD_FRAME_LENGTH_MS 30 //Supports 10ms, 20ms, 30ms
/**
* @brief Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
* restrictive in reporting speech.
*/
typedef enum {
VAD_MODE_0 = 0,
VAD_MODE_1,
VAD_MODE_2,
VAD_MODE_3,
VAD_MODE_4
} vad_mode_t;
typedef enum {
VAD_SILENCE = 0,
VAD_SPEECH
} vad_state_t;
typedef void* vad_handle_t;
/**
* @brief Creates an instance to the VAD structure.
*
* @param vad_mode Sets the VAD operating mode.
*
* @return
* - NULL: Create failed
* - Others: The instance of VAD
*/
vad_handle_t vad_create(vad_mode_t vad_mode);
/**
* @brief Feed samples of an audio stream to the VAD and check if there is someone speaking.
*
* @param inst The instance of VAD.
*
* @param data An array of 16-bit signed audio samples.
*
* @param sample_rate_hz The Sampling frequency (Hz) can be 32000, 16000, 8000, default: 16000.
*
* @param one_frame_ms The length of the audio processing can be 10ms, 20ms, 30ms, default: 30.
*
* @return
* - VAD_SILENCE if no voice
* - VAD_SPEECH if voice is detected
*
*/
vad_state_t vad_process(vad_handle_t inst, int16_t *data, int sample_rate_hz, int one_frame_ms);
/**
* @brief Free the VAD instance
*
* @param inst The instance of VAD.
*
* @return None
*
*/
void vad_destroy(vad_handle_t inst);
/*
* Programming Guide:
*
* @code{c}
* vad_handle_t vad_inst = vad_create(VAD_MODE_3, SAMPLE_RATE_HZ, VAD_FRAME_LENGTH_MS); // Creates an instance to the VAD structure.
*
* while (1) {
* //Use buffer to receive the audio data from MIC.
* vad_state_t vad_state = vad_process(vad_inst, buffer); // Feed samples to the VAD process and get the result.
* }
*
* vad_destroy(vad_inst); // Free the VAD instance at the end of whole VAD process
*
* @endcode
*/
#ifdef __cplusplus
}
#endif
#endif //_ESP_VAD_H_

View File

@@ -0,0 +1,185 @@
#pragma once
#include "stdint.h"
//Opaque model data container
typedef struct model_iface_data_t model_iface_data_t;
/**
* @brief The state of wakeup
*/
typedef enum
{
WAKENET_NO_DETECT = 0, // wake word is not detected
WAKENET_CHANNEL_VERIFIED = -1, // output channel is verified
WAKENET_DETECTED = 1 // wake word is detected
} wakenet_state_t;
//Set wake words recognition operating mode
//The probability of being wake words is increased with increasing mode,
//As a consequence also the false alarm rate goes up
typedef enum {
DET_MODE_90 = 0, // Normal
DET_MODE_95 = 1, // Aggressive
DET_MODE_2CH_90 = 2,
DET_MODE_2CH_95 = 3,
DET_MODE_3CH_90 = 4,
DET_MODE_3CH_95 = 5,
} det_mode_t;
typedef struct {
int wake_word_num; //The number of all wake words
char **wake_word_list; //The name list of wake words
} wake_word_info_t;
/**
* @brief Easy function type to initialze a model instance with a detection mode and specified wake word coefficient
*
* @param model_name The specified wake word model coefficient
* @param det_mode The wake words detection mode to trigger wake words, DET_MODE_90 or DET_MODE_95
* @returns Handle to the model data
*/
typedef model_iface_data_t* (*esp_wn_iface_op_create_t)(const void *model_name, det_mode_t det_mode);
/**
* @brief Get the amount of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_wn_iface_op_get_samp_chunksize_t)(model_iface_data_t *model);
/**
* @brief Get the channel number of samples that need to be passed to the detect function
*
* Every speech recognition model processes a certain number of samples at the same time. This function
* can be used to query that amount. Note that the returned amount is in 16-bit samples, not in bytes.
*
* @param model The model object to query
* @return The amount of samples to feed the detect function
*/
typedef int (*esp_wn_iface_op_get_channel_num_t)(model_iface_data_t *model);
/**
* @brief Get the start point of wake word when one wake word is detected.
*
* @Warning: This function should be called when the channel index is verified.
* The returned value is the number of samples from start point of wake word to detected point.
*
* @param model The model object to query
* @return The number of samples from start point to detected point (end point)
*/
typedef int (*esp_wn_iface_op_get_start_point_t)(model_iface_data_t *model);
/**
* @brief Get the sample rate of the samples to feed to the detect function
*
* @param model The model object to query
* @return The sample rate, in hz
*/
typedef int (*esp_wn_iface_op_get_samp_rate_t)(model_iface_data_t *model);
/**
* @brief Get the number of wake words
*
* @param model The model object to query
* @returns the number of wake words
*/
typedef int (*esp_wn_iface_op_get_word_num_t)(model_iface_data_t *model);
/**
* @brief Get the name of wake word by index
*
* @Warning The index of wake word start with 1
* @param model The model object to query
* @param word_index The index of wake word
* @returns the detection threshold
*/
typedef char* (*esp_wn_iface_op_get_word_name_t)(model_iface_data_t *model, int word_index);
/**
* @brief Set the detection threshold to manually abjust the probability
*
* @param model The model object to query
* @param det_treshold The threshold to trigger wake words, the range of det_threshold is 0.5~0.9999
* @param word_index The index of wake word
* @return 0: setting failed, 1: setting success
*/
typedef int (*esp_wn_iface_op_set_det_threshold_t)(model_iface_data_t *model, float det_threshold, int word_index);
/**
* @brief Get the wake word detection threshold of different modes
*
* @param model The model object to query
* @param word_index The index of wake word
* @returns the detection threshold
*/
typedef float (*esp_wn_iface_op_get_det_threshold_t)(model_iface_data_t *model, int word_index);
/**
* @brief Feed samples of an audio stream to the keyword detection model and detect if there is a keyword found.
*
* @Warning The index of wake word start with 1, 0 means no wake words is detected.
*
* @param model The model object to query
* @param samples An array of 16-bit signed audio samples. The array size used can be queried by the
* get_samp_chunksize function.
* @return The index of wake words, return 0 if no wake word is detected, else the index of the wake words.
*/
typedef wakenet_state_t (*esp_wn_iface_op_detect_t)(model_iface_data_t *model, int16_t *samples);
/**
* @brief Get the volume gain
*
* @param model The model object to query
* @param target_db The target dB to calculate volume gain
* @returns the volume gain
*/
typedef float (*esp_wn_iface_op_get_vol_gain_t)(model_iface_data_t *model, float target_db);
/**
* @brief Get the triggered channel index. Channel index starts from zero
*
* @param model The model object to query
* @return The channel index
*/
typedef int (*esp_wn_iface_op_get_triggered_channel_t)(model_iface_data_t *model);
/**
* @brief Clean all states of model
*
* @param model The model object to query
*/
typedef void (*esp_wn_iface_op_clean_t)(model_iface_data_t *model);
/**
* @brief Destroy a speech recognition model
*
* @param model Model object to destroy
*/
typedef void (*esp_wn_iface_op_destroy_t)(model_iface_data_t *model);
/**
* This structure contains the functions used to do operations on a wake word detection model.
*/
typedef struct {
esp_wn_iface_op_create_t create;
esp_wn_iface_op_get_start_point_t get_start_point;
esp_wn_iface_op_get_samp_chunksize_t get_samp_chunksize;
esp_wn_iface_op_get_channel_num_t get_channel_num;
esp_wn_iface_op_get_samp_rate_t get_samp_rate;
esp_wn_iface_op_get_word_num_t get_word_num;
esp_wn_iface_op_get_word_name_t get_word_name;
esp_wn_iface_op_set_det_threshold_t set_det_threshold;
esp_wn_iface_op_get_det_threshold_t get_det_threshold;
esp_wn_iface_op_get_triggered_channel_t get_triggered_channel;
esp_wn_iface_op_get_vol_gain_t get_vol_gain;
esp_wn_iface_op_detect_t detect;
esp_wn_iface_op_clean_t clean;
esp_wn_iface_op_destroy_t destroy;
} esp_wn_iface_t;

View File

@@ -0,0 +1,122 @@
#pragma once
#include "esp_wn_iface.h"
// The prefix of wakenet model name is used to filter all wakenet from availabel models.
#define ESP_WN_PREFIX "wn"
/**
* @brief Get the wakenet handle from model name
*
* @param model_name The name of model
* @returns The handle of wakenet
*/
const esp_wn_iface_t *esp_wn_handle_from_name(const char *model_name);
/**
* @brief Get the wake word name from model name
*
* @param model_name The name of model
* @returns The wake word name, like "alexa","hilexin","xiaoaitongxue"
*/
char* esp_wn_wakeword_from_name(const char *model_name);
// /**
// * @brief Get the model coeff from model name
// *
// * @Warning: retuen model_coeff_getter_t, when chip is ESP32,
// * return string for other chips
// *
// * @param model_name The name of model
// * @returns The handle of wakenet
// */
// void *esp_wn_coeff_from_name(char *model_name);
#if defined CONFIG_USE_WAKENET
/*
Configure wake word to use based on what's selected in menuconfig.
*/
#if CONFIG_SR_WN_WN5_HILEXIN
#include "hilexin_wn5.h"
#define WAKENET_MODEL_NAME "wn5_hilexin"
#define WAKENET_COEFF get_coeff_hilexin_wn5
#elif CONFIG_SR_WN_WN5X2_HILEXIN
#include "hilexin_wn5X2.h"
#define WAKENET_MODEL_NAME "wn5_hilexinX2"
#define WAKENET_COEFF get_coeff_hilexin_wn5X2
#elif CONFIG_SR_WN_WN5X3_HILEXIN
#include "hilexin_wn5X3.h"
#define WAKENET_MODEL_NAME "wn5_hilexinX3"
#define WAKENET_COEFF get_coeff_hilexin_wn5X3
#elif CONFIG_SR_WN_WN5_NIHAOXIAOZHI
#include "nihaoxiaozhi_wn5.h"
#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhi"
#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5
#elif CONFIG_SR_WN_WN5X2_NIHAOXIAOZHI
#include "nihaoxiaozhi_wn5X2.h"
#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhiX2"
#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5X2
#elif CONFIG_SR_WN_WN5X3_NIHAOXIAOZHI
#include "nihaoxiaozhi_wn5X3.h"
#define WAKENET_MODEL_NAME "wn5_nihaoxiaozhiX3"
#define WAKENET_COEFF get_coeff_nihaoxiaozhi_wn5X3
#elif CONFIG_SR_WN_WN5X3_NIHAOXIAOXIN
#include "nihaoxiaoxin_wn5X3.h"
#define WAKENET_MODEL_NAME "wn5_nihaoxiaoxinX3"
#define WAKENET_COEFF get_coeff_nihaoxiaoxin_wn5X3
#elif CONFIG_SR_WN_WN5X3_HIJESON
#include "hijeson_wn5X3.h"
#define WAKENET_MODEL_NAME "wn5_hijesonX3"
#define WAKENET_COEFF get_coeff_hijeson_wn5X3
#elif CONFIG_SR_WN_WN5_CUSTOMIZED_WORD
#include "customized_word_wn5.h"
#define WAKENET_MODEL_NAME "wn5_customizedword"
#define WAKENET_COEFF get_coeff_customizedword_wn5
#else
#define WAKENET_MODEL_NAME "NULL"
#define WAKENET_COEFF "COEFF_NULL"
#endif
#else
#define WAKENET_MODEL_NAME "NULL"
#define WAKENET_COEFF "COEFF_NULL"
#endif
/*
static const sr_model_iface_t *model = esp_wn_handle_from_name(model_name);
//Initialize wakeNet model data
static model_iface_data_t *model_data=model->create(model_name, DET_MODE_90);
//Set parameters of buffer
int audio_chunksize=model->get_samp_chunksize(model_data);
int frequency = model->get_samp_rate(model_data);
int16_t *buffer=malloc(audio_chunksize*sizeof(int16_t));
//Detect
int r=model->detect(model_data, buffer);
if (r>0) {
printf("Detection triggered output %d.\n", r);
}
//Destroy model
model->destroy(model_data)
*/

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_hilexin_wn5;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_hilexin_wn5X2;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_hilexin_wn5X3;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_multinet2_ch;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_nihaoxiaoxin_wn5X3;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X2;

View File

@@ -0,0 +1,9 @@
//Generated by mkmodel_py
#pragma once
#include <string.h>
#include "dl_lib_coefgetter_if.h"
#include "dl_lib_matrix.h"
#include "dl_lib_matrixq.h"
#include "dl_lib_matrixq8.h"
extern const model_coeff_getter_t get_coeff_nihaoxiaozhi_wn5X3;

View File

@@ -0,0 +1,158 @@
// Copyright 2015-2022 Espressif Systems (Shanghai) PTE LTD
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "esp_err.h"
#include "esp_mn_iface.h"
/*
esp_mn_node_t is a singly linked list which is used to manage speech commands.
It is easy to add one speech command into linked list and remove one speech command from linked list.
*/
/**
* @brief Initialze the speech commands singly linked list.
*
* @return
* - ESP_OK Success
* - ESP_ERR_NO_MEM No memory
* - ESP_ERR_INVALID_STATE The Speech Commands link has been initialized
*/
esp_err_t esp_mn_commands_alloc(void);
/**
* @brief Clear the speech commands linked list and free root node.
*
* @return
* - ESP_OK Success
* - ESP_ERR_INVALID_STATE The Speech Commands link has not been initialized
*/
esp_err_t esp_mn_commands_free(void);
/**
* @brief Add one speech commands with phoneme string and command ID
*
* @param command_id The command ID
* @param phoneme_string The phoneme string of the speech commands
*
* @return
* - ESP_OK Success
* - ESP_ERR_INVALID_STATE Fail
*/
esp_err_t esp_mn_commands_add(int command_id, char *phoneme_string);
/**
* @brief Modify one speech commands with new phoneme string
*
* @param old_phoneme_string The old phoneme string of the speech commands
* @param new_phoneme_string The new phoneme string of the speech commands
*
* @return
* - ESP_OK Success
* - ESP_ERR_INVALID_STATE Fail
*/
esp_err_t esp_mn_commands_modify(char *old_phoneme_string, char *new_phoneme_string);
/**
* @brief Remove one speech commands by phoneme string
*
* @param phoneme_string The phoneme string of the speech commands
*
* @return
* - ESP_OK Success
* - ESP_ERR_INVALID_STATE Fail
*/
esp_err_t esp_mn_commands_remove(char *phoneme_string);
/**
* @brief Clear all speech commands in linked list
*
* @return
* - ESP_OK Success
* - ESP_ERR_INVALID_STATE Fail
*/
esp_err_t esp_mn_commands_clear(void);
/**
* @brief Get phrase from index, which is the depth from the phrase node to root node
*
* @Warning: The first phrase index is 0, the second phrase index is 1, and so on.
*
* @return
* - esp_mn_phrase_t* Success
* - NULL Fail
*/
esp_mn_phrase_t *esp_mn_commands_get_from_index(int index);
/**
* @brief Get phrase from phoneme string
*
* @return
* - esp_mn_phrase_t* Success
* - NULL Fail
*/
esp_mn_phrase_t *esp_mn_commands_get_from_string(const char *phoneme_string);
/**
* @brief Update the speech commands of MultiNet
*
* @Warning: Must be used after [add/remove/modify/clear] function,
* otherwise the language model of multinet can not be updated.
*
* @param multinet The multinet handle
* @param model_data The model object to query
*
* @return
* - NULL Success
* - others The list of error phrase which can not be parsed by multinet.
*/
esp_mn_error_t *esp_mn_commands_update(const esp_mn_iface_t *multinet, model_iface_data_t *model_data);
/**
* @brief Print the MultiNet Speech Commands.
*/
void esp_mn_print_commands(void);
/**
* @brief Initialze the esp_mn_phrase_t struct by command id and phoneme string .
*
* @return the pointer of esp_mn_phrase_t
*/
esp_mn_phrase_t *esp_mn_phrase_alloc(int command_id, char *phoneme_string);
/**
* @brief Free esp_mn_phrase_t pointer.
*
* @param phrase The esp_mn_phrase_t pointer
*/
void esp_mn_phrase_free(esp_mn_phrase_t *phrase);
/**
* @brief Initialze the esp_mn_node_t struct by esp_mn_phrase_t pointer.
*
* @return the pointer of esp_mn_node_t
*/
esp_mn_node_t *esp_mn_node_alloc(esp_mn_phrase_t *phrase);
/**
* @brief Free esp_mn_node_t pointer.
*
* @param node The esp_mn_node_free pointer
*/
void esp_mn_node_free(esp_mn_node_t *node);
/**
* @brief Print phrase linked list.
*/
void esp_mn_commands_print(void);

View File

@@ -0,0 +1,23 @@
#pragma once
#include "esp_err.h"
#include "esp_mn_iface.h"
/**
* @brief Check chip config to ensure optimum performance
*/
void check_chip_config(void);
/**
* @brief Update the speech commands of MultiNet by menuconfig
*
* @param multinet The multinet handle
*
* @param model_data The model object to query
*
* @param langugae The language of MultiNet
*
* @return
* - ESP_OK Success
* - ESP_ERR_INVALID_STATE Fail
*/
esp_mn_error_t* esp_mn_commands_update_from_sdkconfig(const esp_mn_iface_t *multinet, model_iface_data_t *model_data);

View File

@@ -0,0 +1,94 @@
#pragma once
typedef struct
{
char **model_name; // the name of models, like "wn9_hilexin"(wakenet9, hilexin), "mn5_en"(multinet5, english)
char *partition_label; // partition label used to save the files of model
int num; // the number of models
} srmodel_list_t;
#define MODEL_NAME_MAX_LENGTH 64
/**
* @brief Return all avaliable models in spiffs or selected in Kconfig.
*
* @param partition_label The spiffs label defined in your partition file used to save models.
*
* @return all avaliable models in spiffs,save as srmodel_list_t.
*/
srmodel_list_t* esp_srmodel_init(const char* partition_label);
/**
* @brief Free srmodel_list_t and unregister SPIFFS filesystem if open SPIFFS filesystem.
*
* @param models The srmodel_list_t point allocated by esp_srmodel_init function.
*
* @return all avaliable models in spiffs,save as srmodel_list_t.
*/
void esp_srmodel_deinit(srmodel_list_t *models);
/**
* @brief Return the first model name containing the specified keywords
* If keyword is NULL, we will ignore the keyword.
*
* @param models The srmodel_list_t point allocated by esp_srmodel_init function.
* @param keyword1 The specified keyword1 , like ESP_WN_PREDIX(the prefix of wakenet),
* ESP_MN_PREFIX(the prefix of multinet),
*
* @param keyword2 The specified keyword2, like ESP_MN_ENGLISH(the english multinet)
* ESP_MN_CHINESE(the chinese multinet)
* "alexa" (the "alexa" wakenet)
* @return return model name if can find one model name containing the keywords otherwise return NULL.
*/
char *esp_srmodel_filter(srmodel_list_t *models, const char *keyword1, const char *keyword2);
/**
* @brief Check whether the specified model name exists or not.
*
* @param models The srmodel_list_t point allocated by esp_srmodel_init function.
* @param model_name The specified model name
* @return return index in models if model name exists otherwise return -1
*/
int esp_srmodel_exists(srmodel_list_t *models, char *model_name);
/**
* @brief Initialize and mount SPIFFS filesystem, return all avaliable models in spiffs.
*
* @param partition_label The spiffs label defined in your partition file used to save models.
*
* @return all avaliable models in spiffs,save as srmodel_list_t.
*/
srmodel_list_t *srmodel_spiffs_init(const char* partition_label);
/**
* @brief unregister SPIFFS filesystem and free srmodel_list_t.
*
* @param models The srmodel_list_t point allocated by srmodel_spiffs_init function.
*
* @return all avaliable models in spiffs,save as srmodel_list_t.
*/
void srmodel_spiffs_deinit(srmodel_list_t *models);
/**
* @brief Return base path of srmodel spiffs
*
* @return the base path od srmodel spiffs
*/
char *get_model_base_path(void);
#ifdef ESP_PLATFORM
#include "dl_lib_coefgetter_if.h"
/**
* @brief Return model_coeff_getter_t pointer base on model_name
*
* @warning Just support ESP32 to load old wakenet
*
* @param model_name The model name
*
* @return model_coeff_getter_t pointer or NULL
*/
model_coeff_getter_t* srmodel_get_model_coeff(char *model_name);
#endif