coqui_stt_sys/
bindings.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
/* automatically generated by rust-bindgen 0.59.2 */
#![allow(non_snake_case)]
#![allow(non_camel_case_types)]
#![allow(non_upper_case_globals)]
#![allow(deref_nullptr)]

/// Opaque placeholder type for a model handle.
///
/// The only field is an empty byte array, so the struct itself is zero-sized.
/// It exists to give raw pointers (`*mut ModelState` / `*const ModelState`)
/// a distinct pointee type.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct ModelState {
    // Zero-length private field: keeps the type non-constructible from
    // outside this module while occupying no space.
    _unused: [u8; 0],
}

/// Opaque placeholder type for a streaming-inference handle.
///
/// The only field is an empty byte array, so the struct itself is zero-sized.
/// It exists to give raw pointers (`*mut StreamingState` /
/// `*const StreamingState`) a distinct pointee type.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default)]
pub struct StreamingState {
    // Zero-length private field: keeps the type non-constructible from
    // outside this module while occupying no space.
    _unused: [u8; 0],
}

/// Text of an individual token, along with its timing information.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct TokenMetadata {
    /// Pointer to the text corresponding to this token.
    pub text: *const ::std::os::raw::c_char,
    /// Position of the token, in units of 20ms.
    pub timestep: ::std::os::raw::c_uint,
    /// Position of the token, in seconds.
    pub start_time: f32,
}

/// A single transcript computed by the model: a confidence value plus the
/// metadata for its constituent tokens.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct CandidateTranscript {
    /// Array of `TokenMetadata` objects.
    pub tokens: *const TokenMetadata,
    /// Size of the `tokens` array.
    pub num_tokens: ::std::os::raw::c_uint,
    /// Approximated confidence value for this transcript. This is roughly the
    /// sum of the acoustic model logit values for each timestep/character
    /// that contributed to the creation of this transcript.
    pub confidence: f64,
}

/// An array of `CandidateTranscript` objects computed by the model.
#[repr(C)]
#[derive(Clone, Copy, Debug)]
pub struct Metadata {
    /// Array of `CandidateTranscript` objects.
    pub transcripts: *const CandidateTranscript,
    /// Size of the `transcripts` array.
    pub num_transcripts: ::std::os::raw::c_uint,
}

/// Integer type of the `STT_Error_Codes_*` constants declared below.
pub type STT_Error_Codes = ::std::os::raw::c_uint;

// The values are written in hexadecimal so the numeric grouping
// (0x1000 / 0x2000 / 0x3000) is visible at a glance.

// Zero.
pub const STT_Error_Codes_STT_ERR_OK: STT_Error_Codes = 0x0000;

// 0x1000 group.
pub const STT_Error_Codes_STT_ERR_NO_MODEL: STT_Error_Codes = 0x1000;

// 0x2000 group.
pub const STT_Error_Codes_STT_ERR_INVALID_ALPHABET: STT_Error_Codes = 0x2000;
pub const STT_Error_Codes_STT_ERR_INVALID_SHAPE: STT_Error_Codes = 0x2001;
pub const STT_Error_Codes_STT_ERR_INVALID_SCORER: STT_Error_Codes = 0x2002;
pub const STT_Error_Codes_STT_ERR_MODEL_INCOMPATIBLE: STT_Error_Codes = 0x2003;
pub const STT_Error_Codes_STT_ERR_SCORER_NOT_ENABLED: STT_Error_Codes = 0x2004;
pub const STT_Error_Codes_STT_ERR_SCORER_UNREADABLE: STT_Error_Codes = 0x2005;
pub const STT_Error_Codes_STT_ERR_SCORER_INVALID_LM: STT_Error_Codes = 0x2006;
pub const STT_Error_Codes_STT_ERR_SCORER_NO_TRIE: STT_Error_Codes = 0x2007;
pub const STT_Error_Codes_STT_ERR_SCORER_INVALID_TRIE: STT_Error_Codes = 0x2008;
pub const STT_Error_Codes_STT_ERR_SCORER_VERSION_MISMATCH: STT_Error_Codes = 0x2009;

// 0x3000 group.
pub const STT_Error_Codes_STT_ERR_FAIL_INIT_MMAP: STT_Error_Codes = 0x3000;
pub const STT_Error_Codes_STT_ERR_FAIL_INIT_SESS: STT_Error_Codes = 0x3001;
pub const STT_Error_Codes_STT_ERR_FAIL_INTERPRETER: STT_Error_Codes = 0x3002;
pub const STT_Error_Codes_STT_ERR_FAIL_RUN_SESS: STT_Error_Codes = 0x3003;
pub const STT_Error_Codes_STT_ERR_FAIL_CREATE_STREAM: STT_Error_Codes = 0x3004;
pub const STT_Error_Codes_STT_ERR_FAIL_READ_PROTOBUF: STT_Error_Codes = 0x3005;
pub const STT_Error_Codes_STT_ERR_FAIL_CREATE_SESS: STT_Error_Codes = 0x3006;
pub const STT_Error_Codes_STT_ERR_FAIL_CREATE_MODEL: STT_Error_Codes = 0x3007;
pub const STT_Error_Codes_STT_ERR_FAIL_INSERT_HOTWORD: STT_Error_Codes = 0x3008;
pub const STT_Error_Codes_STT_ERR_FAIL_CLEAR_HOTWORD: STT_Error_Codes = 0x3009;
// NOTE(review): the sequence jumps from 0x3009 to 0x3010 (decimal 12304), not
// 0x300A; the value is reproduced exactly as it appeared in the bindings.
pub const STT_Error_Codes_STT_ERR_FAIL_ERASE_HOTWORD: STT_Error_Codes = 0x3010;

// Foreign declarations for the Coqui STT C API.
//
// The library named in the `link` attribute depends on the target: `stt` on
// every target except Windows, `libstt.so.if` on Windows. Everything declared
// in this block is a foreign function and is therefore `unsafe` to call.
#[cfg_attr(not(target_os = "windows"), link(name = "stt"))]
#[cfg_attr(target_os = "windows", link(name = "libstt.so.if"))]
extern "C" {
    /// Creates an object providing an interface to a trained Coqui STT model.
    ///
    /// # Arguments
    /// * `aModelPath` - The path to the frozen model graph.
    /// * `retval` - Output parameter: receives a `ModelState` pointer.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure.
    pub fn STT_CreateModel(
        aModelPath: *const ::std::os::raw::c_char,
        retval: *mut *mut ModelState,
    ) -> ::std::os::raw::c_int;

    /// Creates an object providing an interface to a trained Coqui STT model,
    /// loaded from a buffer.
    ///
    /// This declaration is compiled out when targeting Windows.
    ///
    /// # Arguments
    /// * `aModelBuffer` - The buffer containing the content of the exported model.
    /// * `aBufferSize` - Size of model buffer.
    /// * `retval` - Output parameter: receives a `ModelState` pointer.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure.
    #[cfg(not(target_os = "windows"))]
    pub fn STT_CreateModelFromBuffer(
        aModelBuffer: *const ::std::os::raw::c_char,
        aBufferSize: ::std::os::raw::c_uint,
        retval: *mut *mut ModelState,
    ) -> ::std::os::raw::c_int;

    /// Gets the beam width value used by the model. If
    /// [`STT_SetModelBeamWidth`] was not called before, will return the
    /// default value loaded from the model file.
    ///
    /// # Arguments
    /// * `aCtx` - A `ModelState` pointer created with [`STT_CreateModel`].
    ///
    /// # Returns
    /// Beam width value used by the model.
    pub fn STT_GetModelBeamWidth(aCtx: *const ModelState) -> ::std::os::raw::c_uint;

    /// Sets the beam width value used by the model.
    ///
    /// # Arguments
    /// * `aCtx` - A `ModelState` pointer created with [`STT_CreateModel`].
    /// * `aBeamWidth` - The beam width used by the model. A larger beam width
    ///   value generates better results at the cost of decoding time.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure.
    pub fn STT_SetModelBeamWidth(
        aCtx: *mut ModelState,
        aBeamWidth: ::std::os::raw::c_uint,
    ) -> ::std::os::raw::c_int;

    /// Returns the sample rate expected by a model.
    ///
    /// # Arguments
    /// * `aCtx` - A `ModelState` pointer created with [`STT_CreateModel`].
    ///
    /// # Returns
    /// Sample rate expected by the model for its input.
    pub fn STT_GetModelSampleRate(aCtx: *const ModelState) -> ::std::os::raw::c_int;

    /// Frees associated resources and destroys the model object.
    pub fn STT_FreeModel(ctx: *mut ModelState);

    /// Enables decoding using an external scorer.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model being changed.
    /// * `aScorerPath` - The path to the external scorer file.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure (invalid arguments).
    pub fn STT_EnableExternalScorer(
        aCtx: *mut ModelState,
        aScorerPath: *const ::std::os::raw::c_char,
    ) -> ::std::os::raw::c_int;

    /// Enables decoding using an external scorer loaded from a buffer.
    ///
    /// This declaration is compiled out when targeting Windows.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model being changed.
    /// * `aScorerBuffer` - The buffer containing the content of an
    ///   external-scorer file.
    /// * `aBufferSize` - Size of scorer buffer.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure (invalid arguments).
    #[cfg(not(target_os = "windows"))]
    pub fn STT_EnableExternalScorerFromBuffer(
        aCtx: *mut ModelState,
        aScorerBuffer: *const ::std::os::raw::c_char,
        aBufferSize: ::std::os::raw::c_uint,
    ) -> ::std::os::raw::c_int;

    /// Adds a hot-word and its boost.
    ///
    /// Words that don't occur in the scorer (e.g. proper nouns) or strings
    /// that contain spaces won't be taken into account.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model being changed.
    /// * `word` - The hot-word.
    /// * `boost` - The boost. Positive value increases and negative reduces
    ///   chance of a word occurring in a transcription. Excessive positive
    ///   boost might lead to splitting up of letters of the word following
    ///   the hot-word.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure (invalid arguments).
    pub fn STT_AddHotWord(
        aCtx: *mut ModelState,
        word: *const ::std::os::raw::c_char,
        boost: f32,
    ) -> ::std::os::raw::c_int;

    /// Removes the entry for a hot-word from the hot-words map.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model being changed.
    /// * `word` - The hot-word.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure (invalid arguments).
    pub fn STT_EraseHotWord(
        aCtx: *mut ModelState,
        word: *const ::std::os::raw::c_char,
    ) -> ::std::os::raw::c_int;

    /// Removes all elements from the hot-words map.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model being changed.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure (invalid arguments).
    pub fn STT_ClearHotWords(aCtx: *mut ModelState) -> ::std::os::raw::c_int;

    /// Disables decoding using an external scorer.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model being changed.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure.
    pub fn STT_DisableExternalScorer(aCtx: *mut ModelState) -> ::std::os::raw::c_int;

    /// Sets hyperparameters alpha and beta of the external scorer.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model being changed.
    /// * `aAlpha` - The alpha hyperparameter of the decoder. Language model weight.
    /// * `aBeta` - The beta hyperparameter of the decoder. Word insertion weight.
    ///
    /// # Returns
    /// Zero on success, non-zero on failure.
    pub fn STT_SetScorerAlphaBeta(
        aCtx: *mut ModelState,
        aAlpha: f32,
        aBeta: f32,
    ) -> ::std::os::raw::c_int;

    /// Uses the Coqui STT model to convert speech to text.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model to use.
    /// * `aBuffer` - A 16-bit, mono raw audio signal at the appropriate sample
    ///   rate (matching what the model was trained on).
    /// * `aBufferSize` - The number of samples in the audio signal.
    ///
    /// # Returns
    /// The STT result. The user is responsible for freeing the string using
    /// [`STT_FreeString`]. Returns NULL on error.
    pub fn STT_SpeechToText(
        aCtx: *mut ModelState,
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
    ) -> *mut ::std::os::raw::c_char;

    /// Uses the Coqui STT model to convert speech to text and output results
    /// including metadata.
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model to use.
    /// * `aBuffer` - A 16-bit, mono raw audio signal at the appropriate sample
    ///   rate (matching what the model was trained on).
    /// * `aBufferSize` - The number of samples in the audio signal.
    /// * `aNumResults` - The maximum number of `CandidateTranscript` structs
    ///   to return. Returned value might be smaller than this.
    ///
    /// # Returns
    /// `Metadata` struct containing multiple `CandidateTranscript` structs.
    /// Each transcript has per-token metadata including timing information.
    /// The user is responsible for freeing `Metadata` by calling
    /// [`STT_FreeMetadata`]. Returns NULL on error.
    pub fn STT_SpeechToTextWithMetadata(
        aCtx: *mut ModelState,
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
        aNumResults: ::std::os::raw::c_uint,
    ) -> *mut Metadata;

    /// Creates a new streaming inference state. The streaming state returned
    /// by this function can then be passed to [`STT_FeedAudioContent`] and
    /// [`STT_FinishStream`].
    ///
    /// # Arguments
    /// * `aCtx` - The `ModelState` pointer for the model to use.
    /// * `retval` - Output parameter: an opaque pointer that represents the
    ///   streaming state. Can be NULL if an error occurs.
    ///
    /// # Returns
    /// Zero for success, non-zero on failure.
    pub fn STT_CreateStream(
        aCtx: *mut ModelState,
        retval: *mut *mut StreamingState,
    ) -> ::std::os::raw::c_int;

    /// Feeds audio samples to an ongoing streaming inference.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    /// * `aBuffer` - An array of 16-bit, mono raw audio samples at the
    ///   appropriate sample rate (matching what the model was trained on).
    /// * `aBufferSize` - The number of samples in `aBuffer`.
    pub fn STT_FeedAudioContent(
        aSctx: *mut StreamingState,
        aBuffer: *const ::std::os::raw::c_short,
        aBufferSize: ::std::os::raw::c_uint,
    );

    /// Computes the intermediate decoding of an ongoing streaming inference.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    ///
    /// # Returns
    /// The STT intermediate result. The user is responsible for freeing the
    /// string using [`STT_FreeString`].
    pub fn STT_IntermediateDecode(aSctx: *const StreamingState) -> *mut ::std::os::raw::c_char;

    /// Computes the intermediate decoding of an ongoing streaming inference,
    /// returning results including metadata.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    /// * `aNumResults` - The number of candidate transcripts to return.
    ///
    /// # Returns
    /// `Metadata` struct containing multiple candidate transcripts. Each
    /// transcript has per-token metadata including timing information. The
    /// user is responsible for freeing `Metadata` by calling
    /// [`STT_FreeMetadata`]. Returns NULL on error.
    pub fn STT_IntermediateDecodeWithMetadata(
        aSctx: *const StreamingState,
        aNumResults: ::std::os::raw::c_uint,
    ) -> *mut Metadata;

    /// EXPERIMENTAL: Computes the intermediate decoding of an ongoing
    /// streaming inference, flushing buffers first.
    ///
    /// This ensures that all audio that has been streamed so far is included
    /// in the result, but is more expensive than [`STT_IntermediateDecode`]
    /// because buffers are processed through the acoustic model. Calling this
    /// function too often will also degrade transcription accuracy due to
    /// trashing of the LSTM hidden state vectors.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    ///
    /// # Returns
    /// The STT result. The user is responsible for freeing the string using
    /// [`STT_FreeString`].
    ///
    /// # Note
    /// The original header text states: "This method will free the state
    /// pointer (`aSctx`)."
    ///
    /// NOTE(review): that statement sits oddly with the "ongoing streaming
    /// inference" description above, and the metadata-returning sibling
    /// [`STT_IntermediateDecodeWithMetadataFlushBuffers`] carries no such
    /// note. Confirm against the C implementation before reusing or freeing
    /// `aSctx` after this call.
    pub fn STT_IntermediateDecodeFlushBuffers(
        aSctx: *mut StreamingState,
    ) -> *mut ::std::os::raw::c_char;

    /// EXPERIMENTAL: Computes the intermediate decoding of an ongoing
    /// streaming inference, flushing buffers first, and returns results
    /// including metadata.
    ///
    /// This ensures that all audio that has been streamed so far is included
    /// in the result, but is more expensive than
    /// [`STT_IntermediateDecodeWithMetadata`] because buffers are processed
    /// through the acoustic model. Calling this function too often will also
    /// degrade transcription accuracy due to trashing of the LSTM hidden
    /// state vectors.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    /// * `aNumResults` - The number of candidate transcripts to return.
    ///
    /// # Returns
    /// `Metadata` struct containing multiple candidate transcripts. Each
    /// transcript has per-token metadata including timing information. The
    /// user is responsible for freeing `Metadata` by calling
    /// [`STT_FreeMetadata`]. Returns NULL on error.
    pub fn STT_IntermediateDecodeWithMetadataFlushBuffers(
        aSctx: *mut StreamingState,
        aNumResults: ::std::os::raw::c_uint,
    ) -> *mut Metadata;

    /// Computes the final decoding of an ongoing streaming inference and
    /// returns the result. Signals the end of an ongoing streaming inference.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    ///
    /// # Returns
    /// The STT result. The user is responsible for freeing the string using
    /// [`STT_FreeString`].
    ///
    /// # Note
    /// This method will free the state pointer (`aSctx`).
    pub fn STT_FinishStream(aSctx: *mut StreamingState) -> *mut ::std::os::raw::c_char;

    /// Computes the final decoding of an ongoing streaming inference and
    /// returns results including metadata. Signals the end of an ongoing
    /// streaming inference.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    /// * `aNumResults` - The number of candidate transcripts to return.
    ///
    /// # Returns
    /// `Metadata` struct containing multiple candidate transcripts. Each
    /// transcript has per-token metadata including timing information. The
    /// user is responsible for freeing `Metadata` by calling
    /// [`STT_FreeMetadata`]. Returns NULL on error.
    ///
    /// # Note
    /// This method will free the state pointer (`aSctx`).
    pub fn STT_FinishStreamWithMetadata(
        aSctx: *mut StreamingState,
        aNumResults: ::std::os::raw::c_uint,
    ) -> *mut Metadata;

    /// Destroys a streaming state without decoding the computed logits. This
    /// can be used if you no longer need the result of an ongoing streaming
    /// inference and don't want to perform a costly decode operation.
    ///
    /// # Arguments
    /// * `aSctx` - A streaming state pointer returned by [`STT_CreateStream`].
    ///
    /// # Note
    /// This method will free the state pointer (`aSctx`).
    pub fn STT_FreeStream(aSctx: *mut StreamingState);

    /// Frees memory allocated for metadata information.
    pub fn STT_FreeMetadata(m: *mut Metadata);

    /// Frees a `char*` string returned by the Coqui STT API.
    pub fn STT_FreeString(str_: *mut ::std::os::raw::c_char);

    /// Returns the version of this library. The returned version is a
    /// semantic version (SemVer 2.0.0). The string returned must be freed
    /// with [`STT_FreeString`].
    ///
    /// # Returns
    /// The version string.
    pub fn STT_Version() -> *mut ::std::os::raw::c_char;

    /// Returns a textual description corresponding to an error code. The
    /// string returned must be freed with [`STT_FreeString`].
    ///
    /// # Arguments
    /// * `aErrorCode` - The error code to describe.
    ///
    /// # Returns
    /// The error description.
    pub fn STT_ErrorCodeToErrorMessage(
        aErrorCode: ::std::os::raw::c_int,
    ) -> *mut ::std::os::raw::c_char;
}