// coqui_stt/candidate_transcript.rs
use crate::{OwnedTokenMetadata, TokenMetadata};
use std::fmt::{Display, Formatter, Write};
/// A single transcript computed by the model,
/// including a confidence value and the metadata for its constituent tokens.
///
/// This is a newtype around the raw [`coqui_stt_sys::CandidateTranscript`];
/// `#[repr(transparent)]` gives it the same layout as that inner struct.
#[repr(transparent)]
pub struct CandidateTranscript(coqui_stt_sys::CandidateTranscript);
// NOTE(review): the accessors visible in this file only take `&self` and read
// from the wrapped struct; these impls presumably rely on the native data never
// being mutated behind a shared reference — confirm against the C library.
unsafe impl Send for CandidateTranscript {}
unsafe impl Sync for CandidateTranscript {}
impl CandidateTranscript {
    /// Return the tokens in this transcript as a slice.
    ///
    /// An empty slice is returned when the transcript reports zero tokens
    /// or when the underlying token pointer is null.
    #[inline]
    #[must_use]
    pub fn tokens(&self) -> &[TokenMetadata] {
        let data = self.0.tokens.cast::<TokenMetadata>();
        let len = self.num_tokens() as usize;
        // `slice::from_raw_parts` requires a non-null, aligned pointer even for
        // zero-length slices, so a null pointer must never reach it.
        if data.is_null() || len == 0 {
            return &[];
        }
        // SAFETY: `data` was checked to be non-null above. The native library is
        // relied upon to provide `num_tokens` initialised entries behind `tokens`,
        // each layout-compatible with `TokenMetadata`, that remain valid for as
        // long as `self` is borrowed.
        unsafe { std::slice::from_raw_parts(data, len) }
    }

    /// Approximated confidence value for this transcript.
    ///
    /// This is roughly the sum of the acoustic model logit values for
    /// each timestep/character that contributed to the creation of this transcript.
    #[inline]
    #[must_use]
    pub const fn confidence(&self) -> f64 {
        self.0.confidence
    }

    /// Total number of tokens in this transcript.
    #[inline]
    #[must_use]
    pub const fn num_tokens(&self) -> u32 {
        self.0.num_tokens
    }

    /// Convert this into an [`OwnedCandidateTranscript`] struct.
    ///
    /// Every token is converted with [`TokenMetadata::to_owned`] and the
    /// confidence value is copied over.
    ///
    /// **Warning**: this can be very expensive depending on the total number of tokens in this object.
    #[inline]
    #[must_use]
    pub fn to_owned(&self) -> OwnedCandidateTranscript {
        let tokens = self.tokens().iter().map(TokenMetadata::to_owned).collect();
        OwnedCandidateTranscript {
            tokens,
            confidence: self.confidence(),
        }
    }
}
/// An owned variant of [`CandidateTranscript`](CandidateTranscript).
///
/// Holds its tokens in a `Vec` and its confidence by value, so it does not
/// borrow from the transcript it was created from.
#[derive(Clone, Debug)]
pub struct OwnedCandidateTranscript {
// Owned metadata for each token of the transcript.
tokens: Vec<OwnedTokenMetadata>,
// Approximated confidence value of the transcript.
confidence: f64,
}
impl OwnedCandidateTranscript {
    /// Borrow the tokens that make up this transcript as a slice.
    #[inline]
    #[must_use]
    pub fn tokens(&self) -> &[OwnedTokenMetadata] {
        self.tokens.as_slice()
    }

    /// Approximated confidence value for this transcript.
    ///
    /// Roughly the sum of the acoustic model logit values for each
    /// timestep/character that contributed to the creation of this transcript.
    #[inline]
    #[must_use]
    pub const fn confidence(&self) -> f64 {
        self.confidence
    }

    /// Number of tokens held by this transcript.
    #[inline]
    #[must_use]
    pub fn num_tokens(&self) -> usize {
        self.tokens().len()
    }
}
impl Display for OwnedCandidateTranscript {
    /// Write the text of every token in order, each one followed by a single
    /// space (the last token included). Stops at the first formatter error.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        self.tokens.iter().try_for_each(|token| {
            f.write_str(&token.text)?;
            f.write_char(' ')
        })
    }
}