// coqui_stt/candidate_transcript.rs

use crate::{OwnedTokenMetadata, TokenMetadata};
use std::fmt::{Display, Formatter, Write};

/// A single transcript computed by the model,
/// including a confidence value and the metadata for its constituent tokens.
///
/// This is a thin newtype over `coqui_stt_sys::CandidateTranscript`;
/// `#[repr(transparent)]` gives it the same memory layout as the wrapped struct.
#[repr(transparent)]
pub struct CandidateTranscript(coqui_stt_sys::CandidateTranscript);

// The wrapped struct holds a raw pointer to its tokens (see `tokens()`), and raw
// pointers are neither `Send` nor `Sync` automatically, so both are asserted here.
// NOTE(review): soundness presumably relies on the native transcript data being
// immutable and not tied to the thread that produced it — confirm against the C API.
unsafe impl Send for CandidateTranscript {}
unsafe impl Sync for CandidateTranscript {}

impl CandidateTranscript {
    /// Return an array of tokens in this transcript.
    ///
    /// An empty slice is returned when the transcript reports zero tokens
    /// or when the underlying token pointer is null.
    #[inline]
    #[must_use]
    pub fn tokens(&self) -> &[TokenMetadata] {
        let data = self.0.tokens.cast::<TokenMetadata>();
        let len = self.num_tokens() as usize;

        // `slice::from_raw_parts` requires a non-null, aligned pointer even for
        // zero-length slices, so a null or unused pointer must never reach it.
        if data.is_null() || len == 0 {
            return &[];
        }

        // SAFETY: `data` is non-null (checked above), the inner objects will
        // always be of type TokenMetadata, and the length will always be proper.
        // The returned slice borrows from `self`, so it cannot outlive the
        // transcript it points into.
        unsafe { std::slice::from_raw_parts(data, len) }
    }

    /// Approximated confidence value for this transcript.
    /// This is roughly the sum of the acoustic model logit values for
    /// each timestep/character that contributed to the creation of this transcript.
    #[inline]
    #[must_use]
    pub const fn confidence(&self) -> f64 {
        self.0.confidence
    }

    /// Total number of tokens in this transcript.
    #[inline]
    #[must_use]
    pub const fn num_tokens(&self) -> u32 {
        self.0.num_tokens
    }

    /// Convert this into an [`OwnedCandidateTranscript`] struct.
    ///
    /// Every token is converted with `TokenMetadata::to_owned` and the
    /// confidence value is copied over.
    ///
    /// **Warning**: this can be very expensive depending on the total number of tokens in this object.
    #[inline]
    #[must_use]
    pub fn to_owned(&self) -> OwnedCandidateTranscript {
        let tokens = self.tokens().iter().map(TokenMetadata::to_owned).collect();
        OwnedCandidateTranscript {
            tokens,
            confidence: self.confidence(),
        }
    }
}

/// An owned variant of [`CandidateTranscript`](CandidateTranscript).
///
/// Produced by `CandidateTranscript::to_owned`; it stores its tokens in a `Vec`
/// instead of borrowing them through a raw pointer.
#[derive(Clone, Debug)]
pub struct OwnedCandidateTranscript {
    // Owned metadata for every token of the transcript, in the order yielded
    // by the borrowed transcript's `tokens()`.
    tokens: Vec<OwnedTokenMetadata>,
    // Confidence value copied from the borrowed transcript.
    confidence: f64,
}

impl OwnedCandidateTranscript {
    /// Borrow the tokens that make up this transcript as a slice.
    #[inline]
    #[must_use]
    pub fn tokens(&self) -> &[OwnedTokenMetadata] {
        self.tokens.as_slice()
    }

    /// Approximate confidence of this transcript.
    ///
    /// Roughly the sum of the acoustic model logit values over every
    /// timestep/character that contributed to the transcript.
    #[inline]
    #[must_use]
    pub const fn confidence(&self) -> f64 {
        self.confidence
    }

    /// Number of tokens held by this transcript.
    #[inline]
    #[must_use]
    pub fn num_tokens(&self) -> usize {
        self.tokens().len()
    }
}

impl Display for OwnedCandidateTranscript {
    /// Write the text of every token in order, each followed by a single space.
    ///
    /// Formatting stops at the first write error, which is returned to the caller.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        self.tokens.iter().try_for_each(|token| {
            f.write_str(&token.text)?;
            f.write_char(' ')
        })
    }
}