ttf_parser/tables/cmap/
format2.rs

// This table has a pretty complex parsing algorithm.
// A detailed explanation can be found here:
// https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table
// https://developer.apple.com/fonts/TrueType-Reference-Manual/RM06/Chap6cmap.html
// https://github.com/fonttools/fonttools/blob/a360252709a3d65f899915db0a5bd753007fdbb7/Lib/fontTools/ttLib/tables/_c_m_a_p.py#L360
6
7use core::convert::TryFrom;
8
9use crate::parser::{FromData, LazyArray16, Stream};
10use crate::GlyphId;
11
/// One sub-header record of a format 2 subtable.
///
/// It describes the run of low bytes that is mapped for a given high byte
/// and how to locate the matching entries in the glyph index array.
#[derive(Copy, Clone)]
struct SubHeaderRecord {
    /// First low byte covered by this sub-header.
    first_code: u16,
    /// Number of consecutive low bytes covered, starting at `first_code`.
    entry_count: u16,
    /// Signed delta that is added to a non-zero value read from the glyph index array.
    id_delta: i16,
    /// Number of bytes from the location of this very field
    /// to the glyph index array entry that corresponds to `first_code`.
    id_range_offset: u16,
}
19
20impl FromData for SubHeaderRecord {
21    const SIZE: usize = 8;
22
23    #[inline]
24    fn parse(data: &[u8]) -> Option<Self> {
25        let mut s = Stream::new(data);
26        Some(SubHeaderRecord {
27            first_code: s.read::<u16>()?,
28            entry_count: s.read::<u16>()?,
29            id_delta: s.read::<i16>()?,
30            id_range_offset: s.read::<u16>()?,
31        })
32    }
33}
34
35/// A [format 2](https://docs.microsoft.com/en-us/typography/opentype/spec/cmap#format-2-high-byte-mapping-through-table)
36/// subtable.
37#[derive(Clone, Copy)]
38pub struct Subtable2<'a> {
39    sub_header_keys: LazyArray16<'a, u16>,
40    sub_headers_offset: usize,
41    sub_headers: LazyArray16<'a, SubHeaderRecord>,
42    // The whole subtable data.
43    data: &'a [u8],
44}
45
46impl<'a> Subtable2<'a> {
47    /// Parses a subtable from raw data.
48    pub fn parse(data: &'a [u8]) -> Option<Self> {
49        let mut s = Stream::new(data);
50        s.skip::<u16>(); // format
51        s.skip::<u16>(); // length
52        s.skip::<u16>(); // language
53        let sub_header_keys = s.read_array16::<u16>(256)?;
54        // The maximum index in a sub_header_keys is a sub_headers count.
55        let sub_headers_count = sub_header_keys.into_iter().map(|n| n / 8).max()? + 1;
56
57        // Remember sub_headers offset before reading. Will be used later.
58        let sub_headers_offset = s.offset();
59        let sub_headers = s.read_array16::<SubHeaderRecord>(sub_headers_count)?;
60
61        Some(Self {
62            sub_header_keys,
63            sub_headers_offset,
64            sub_headers,
65            data,
66        })
67    }
68
69    /// Returns a glyph index for a code point.
70    ///
71    /// Returns `None` when `code_point` is larger than `u16`.
72    pub fn glyph_index(&self, code_point: u32) -> Option<GlyphId> {
73        // This subtable supports code points only in a u16 range.
74        let code_point = u16::try_from(code_point).ok()?;
75
76        let code_point = code_point;
77        let high_byte = code_point >> 8;
78        let low_byte = code_point & 0x00FF;
79
80        let i = if code_point < 0xff {
81            // 'SubHeader 0 is special: it is used for single-byte character codes.'
82            0
83        } else {
84            // 'Array that maps high bytes to subHeaders: value is subHeader index × 8.'
85            self.sub_header_keys.get(high_byte)? / 8
86        };
87
88        let sub_header = self.sub_headers.get(i)?;
89
90        let first_code = sub_header.first_code;
91        let range_end = first_code.checked_add(sub_header.entry_count)?;
92        if low_byte < first_code || low_byte >= range_end {
93            return None;
94        }
95
96        // SubHeaderRecord::id_range_offset points to SubHeaderRecord::first_code
97        // in the glyphIndexArray. So we have to advance to our code point.
98        let index_offset = usize::from(low_byte.checked_sub(first_code)?) * u16::SIZE;
99
100        // 'The value of the idRangeOffset is the number of bytes
101        // past the actual location of the idRangeOffset'.
102        let offset =
103            self.sub_headers_offset
104                // Advance to required subheader.
105                + SubHeaderRecord::SIZE * usize::from(i + 1)
106                // Move back to idRangeOffset start.
107                - u16::SIZE
108                // Use defined offset.
109                + usize::from(sub_header.id_range_offset)
110                // Advance to required index in the glyphIndexArray.
111                + index_offset;
112
113        let glyph: u16 = Stream::read_at(self.data, offset)?;
114        if glyph == 0 {
115            return None;
116        }
117
118        u16::try_from((i32::from(glyph) + i32::from(sub_header.id_delta)) % 65536).ok().map(GlyphId)
119    }
120
121    /// Calls `f` for each codepoint defined in this table.
122    pub fn codepoints(&self, f: impl FnMut(u32)) {
123        let _ = self.codepoints_inner(f);
124    }
125
126    #[inline]
127    fn codepoints_inner(&self, mut f: impl FnMut(u32)) -> Option<()> {
128        for first_byte in 0u16..256 {
129            let i = self.sub_header_keys.get(first_byte)? / 8;
130            let sub_header = self.sub_headers.get(i)?;
131            let first_code = sub_header.first_code;
132
133            if i == 0 {
134                // This is a single byte code.
135                let range_end = first_code.checked_add(sub_header.entry_count)?;
136                if first_byte >= first_code && first_byte < range_end {
137                    f(u32::from(first_byte));
138                }
139            } else {
140                // This is a two byte code.
141                let base = first_code.checked_add(first_byte << 8)?;
142                for k in 0..sub_header.entry_count {
143                    let code_point = base.checked_add(k)?;
144                    f(u32::from(code_point));
145                }
146            }
147        }
148
149        Some(())
150    }
151}
152
153impl core::fmt::Debug for Subtable2<'_> {
154    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
155        write!(f, "Subtable2 {{ ... }}")
156    }
157}