1use crate::types::{Mode, Version};
3use std::slice::Iter;
4
5#[cfg(feature = "bench")]
6extern crate test;
7
/// A contiguous range of the input data together with the [`Mode`] assigned
/// to it.
#[derive(PartialEq, Eq, Debug, Copy, Clone)]
pub struct Segment {
    /// Encoding mode assigned to this range.
    pub mode: Mode,

    /// Index of the first byte of the range.
    pub begin: usize,

    /// Index one past the last byte of the range (exclusive).
    pub end: usize,
}
23
24impl Segment {
25 pub fn encoded_len(&self, version: Version) -> usize {
28 let byte_size = self.end - self.begin;
29 let chars_count = if self.mode == Mode::Kanji { byte_size / 2 } else { byte_size };
30
31 let mode_bits_count = version.mode_bits_count();
32 let length_bits_count = self.mode.length_bits_count(version);
33 let data_bits_count = self.mode.data_bits_count(chars_count);
34
35 mode_bits_count + length_bits_count + data_bits_count
36 }
37}
38
/// Iterator adaptor that yields `(position, ExclCharSet)` for every byte of
/// the wrapped iterator, followed by one final `(position, ExclCharSet::End)`
/// item once the wrapped iterator is exhausted.
struct EcsIter<I> {
    /// The wrapped byte iterator.
    base: I,
    /// Position that will be attached to the next yielded item.
    index: usize,
    /// Set once the `End` item has been yielded; `next` returns `None` afterwards.
    ended: bool,
}
58
59impl<'a, I: Iterator<Item = &'a u8>> Iterator for EcsIter<I> {
60 type Item = (usize, ExclCharSet);
61
62 fn next(&mut self) -> Option<(usize, ExclCharSet)> {
63 if self.ended {
64 return None;
65 }
66
67 match self.base.next() {
68 None => {
69 self.ended = true;
70 Some((self.index, ExclCharSet::End))
71 }
72 Some(c) => {
73 let old_index = self.index;
74 self.index += 1;
75 Some((old_index, ExclCharSet::from_u8(*c)))
76 }
77 }
78 }
79}
80
/// Iterator that splits a byte slice into [`Segment`]s by walking the
/// `STATE_TRANSITION` table one input byte at a time.
pub struct Parser<'a> {
    /// Input bytes, classified and paired with their positions.
    ecs_iter: EcsIter<Iter<'a, u8>>,
    /// Current row selector into `STATE_TRANSITION`.
    state: State,
    /// Start index of the segment currently being accumulated.
    begin: usize,
    /// When set, the next call to `next` emits a one-byte `Mode::Byte`
    /// segment starting at `begin` before reading any further input.
    pending_single_byte: bool,
}
88
89impl<'a> Parser<'a> {
90 pub fn new(data: &[u8]) -> Parser {
102 Parser {
103 ecs_iter: EcsIter { base: data.iter(), index: 0, ended: false },
104 state: State::Init,
105 begin: 0,
106 pending_single_byte: false,
107 }
108 }
109}
110
111impl<'a> Iterator for Parser<'a> {
112 type Item = Segment;
113
114 fn next(&mut self) -> Option<Segment> {
115 if self.pending_single_byte {
116 self.pending_single_byte = false;
117 self.begin += 1;
118 return Some(Segment { mode: Mode::Byte, begin: self.begin - 1, end: self.begin });
119 }
120
121 loop {
122 let (i, ecs) = match self.ecs_iter.next() {
123 None => return None,
124 Some(a) => a,
125 };
126 let (next_state, action) = STATE_TRANSITION[self.state as usize + ecs as usize];
127 self.state = next_state;
128
129 let old_begin = self.begin;
130 let push_mode = match action {
131 Action::Idle => continue,
132 Action::Numeric => Mode::Numeric,
133 Action::Alpha => Mode::Alphanumeric,
134 Action::Byte => Mode::Byte,
135 Action::Kanji => Mode::Kanji,
136 Action::KanjiAndSingleByte => {
137 let next_begin = i - 1;
138 if self.begin == next_begin {
139 Mode::Byte
140 } else {
141 self.pending_single_byte = true;
142 self.begin = next_begin;
143 return Some(Segment { mode: Mode::Kanji, begin: old_begin, end: next_begin });
144 }
145 }
146 };
147
148 self.begin = i;
149 return Some(Segment { mode: push_mode, begin: old_begin, end: i });
150 }
151 }
152}
153
#[cfg(test)]
mod parse_tests {
    use crate::optimize::{Parser, Segment};
    use crate::types::Mode;

    /// Runs the parser over `data` and gathers every emitted segment.
    fn parse(data: &[u8]) -> Vec<Segment> {
        Parser::new(data).collect()
    }

    /// Shorthand for building an expected segment.
    fn seg(mode: Mode, begin: usize, end: usize) -> Segment {
        Segment { mode, begin, end }
    }

    /// Digits, an alphanumeric symbol and uppercase letters alternate.
    #[test]
    fn test_parse_1() {
        assert_eq!(
            parse(b"01049123451234591597033130128%10ABC123"),
            vec![
                seg(Mode::Numeric, 0, 29),
                seg(Mode::Alphanumeric, 29, 30),
                seg(Mode::Numeric, 30, 32),
                seg(Mode::Alphanumeric, 32, 35),
                seg(Mode::Numeric, 35, 38),
            ]
        );
    }

    /// Two double-byte pairs, an ASCII `A`, a lone byte 0xb1 and another
    /// double-byte pair.
    #[test]
    fn test_parse_shift_jis_example_1() {
        assert_eq!(
            parse(b"\x82\xa0\x81\x41\x41\xb1\x81\xf0"),
            vec![
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Alphanumeric, 4, 5),
                seg(Mode::Byte, 5, 6),
                seg(Mode::Kanji, 6, 8),
            ]
        );
    }

    /// UTF-8 input is classified byte by byte with the same table, so some
    /// byte pairs are reported as Kanji.
    #[test]
    fn test_parse_utf_8() {
        assert_eq!(
            parse(b"\xe3\x81\x82\xe3\x80\x81A\xef\xbd\xb1\xe2\x84\xab"),
            vec![
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Byte, 4, 5),
                seg(Mode::Kanji, 5, 7),
                seg(Mode::Byte, 7, 10),
                seg(Mode::Kanji, 10, 12),
                seg(Mode::Byte, 12, 13),
            ]
        );
    }

    /// A lead byte followed by a digit is not a Kanji pair.
    #[test]
    fn test_not_kanji_1() {
        assert_eq!(parse(b"\x81\x30"), vec![seg(Mode::Byte, 0, 1), seg(Mode::Numeric, 1, 2)]);
    }

    /// 0xeb followed by 0xc0 is not a Kanji pair; the parser currently reports
    /// the two bytes as two separate Byte segments.
    #[test]
    fn test_not_kanji_2() {
        assert_eq!(parse(b"\xeb\xc0"), vec![seg(Mode::Byte, 0, 1), seg(Mode::Byte, 1, 2)]);
    }

    /// 0x7f is not accepted as the second byte of a Kanji pair.
    #[test]
    fn test_not_kanji_3() {
        assert_eq!(parse(b"\x81\x7f"), vec![seg(Mode::Byte, 0, 1), seg(Mode::Byte, 1, 2)]);
    }

    /// A complete pair followed by a dangling lead byte at end of input.
    #[test]
    fn test_not_kanji_4() {
        assert_eq!(parse(b"\x81\x40\x81"), vec![seg(Mode::Kanji, 0, 2), seg(Mode::Byte, 2, 3)]);
    }
}
247
/// Iterator adaptor over [`Segment`]s that merges a segment into its
/// predecessor whenever the merged segment's `encoded_len` does not exceed the
/// combined `encoded_len` of the two separate segments.
pub struct Optimizer<I> {
    /// Source of the segments that have not been examined yet.
    parser: I,
    /// Segment accumulated so far and not yet yielded.
    last_segment: Segment,
    /// Cached `encoded_len` of `last_segment` for `version`.
    last_segment_size: usize,
    /// Version passed to every `encoded_len` computation.
    version: Version,
    /// `true` once the final segment has been yielded, or if the source was
    /// empty to begin with.
    ended: bool,
}
259
260impl<I: Iterator<Item = Segment>> Optimizer<I> {
261 pub fn new(mut segments: I, version: Version) -> Self {
268 match segments.next() {
269 None => Self {
270 parser: segments,
271 last_segment: Segment { mode: Mode::Numeric, begin: 0, end: 0 },
272 last_segment_size: 0,
273 version,
274 ended: true,
275 },
276 Some(segment) => Self {
277 parser: segments,
278 last_segment: segment,
279 last_segment_size: segment.encoded_len(version),
280 version,
281 ended: false,
282 },
283 }
284 }
285}
286
impl<'a> Parser<'a> {
    /// Wraps this parser in an [`Optimizer`] that measures segment lengths
    /// for `version`.
    ///
    /// Equivalent to `Optimizer::new(self, version)`.
    pub fn optimize(self, version: Version) -> Optimizer<Parser<'a>> {
        Optimizer::new(self, version)
    }
}
292
293impl<I: Iterator<Item = Segment>> Iterator for Optimizer<I> {
294 type Item = Segment;
295
296 fn next(&mut self) -> Option<Segment> {
297 if self.ended {
298 return None;
299 }
300
301 loop {
302 match self.parser.next() {
303 None => {
304 self.ended = true;
305 return Some(self.last_segment);
306 }
307 Some(segment) => {
308 let seg_size = segment.encoded_len(self.version);
309
310 let new_segment = Segment {
311 mode: self.last_segment.mode.max(segment.mode),
312 begin: self.last_segment.begin,
313 end: segment.end,
314 };
315 let new_size = new_segment.encoded_len(self.version);
316
317 if self.last_segment_size + seg_size >= new_size {
318 self.last_segment = new_segment;
319 self.last_segment_size = new_size;
320 } else {
321 let old_segment = self.last_segment;
322 self.last_segment = segment;
323 self.last_segment_size = seg_size;
324 return Some(old_segment);
325 }
326 }
327 }
328 }
329 }
330}
331
332pub fn total_encoded_len(segments: &[Segment], version: Version) -> usize {
334 segments.iter().map(|seg| seg.encoded_len(version)).sum()
335}
336
#[cfg(test)]
mod optimize_tests {
    use crate::optimize::{total_encoded_len, Optimizer, Segment};
    use crate::types::{Mode, Version};

    /// Shorthand for building a segment.
    fn seg(mode: Mode, begin: usize, end: usize) -> Segment {
        Segment { mode, begin, end }
    }

    /// Runs the optimizer over `given` and checks that the output equals
    /// `expected`. Whenever the output differs from the input it must also be
    /// strictly shorter in total encoded length.
    fn test_optimization_result(given: Vec<Segment>, expected: Vec<Segment>, version: Version) {
        let len_before = total_encoded_len(&given, version);
        let optimized: Vec<Segment> = Optimizer::new(given.iter().copied(), version).collect();
        let len_after = total_encoded_len(&optimized, version);

        if given != optimized {
            assert!(len_before > len_after, "{} > {}", len_before, len_after);
        }
        assert!(
            optimized == expected,
            "Optimization gave something better: {} < {} ({:?})",
            len_after,
            total_encoded_len(&expected, version),
            optimized
        );
    }

    /// A short numeric run is absorbed by the preceding alphanumeric run.
    #[test]
    fn test_example_1() {
        test_optimization_result(
            vec![seg(Mode::Alphanumeric, 0, 3), seg(Mode::Numeric, 3, 6), seg(Mode::Byte, 6, 10)],
            vec![seg(Mode::Alphanumeric, 0, 6), seg(Mode::Byte, 6, 10)],
            Version::Normal(1),
        );
    }

    /// The long leading numeric run stays separate; the tail collapses into
    /// one alphanumeric segment.
    #[test]
    fn test_example_2() {
        test_optimization_result(
            vec![
                seg(Mode::Numeric, 0, 29),
                seg(Mode::Alphanumeric, 29, 30),
                seg(Mode::Numeric, 30, 32),
                seg(Mode::Alphanumeric, 32, 35),
                seg(Mode::Numeric, 35, 38),
            ],
            vec![seg(Mode::Numeric, 0, 29), seg(Mode::Alphanumeric, 29, 38)],
            Version::Normal(9),
        );
    }

    /// Several short mixed segments collapse into a single Byte segment.
    #[test]
    fn test_example_3() {
        test_optimization_result(
            vec![
                seg(Mode::Kanji, 0, 4),
                seg(Mode::Alphanumeric, 4, 5),
                seg(Mode::Byte, 5, 6),
                seg(Mode::Kanji, 6, 8),
            ],
            vec![seg(Mode::Byte, 0, 8)],
            Version::Normal(1),
        );
    }

    /// A longer Kanji run followed by one byte is left untouched.
    #[test]
    fn test_example_4() {
        test_optimization_result(
            vec![seg(Mode::Kanji, 0, 10), seg(Mode::Byte, 10, 11)],
            vec![seg(Mode::Kanji, 0, 10), seg(Mode::Byte, 10, 11)],
            Version::Normal(1),
        );
    }

    /// Three digits plus one alphanumeric character stay split at Micro(2).
    #[test]
    fn test_annex_j_guideline_1a() {
        test_optimization_result(
            vec![seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)],
            vec![seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)],
            Version::Micro(2),
        );
    }

    /// Two digits plus two alphanumeric characters merge at Micro(2).
    #[test]
    fn test_annex_j_guideline_1b() {
        test_optimization_result(
            vec![seg(Mode::Numeric, 0, 2), seg(Mode::Alphanumeric, 2, 4)],
            vec![seg(Mode::Alphanumeric, 0, 4)],
            Version::Micro(2),
        );
    }

    /// The input of guideline 1a merges once the version is Micro(3).
    #[test]
    fn test_annex_j_guideline_1c() {
        test_optimization_result(
            vec![seg(Mode::Numeric, 0, 3), seg(Mode::Alphanumeric, 3, 4)],
            vec![seg(Mode::Alphanumeric, 0, 4)],
            Version::Micro(3),
        );
    }
}
454
/// Benchmarks parsing and optimizing a fixed sample that mixes ASCII with
/// double-byte sequences (presumably Shift JIS text — confirm against the
/// original sample).
///
/// `Parser` and `Optimizer` are lazy iterators, so the optimizer is driven to
/// completion with `count()`. Without that, only the construction of the
/// adaptor (a single `next()` on the parser) would be measured.
#[cfg(feature = "bench")]
#[bench]
fn bench_optimize(bencher: &mut test::Bencher) {
    use crate::types::Version;

    let data = b"QR\x83R\x81[\x83h\x81i\x83L\x83\x85\x81[\x83A\x81[\x83\x8b\x83R\x81[\x83h\x81j\
                 \x82\xc6\x82\xcd\x81A1994\x94N\x82\xc9\x83f\x83\x93\x83\\\x81[\x82\xcc\x8aJ\
                 \x94\xad\x95\x94\x96\xe5\x81i\x8c\xbb\x8d\xdd\x82\xcd\x95\xaa\x97\xa3\x82\xb5\x83f\
                 \x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x81j\x82\xaa\x8aJ\x94\xad\x82\xb5\x82\xbd\
                 \x83}\x83g\x83\x8a\x83b\x83N\x83X\x8c^\x93\xf1\x8e\x9f\x8c\xb3\x83R\x81[\x83h\
                 \x82\xc5\x82\xa0\x82\xe9\x81B\x82\xc8\x82\xa8\x81AQR\x83R\x81[\x83h\x82\xc6\
                 \x82\xa2\x82\xa4\x96\xbc\x8f\xcc\x81i\x82\xa8\x82\xe6\x82\xd1\x92P\x8c\xea\x81j\
                 \x82\xcd\x83f\x83\x93\x83\\\x81[\x83E\x83F\x81[\x83u\x82\xcc\x93o\x98^\x8f\xa4\
                 \x95W\x81i\x91\xe64075066\x8d\x86\x81j\x82\xc5\x82\xa0\x82\xe9\x81BQR\x82\xcd\
                 Quick Response\x82\xc9\x97R\x97\x88\x82\xb5\x81A\x8d\x82\x91\xac\x93\xc7\x82\xdd\
                 \x8e\xe6\x82\xe8\x82\xaa\x82\xc5\x82\xab\x82\xe9\x82\xe6\x82\xa4\x82\xc9\x8aJ\
                 \x94\xad\x82\xb3\x82\xea\x82\xbd\x81B\x93\x96\x8f\x89\x82\xcd\x8e\xa9\x93\xae\
                 \x8e\xd4\x95\x94\x95i\x8dH\x8f\xea\x82\xe2\x94z\x91\x97\x83Z\x83\x93\x83^\x81[\
                 \x82\xc8\x82\xc7\x82\xc5\x82\xcc\x8eg\x97p\x82\xf0\x94O\x93\xaa\x82\xc9\x8aJ\
                 \x94\xad\x82\xb3\x82\xea\x82\xbd\x82\xaa\x81A\x8c\xbb\x8d\xdd\x82\xc5\x82\xcd\x83X\
                 \x83}\x81[\x83g\x83t\x83H\x83\x93\x82\xcc\x95\x81\x8by\x82\xc8\x82\xc7\x82\xc9\
                 \x82\xe6\x82\xe8\x93\xfa\x96{\x82\xc9\x8c\xc0\x82\xe7\x82\xb8\x90\xa2\x8aE\x93I\
                 \x82\xc9\x95\x81\x8by\x82\xb5\x82\xc4\x82\xa2\x82\xe9\x81B";
    // Returning the count hands the result to the bencher, which keeps the
    // whole pass from being optimized away.
    bencher.iter(|| Parser::new(data).optimize(Version::Normal(15)).count());
}
480
/// Mutually exclusive byte classes used as the column index of the parser's
/// transition table. The discriminants are the column numbers.
#[derive(Copy, Clone)]
enum ExclCharSet {
    /// End-of-input marker; never produced by `from_u8`.
    End = 0,

    /// Space, `$`, `%`, `*`, `+`, `-`, `.`, `/` and `:`.
    Symbol = 1,

    /// ASCII digits `0`–`9`.
    Numeric = 2,

    /// ASCII uppercase letters `A`–`Z`.
    Alpha = 3,

    /// Bytes 0x81–0x9f.
    KanjiHi1 = 4,

    /// Bytes 0xe0–0xea.
    KanjiHi2 = 5,

    /// The single byte 0xeb.
    KanjiHi3 = 6,

    /// Bytes 0x40, 0x5b–0x7e, 0x80 and 0xa0–0xbf.
    KanjiLo1 = 7,

    /// Bytes 0xc0–0xdf and 0xec–0xfc.
    KanjiLo2 = 8,

    /// Every byte not covered by the classes above.
    Byte = 9,
}

impl ExclCharSet {
    /// Classifies a single input byte. The arms do not overlap, so their
    /// order carries no meaning.
    fn from_u8(c: u8) -> Self {
        match c {
            b'0'..=b'9' => Self::Numeric,
            b'A'..=b'Z' => Self::Alpha,
            b' ' | b'$' | b'%' | b'*' | b'+' | b'-' | b'.' | b'/' | b':' => Self::Symbol,
            0x81..=0x9f => Self::KanjiHi1,
            0xe0..=0xea => Self::KanjiHi2,
            0xeb => Self::KanjiHi3,
            b'@' | b'['..=b'~' | 0x80 | 0xa0..=0xbf => Self::KanjiLo1,
            0xc0..=0xdf | 0xec..=0xfc => Self::KanjiLo2,
            _ => Self::Byte,
        }
    }
}
546
/// States of the parser's transition table.
///
/// Each discriminant is the index of the first entry of that state's row in
/// `STATE_TRANSITION` (rows are ten entries wide, one per `ExclCharSet`
/// value), so `state as usize + ecs as usize` addresses the table directly.
#[derive(Copy, Clone)]
enum State {
    /// Start state; also re-entered after the end-of-input marker.
    Init = 0,

    /// Inside a run of `ExclCharSet::Numeric` bytes.
    Numeric = 10,

    /// Inside a run that will be emitted as `Action::Alpha`.
    Alpha = 20,

    /// Inside a run that will be emitted as `Action::Byte`.
    Byte = 30,

    /// Just read a `KanjiHi1` or `KanjiHi2` byte and waiting for its partner.
    KanjiHi12 = 40,

    /// Just read a `KanjiHi3` byte and waiting for its partner.
    KanjiHi3 = 50,

    /// Inside a run of completed byte pairs that will be emitted as
    /// `Action::Kanji`.
    Kanji = 60,
}
573
/// What `Parser::next` does after taking a transition.
#[derive(Copy, Clone)]
enum Action {
    /// Emit nothing and keep reading input.
    Idle,

    /// Emit the accumulated range as a `Mode::Numeric` segment.
    Numeric,

    /// Emit the accumulated range as a `Mode::Alphanumeric` segment.
    Alpha,

    /// Emit the accumulated range as a `Mode::Byte` segment.
    Byte,

    /// Emit the accumulated range as a `Mode::Kanji` segment.
    Kanji,

    /// Emit the accumulated range minus its last byte as a `Mode::Kanji`
    /// segment, then that last byte as a one-byte `Mode::Byte` segment on the
    /// following call. If the range holds only that byte, it is emitted
    /// directly as a `Mode::Byte` segment.
    KanjiAndSingleByte,
}
596
597static STATE_TRANSITION: [(State, Action); 70] = [
598 (State::Init, Action::Idle), (State::Alpha, Action::Idle), (State::Numeric, Action::Idle), (State::Alpha, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi3, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Init, Action::Numeric), (State::Alpha, Action::Numeric), (State::Numeric, Action::Idle), (State::Alpha, Action::Numeric), (State::KanjiHi12, Action::Numeric), (State::KanjiHi12, Action::Numeric), (State::KanjiHi3, Action::Numeric), (State::Byte, Action::Numeric), (State::Byte, Action::Numeric), (State::Byte, Action::Numeric), (State::Init, Action::Alpha), (State::Alpha, Action::Idle), (State::Numeric, Action::Alpha), (State::Alpha, Action::Idle), (State::KanjiHi12, Action::Alpha), (State::KanjiHi12, Action::Alpha), (State::KanjiHi3, Action::Alpha), (State::Byte, Action::Alpha), (State::Byte, Action::Alpha), (State::Byte, Action::Alpha), (State::Init, Action::Byte), (State::Alpha, Action::Byte), (State::Numeric, Action::Byte), (State::Alpha, Action::Byte), (State::KanjiHi12, Action::Byte), (State::KanjiHi12, Action::Byte), (State::KanjiHi3, Action::Byte), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Byte, Action::Idle), (State::Init, Action::KanjiAndSingleByte), (State::Alpha, Action::KanjiAndSingleByte), (State::Numeric, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::Byte, Action::KanjiAndSingleByte), (State::Init, Action::KanjiAndSingleByte), (State::Alpha, Action::KanjiAndSingleByte), (State::Numeric, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Kanji, Action::Idle), (State::KanjiHi12, Action::KanjiAndSingleByte), (State::KanjiHi3, Action::KanjiAndSingleByte), (State::Kanji, Action::Idle), (State::Byte, Action::KanjiAndSingleByte), 
(State::Byte, Action::KanjiAndSingleByte), (State::Init, Action::Kanji), (State::Alpha, Action::Kanji), (State::Numeric, Action::Kanji), (State::Alpha, Action::Kanji), (State::KanjiHi12, Action::Idle), (State::KanjiHi12, Action::Idle), (State::KanjiHi3, Action::Idle), (State::Byte, Action::Kanji), (State::Byte, Action::Kanji), (State::Byte, Action::Kanji), ];
678
679