array_tool/
string.rs

1// Copyright 2015-2017 Daniel P. Clark & array_tool Developers
2// 
3// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
4// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
5// http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
/// An iterator that produces the UTF-8 bytes of each character of a string.
///
/// Despite the name, the source is split on `char` boundaries (Unicode scalar
/// values), not on extended grapheme clusters: a base letter followed by a
/// combining mark is yielded as two separate items.
#[derive(Debug)]
pub struct GraphemeBytesIter<'a> {
  /// The string being walked.
  source: &'a str,
  /// Byte offset of the next character to yield; always on a char boundary.
  offset: usize,
  /// Number of items yielded so far.
  grapheme_count: usize,
}
impl<'a> GraphemeBytesIter<'a> {
  /// Creates a new iterator positioned at the start of `source`.
  pub fn new(source: &'a str) -> GraphemeBytesIter<'a> {
    GraphemeBytesIter {
      source: source,
      offset: 0,
      grapheme_count: 0,
    }
  }
}
impl<'a> Iterator for GraphemeBytesIter<'a> {
  type Item = &'a [u8];

  /// Returns the bytes of the next character, or `None` once the source is
  /// exhausted.
  fn next(&mut self) -> Option<&'a [u8]> {
    // Copy the reference out so the returned slice borrows from the source
    // string (lifetime 'a) rather than from `self`.
    let source: &'a str = self.source;
    let rest = &source[self.offset..];
    let width = match rest.chars().next() {
      Some(character) => character.len_utf8(),
      None => return None,
    };

    self.grapheme_count += 1;
    self.offset += width;

    Some(rest[..width].as_bytes())
  }

  /// Exact number of items still to be yielded.
  ///
  /// `ExactSizeIterator` requires this to be precise, so the remaining
  /// characters are counted rather than estimated.
  fn size_hint(&self) -> (usize, Option<usize>) {
    let remaining = self.source[self.offset..].chars().count();
    (remaining, Some(remaining))
  }
}
impl<'a> ExactSizeIterator for GraphemeBytesIter<'a> {
  /// Number of items that have not been yielded yet (not the length of the
  /// whole source).
  fn len(&self) -> usize {
    self.size_hint().0
  }
}
53/// ToGraphemeBytesIter - create an iterator to return bytes for each grapheme in a string.
54pub trait ToGraphemeBytesIter<'a> {
55  /// Returns a GraphemeBytesIter which you may iterate over.
56  ///
57  /// # Example
58  /// ```
59  /// use array_tool::string::ToGraphemeBytesIter;
60  ///
61  /// let string = "a s—d féZ";
62  /// let mut graphemes = string.grapheme_bytes_iter();
63  /// graphemes.skip(3).next();
64  /// ```
65  ///
66  /// # Output
67  /// ```text
68  /// [226, 128, 148]
69  /// ```
70  fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a>;
71}
72impl<'a> ToGraphemeBytesIter<'a> for str {
73  fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a> {
74    GraphemeBytesIter::new(&self)
75  }
76}
77
/// Squeeze - squeezes duplicate characters down to one each
pub trait Squeeze {
  /// Collapses every run of one repeated character into a single occurrence.
  ///
  /// When `targets` is empty every character is eligible; otherwise only
  /// characters that appear in `targets` are squeezed and all other runs are
  /// kept as they are.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::Squeeze;
  ///
  /// "yellow moon".squeeze("");
  /// ```
  ///
  /// # Output
  /// ```text
  /// "yelow mon"
  /// ```
  fn squeeze(&self, targets: &'static str) -> String;
}
impl Squeeze for str {
  fn squeeze(&self, targets: &'static str) -> String {
    let squeeze_all = targets.is_empty();
    let mut output = String::with_capacity(self.len());
    // `None` until the first character has been seen. Using an Option rather
    // than a placeholder value means no real character (such as a leading
    // NUL) can be mistaken for a repeat.
    let mut previous: Option<char> = None;

    for character in self.chars() {
      let repeated = previous == Some(character);
      let eligible = squeeze_all || targets.contains(character);
      if !(repeated && eligible) {
        output.push(character);
      }
      previous = Some(character);
    }
    output
  }
}
110
/// Justify - expand line to given width.
pub trait Justify {
  /// Spreads the words of a line apart with spaces so that the result is
  /// `width` characters wide.
  ///
  /// Leading and trailing whitespace is dropped and every run of whitespace
  /// between words is replaced by spaces. Extra spaces are handed out evenly,
  /// with the leftmost gaps receiving one more when the padding does not
  /// divide equally.
  ///
  /// The input is returned unchanged (untrimmed) when it is already at least
  /// `width` characters wide after trimming, or when it holds fewer than two
  /// words and so has no gap to widen.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::Justify;
  ///
  /// "asd asdf asd".justify_line(14);
  /// ```
  ///
  /// # Output
  /// ```text
  /// "asd  asdf  asd"
  /// ```
  fn justify_line(&self, width: usize) -> String;
}

impl Justify for str {
  fn justify_line(&self, width: usize) -> String {
    let trimmed = self.trim();
    if trimmed.chars().count() >= width { return self.to_string(); }

    let words: Vec<&str> = trimmed.split_whitespace().collect();
    // Zero words (empty or whitespace-only input) or a single word: there is
    // no gap to pad, and `words.len() - 1` must not be allowed to underflow.
    if words.len() < 2 { return self.to_string(); }

    let gaps = words.len() - 1;
    // Width of the line with exactly one space per gap. Measuring this,
    // rather than the trimmed input, keeps the result `width` wide even when
    // the input separates words with several whitespace characters.
    // It never exceeds the trimmed length, which is below `width` here.
    let minimal = words.iter().map(|word| word.chars().count()).sum::<usize>() + gaps;
    let padding = width - minimal;
    let share = padding / gaps;
    let remainder = padding % gaps;

    let mut line = String::with_capacity(trimmed.len() + padding);
    for (index, word) in words.iter().enumerate() {
      line.push_str(word);
      if index < gaps { // no spaces after the last word
        let extra = if index < remainder { share + 1 } else { share };
        for _ in 0..extra + 1 {
          line.push(' ');
        }
      }
    }
    line
  }
}
158
/// Substitute string character for each index given.
pub trait SubstMarks {
  /// Replaces the character at every position listed in `marks` with `chr`.
  ///
  /// Positions count characters, not bytes, starting at zero. Positions past
  /// the end of the string are ignored.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::SubstMarks;
  ///
  /// "asdf asdf asdf".subst_marks(vec![0,5,8], "Z");
  /// ```
  ///
  /// # Output
  /// ```text
  /// "Zsdf ZsdZ asdf"
  /// ```
  fn subst_marks(&self, marks: Vec<usize>, chr: &'static str) -> String;
}
impl SubstMarks for str {
  fn subst_marks(&self, marks: Vec<usize>, chr: &'static str) -> String {
    let mut result = String::with_capacity(self.len());
    for (position, character) in self.chars().enumerate() {
      if marks.contains(&position) {
        result.push_str(chr);
      } else {
        result.push(character);
      }
    }
    result
  }
}
199
/// After whitespace
pub trait AfterWhitespace {
  /// Starting at byte `offset`, seeks towards the end of the string for the
  /// first character that is not a space (`' '`; tabs and new lines count as
  /// content). The resulting value is the number of characters skipped,
  /// counted from `offset`.
  ///
  /// Returns `None` when only spaces remain, when `offset` is past the end of
  /// the string, or when `offset` does not fall on a character boundary.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::AfterWhitespace;
  ///
  /// assert_eq!(
  ///   "asdf           asdf asdf".seek_end_of_whitespace(6),
  ///   Some(9)
  /// );
  /// ```
  fn seek_end_of_whitespace(&self, offset: usize) -> Option<usize>;
}
impl AfterWhitespace for str {
  fn seek_end_of_whitespace(&self, offset: usize) -> Option<usize> {
    // `get` yields `None` for an out-of-range or mid-character offset, where
    // plain slicing would panic.
    self.get(offset..)
      .and_then(|tail| tail.chars().position(|character| character != ' '))
  }
}
232
233/// Word wrapping
234pub trait WordWrap {
235  ///  White space is treated as valid content and new lines will only be swapped in for
236  ///  the last white space character at the end of the given width.  White space may reach beyond
237  ///  the width you've provided.  You will need to trim end of lines in your own output (e.g.
238  ///  splitting string at each new line and printing the line with trim_right).  Or just trust
239  ///  that lines that are beyond the width are just white space and only print the width -
240  ///  ignoring tailing white space.
241  ///
242  /// # Example
243  /// ```
244  /// use array_tool::string::WordWrap;
245  ///
246  /// "asd asdf asd".word_wrap(8);
247  /// ```
248  ///
249  /// # Output
250  /// ```text
251  /// "asd asdf\nasd"
252  /// ```
253  fn word_wrap(&self, width: usize) -> String;
254}
255// No need to worry about character encoding since we're only checking for the
256// space and new line characters.
257impl WordWrap for &'static str {
258  fn word_wrap(&self, width: usize) -> String {
259    let mut markers = vec![];
260    fn wordwrap(t: &'static str, chunk: usize, offset: usize, mrkrs: &mut Vec<usize>) -> String {
261      match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind("\n") {
262        None => {
263          match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind(" ") {
264            Some(x) => {
265              let mut eows = x; // end of white space
266              if offset+chunk < t.len() { // check if white space continues
267                match t.seek_end_of_whitespace(offset+x) {
268                  Some(a) => {
269                    if a.ne(&0) {
270                      eows = x+a-1;
271                    }
272                  },
273                  None => {},
274                }
275              }
276              if offset+chunk < t.len() { // safe to seek ahead by 1 or not end of string
277                if !["\n".chars().next().unwrap(), " ".chars().next().unwrap()].contains(
278                  &t[offset+eows+1..offset+eows+2].chars().next().unwrap()
279                  ) {
280                  mrkrs.push(offset+eows)
281                }
282              };
283              wordwrap(t, chunk, offset+eows+1, mrkrs)
284            },
285            None => { 
286              if offset+chunk < t.len() { // String may continue
287                wordwrap(t, chunk, offset+1, mrkrs) // Recurse + 1 until next space
288              } else {
289                use string::SubstMarks;
290
291                return t.subst_marks(mrkrs.to_vec(), "\n")
292              }
293            },
294          }
295        },
296        Some(x) => {
297          wordwrap(t, chunk, offset+x+1, mrkrs)
298        },
299      }
300    };
301    wordwrap(self, width+1, 0, &mut markers)
302  }
303}