array_tool/
string.rs

Help
// Copyright 2015-2017 Daniel P. Clark & array_tool Developers
// 
// Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
// http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
// http://opensource.org/licenses/MIT>, at your option. This file may not be
// copied, modified, or distributed except according to those terms.

/// An iterator that yields the UTF-8 bytes of each character of a string.
///
/// Despite the name, the unit of iteration is a single `char` (Unicode scalar
/// value) as delimited by the string's char boundaries, not an extended
/// grapheme cluster: a base letter followed by a combining mark is yielded as
/// two separate items.
#[derive(Debug)]
pub struct GraphemeBytesIter<'a> {
  /// The string being walked.
  source: &'a str,
  /// Byte offset in `source` of the next character to yield.  Always sits on
  /// a char boundary because it only ever advances by whole characters.
  offset: usize,
  /// Number of characters yielded so far.
  grapheme_count: usize,
}
impl<'a> GraphemeBytesIter<'a> {
  /// Creates a new grapheme iterator from a string source.
  pub fn new(source: &'a str) -> GraphemeBytesIter<'a> {
    GraphemeBytesIter {
      source: source,
      offset: 0,
      grapheme_count: 0,
    }
  }

  /// Number of characters that have not been yielded yet.
  fn remaining(&self) -> usize {
    self.source[self.offset..].chars().count()
  }
}
impl<'a> Iterator for GraphemeBytesIter<'a> {
  type Item = &'a [u8];

  /// Returns the bytes of the next character, or `None` once the whole
  /// source has been consumed.
  fn next(&mut self) -> Option<&'a [u8]> {
    // Copy the `&'a str` out so the returned slice borrows from the source
    // string rather than from `self`.
    let source: &'a str = self.source;
    match source[self.offset..].chars().next() {
      Some(ch) => {
        let end = self.offset + ch.len_utf8();
        let bytes = source[self.offset..end].as_bytes();

        self.offset = end;
        self.grapheme_count += 1;

        Some(bytes)
      }
      None => None,
    }
  }

  /// Exact bounds: the number of characters still to be yielded.
  fn size_hint(&self) -> (usize, Option<usize>) {
    let remaining = self.remaining();
    (remaining, Some(remaining))
  }
}
impl<'a> ExactSizeIterator for GraphemeBytesIter<'a> {
  /// Returns how many items are left, as `ExactSizeIterator` requires; the
  /// value shrinks by one with every call to `next`.
  fn len(&self) -> usize {
    self.remaining()
  }
}
/// ToGraphemeBytesIter - obtain an iterator over the bytes of each character in a string.
pub trait ToGraphemeBytesIter<'a> {
  /// Builds a `GraphemeBytesIter` that starts at the beginning of the string
  /// and hands out one byte slice per character.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::ToGraphemeBytesIter;
  ///
  /// let string = "a s—d féZ";
  /// let graphemes = string.grapheme_bytes_iter();
  /// assert_eq!(graphemes.skip(3).next(), Some("—".as_bytes()));
  /// ```
  ///
  /// # Output
  /// ```text
  /// [226, 128, 148]
  /// ```
  fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a>;
}
impl<'a> ToGraphemeBytesIter<'a> for str {
  /// Wraps this string slice in a fresh `GraphemeBytesIter`.
  fn grapheme_bytes_iter(&'a self) -> GraphemeBytesIter<'a> {
    GraphemeBytesIter::new(self)
  }
}

/// Squeeze - squeezes duplicate characters down to one each
pub trait Squeeze {
  /// Collapses every run of a repeated character into a single occurrence.
  ///
  /// When `targets` is empty every character is eligible; otherwise only the
  /// characters that appear in `targets` are squeezed and runs of any other
  /// character are left as they are.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::Squeeze;
  ///
  /// assert_eq!("yellow moon".squeeze(""), "yelow mon");
  /// assert_eq!("yellow moon".squeeze("l"), "yelow moon");
  /// ```
  ///
  /// # Output
  /// ```text
  /// "yelow mon"
  /// ```
  fn squeeze(&self, targets: &'static str) -> String;
}
impl Squeeze for str {
  fn squeeze(&self, targets: &'static str) -> String {
    let mut output = String::with_capacity(self.len());
    let everything = targets.is_empty();
    // `None` means "no previous character yet".  Using an in-band sentinel
    // here would make a leading NUL character look like a repeat.
    let mut last: Option<char> = None;
    for character in self.chars() {
      let repeated = last == Some(character);
      let squeezable = everything || targets.contains(character);
      if !(repeated && squeezable) {
        output.push(character);
      }
      last = Some(character);
    }
    output
  }
}

/// Justify - expand line to given width.
pub trait Justify {
  /// Spreads the words of a line so that the result is exactly `width`
  /// characters wide, padding the gaps between words with spaces.  Spaces that
  /// do not divide evenly go to the leftmost gaps.
  ///
  /// Leading and trailing white space is dropped and every run of white space
  /// between words is replaced by the computed padding.  The input is
  /// returned unchanged when there is nothing to spread: it is empty or all
  /// white space, it holds a single word, or (once trimmed) it is already
  /// `width` characters or longer.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::Justify;
  ///
  /// assert_eq!("asd asdf asd".justify_line(14), "asd  asdf  asd");
  /// ```
  ///
  /// # Output
  /// ```text
  /// "asd  asdf  asd"
  /// ```
  fn justify_line(&self, width: usize) -> String;
}

impl Justify for str {
  fn justify_line(&self, width: usize) -> String {
    let trimmed = self.trim();
    // Already as wide as requested (or wider): nothing to expand.
    if trimmed.chars().count() >= width { return self.to_string(); }

    let words: Vec<&str> = trimmed.split_whitespace().collect();
    // Zero or one word means there is no gap to distribute padding into.
    if words.len() < 2 { return self.to_string(); }

    let gaps = words.len() - 1;
    let word_chars: usize = words.iter().map(|word| word.chars().count()).sum();
    // Cannot underflow: the trimmed line is shorter than `width` and the
    // words are at most as long as the trimmed line.  Measuring the words
    // rather than the line keeps the result exact even when the input
    // separates words with more than one white space character.
    let padding = width - word_chars;
    let base = padding / gaps;
    let extra = padding % gaps;

    let mut line = String::with_capacity(trimmed.len() + padding);
    for (index, word) in words.iter().enumerate() {
      if index > 0 {
        // The first `extra` gaps each take one of the left-over spaces.
        let fill = if index <= extra { base + 1 } else { base };
        for _ in 0..fill {
          line.push(' ');
        }
      }
      line.push_str(word);
    }
    line
  }
}

/// Substitute string character for each index given.
pub trait SubstMarks {
  /// Replaces the character at every position listed in `marks` with `chr`.
  ///
  /// Positions count characters (not bytes) from zero.  Positions past the
  /// end of the string are ignored, and `chr` is inserted whole, so it may be
  /// longer or shorter than the single character it replaces.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::SubstMarks;
  ///
  /// assert_eq!(
  ///   "asdf asdf asdf".subst_marks(vec![0,5,8], "Z"),
  ///   "Zsdf ZsdZ asdf"
  /// );
  /// ```
  ///
  /// # Output
  /// ```text
  /// "Zsdf ZsdZ asdf"
  /// ```
  fn subst_marks(&self, marks: Vec<usize>, chr: &'static str) -> String;
}
impl SubstMarks for str {
  fn subst_marks(&self, marks: Vec<usize>, chr: &'static str) -> String {
    let mut bytes: Vec<u8> = Vec::with_capacity(self.len());
    // `position` is the character index; `start` is the byte offset at which
    // that character begins.
    for (position, (start, character)) in self.char_indices().enumerate() {
      let piece = if marks.contains(&position) {
        chr.as_bytes()
      } else {
        self[start..start + character.len_utf8()].as_bytes()
      };
      bytes.extend_from_slice(piece);
    }
    String::from_utf8(bytes).expect("subst_marks failed to render String!")
  }
}

/// After whitespace
pub trait AfterWhitespace {
  /// Starting at byte `offset`, seeks towards the end of the string for the
  /// first character that is not a space (`' '`).  The resulting value is
  /// counted from `offset`, so `Some(0)` means the character at `offset` is
  /// itself not a space.
  ///
  /// Returns `None` when only spaces (or nothing) remain from `offset` on,
  /// when `offset` lies beyond the end of the string, or when `offset` does
  /// not fall on a character boundary.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::AfterWhitespace;
  ///
  /// assert_eq!(
  ///   "asdf           asdf asdf".seek_end_of_whitespace(6),
  ///   Some(9)
  /// );
  /// ```
  fn seek_end_of_whitespace(&self, offset: usize) -> Option<usize>;
}
impl AfterWhitespace for str {
  fn seek_end_of_whitespace(&self, offset: usize) -> Option<usize> {
    // `get` returns `None` both for an offset past the end and for one that
    // splits a multi-byte character, where plain slicing would panic.
    // Everything skipped before the match is a one-byte space, so the byte
    // index reported by `find` is also the number of characters skipped.
    self.get(offset..)
      .and_then(|rest| rest.find(|character: char| character != ' '))
  }
}

/// Word wrapping
pub trait WordWrap {
  /// Wraps text by swapping selected space characters for new lines so that
  /// lines fit within `width`.
  ///
  /// White space is treated as valid content and new lines will only be swapped in for
  /// the last white space character at the end of the given width.  White space may reach beyond
  /// the width you've provided.  You will need to trim the end of lines in your own output (e.g.
  /// splitting the string at each new line and printing the line with trim_right).  Or just trust
  /// that lines that are beyond the width are just white space and only print the width -
  /// ignoring trailing white space.
  ///
  /// # Example
  /// ```
  /// use array_tool::string::WordWrap;
  ///
  /// assert_eq!("asd asdf asd".word_wrap(8), "asd asdf\nasd");
  /// ```
  ///
  /// # Output
  /// ```text
  /// "asd asdf\nasd"
  /// ```
  fn word_wrap(&self, width: usize) -> String;
}
// Only the space and new line characters are searched for, and every position
// handled below is a byte offset into the text.
// NOTE(review): the window slices (`t[offset..end]`) and the one-byte
// look-ahead slice panic when a bound does not fall on a char boundary, and
// the collected byte offsets are handed to `subst_marks`, which (as written in
// this file) counts characters.  This therefore looks safe only for text made
// of single-byte characters - confirm before feeding it anything else.
impl WordWrap for &'static str {
  fn word_wrap(&self, width: usize) -> String {
    // Byte offsets of the spaces that will be turned into new lines.
    let mut markers = vec![];
    // Recursive worker.  Looks at the window of `t` that starts at `offset`
    // and is at most `chunk` bytes long (clamped to the end of the text),
    // records in `mrkrs` where a line break belongs, then recurses with a
    // later `offset`.  Once the window reaches the end of the text and holds
    // no more spaces or new lines, it returns `t` with every recorded
    // position replaced by "\n".
    fn wordwrap(t: &'static str, chunk: usize, offset: usize, mrkrs: &mut Vec<usize>) -> String {
      // First look for an existing new line inside the window.
      match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind("\n") {
        None => {
          // No new line in the window: look for the last space in it.
          match t[offset..*vec![offset+chunk,t.len()].iter().min().unwrap()].rfind(" ") {
            Some(x) => {
              let mut eows = x; // end of white space
              // When text remains past the window, `a` is the distance from
              // the space at `x` to the next non-space character, which moves
              // `eows` to the last space of that run.
              if offset+chunk < t.len() { // check if white space continues
                match t.seek_end_of_whitespace(offset+x) {
                  Some(a) => {
                    if a.ne(&0) {
                      eows = x+a-1;
                    }
                  },
                  None => {},
                }
              }
              // Record a break only when text remains past the window and the
              // byte right after `eows` is neither a new line nor a space.
              if offset+chunk < t.len() { // safe to seek ahead by 1 or not end of string
                if !["\n".chars().next().unwrap(), " ".chars().next().unwrap()].contains(
                  &t[offset+eows+1..offset+eows+2].chars().next().unwrap()
                  ) {
                  mrkrs.push(offset+eows)
                }
              };
              // Continue with the text that follows the white space.
              wordwrap(t, chunk, offset+eows+1, mrkrs)
            },
            // Neither a new line nor a space in the window.
            None => { 
              if offset+chunk < t.len() { // String may continue
                wordwrap(t, chunk, offset+1, mrkrs) // Recurse + 1 until next space
              } else {
                use string::SubstMarks;

                // The window reaches the end of the text: apply all breaks.
                return t.subst_marks(mrkrs.to_vec(), "\n")
              }
            },
          }
        },
        // The window already holds a new line: restart right after the last
        // one without recording a break.
        Some(x) => {
          wordwrap(t, chunk, offset+x+1, mrkrs)
        },
      }
    };
    // The window is `width + 1` bytes, presumably so that a space sitting
    // directly after `width` characters can still serve as the break.
    wordwrap(self, width+1, 0, &mut markers)
  }
}
array_tool/string.rs

array_tool/
string.rs