imageproc/
hog.rs

1//! [HoG features](https://en.wikipedia.org/wiki/Histogram_of_oriented_gradients)
2//! and helpers for visualizing them.
3
4use crate::definitions::{Clamp, Image};
5use crate::gradients::{horizontal_sobel, vertical_sobel};
6use crate::math::l2_norm;
7use image::{GenericImage, GrayImage, ImageBuffer, Luma};
8use num::Zero;
9use std::f32;
10
11/// Parameters for HoG descriptors.
12#[derive(Debug, Copy, Clone, PartialEq, Eq)]
13pub struct HogOptions {
14    /// Number of gradient orientation bins.
15    pub orientations: usize,
16    /// Whether gradients in opposite directions are treated as equal.
17    pub signed: bool,
18    /// Width and height of cell in pixels.
19    pub cell_side: usize,
20    /// Width and height of block in cells.
21    pub block_side: usize,
22    /// Offset of the start of one block from the next in cells.
23    pub block_stride: usize, // TODO: choice of normalisation - for now we just scale to unit L2 norm
24}
25
26impl HogOptions {
27    /// User-provided options, prior to validation.
28    pub fn new(
29        orientations: usize,
30        signed: bool,
31        cell_side: usize,
32        block_side: usize,
33        block_stride: usize,
34    ) -> HogOptions {
35        HogOptions {
36            orientations,
37            signed,
38            cell_side,
39            block_side,
40            block_stride,
41        }
42    }
43}
44
45/// HoG options plus values calculated from these options and the desired
46/// image dimensions. Validation must occur when instances of this struct
47/// are created - functions receiving a spec will assume that it is valid.
48#[derive(Debug, Copy, Clone, PartialEq, Eq)]
49pub struct HogSpec {
50    /// Original options.
51    options: HogOptions,
52    /// Number of non-overlapping cells required to cover the image's width.
53    cells_wide: usize,
54    /// Number of non-overlapping cells required to cover the image height.
55    cells_high: usize,
56    /// Number of (possibly overlapping) blocks required to cover the image's width.
57    blocks_wide: usize,
58    /// Number of (possibly overlapping) blocks required to cover the image's height.
59    blocks_high: usize,
60}
61
62impl HogSpec {
63    /// Returns an error message if image dimensions aren't compatible with the provided options.
64    pub fn from_options(width: u32, height: u32, options: HogOptions) -> Result<HogSpec, String> {
65        let (cells_wide, cells_high) =
66            Self::checked_cell_dimensions(width as usize, height as usize, options)?;
67        let (blocks_wide, blocks_high) =
68            Self::checked_block_dimensions(cells_wide, cells_high, options)?;
69        Ok(HogSpec {
70            options,
71            cells_wide,
72            cells_high,
73            blocks_wide,
74            blocks_high,
75        })
76    }
77
78    fn invalid_options_message(errors: &[String]) -> String {
79        format!("Invalid HoG options: {0}", errors.join(", "))
80    }
81
82    /// Returns (cells wide, cells high), or an error message if cell side doesn't evenly divide width and height.
83    fn checked_cell_dimensions(
84        width: usize,
85        height: usize,
86        options: HogOptions,
87    ) -> Result<(usize, usize), String> {
88        let mut errors: Vec<String> = vec![];
89        if width % options.cell_side != 0 {
90            errors.push(format!(
91                "cell side {} does not evenly divide width {}",
92                options.cell_side, width
93            ));
94        }
95        if height % options.cell_side != 0 {
96            errors.push(format!(
97                "cell side {} does not evenly divide height {}",
98                options.cell_side, height
99            ));
100        }
101        if !errors.is_empty() {
102            return Err(Self::invalid_options_message(&errors));
103        }
104        Ok((width / options.cell_side, height / options.cell_side))
105    }
106
107    /// Returns (blocks wide, blocks high), or an error message if the block size and stride don't evenly cover
108    /// the grid of cells.
109    fn checked_block_dimensions(
110        cells_wide: usize,
111        cells_high: usize,
112        options: HogOptions,
113    ) -> Result<(usize, usize), String> {
114        let mut errors: Vec<String> = vec![];
115        if (cells_wide - options.block_side) % options.block_stride != 0 {
116            errors.push(format!(
117                "block stride {} does not evenly divide (cells wide {} - block side {})",
118                options.block_stride, cells_wide, options.block_side
119            ));
120        }
121        if (cells_high - options.block_side) % options.block_stride != 0 {
122            errors.push(format!(
123                "block stride {} does not evenly divide (cells high {} - block side {})",
124                options.block_stride, cells_high, options.block_side
125            ));
126        }
127        if !errors.is_empty() {
128            return Err(Self::invalid_options_message(&errors));
129        }
130        Ok((
131            num_blocks(cells_wide, options.block_side, options.block_stride),
132            num_blocks(cells_high, options.block_side, options.block_stride),
133        ))
134    }
135
136    /// The total size in floats of the HoG descriptor with these dimensions.
137    pub fn descriptor_length(&self) -> usize {
138        self.blocks_wide * self.blocks_high * self.block_descriptor_length()
139    }
140
141    /// The size in floats of the descriptor for a single block.
142    fn block_descriptor_length(&self) -> usize {
143        self.options.orientations * self.options.block_side.pow(2)
144    }
145
146    /// Dimensions of a grid of cell histograms, viewed as a 3d array.
147    /// Innermost dimension is orientation bin, then horizontal cell location,
148    /// then vertical cell location.
149    fn cell_grid_lengths(&self) -> [usize; 3] {
150        [self.options.orientations, self.cells_wide, self.cells_high]
151    }
152
153    /// Dimensions of a grid of block descriptors, viewed as a 3d array.
154    /// Innermost dimension is block descriptor position, then horizontal block location,
155    /// then vertical block location.
156    fn block_grid_lengths(&self) -> [usize; 3] {
157        [
158            self.block_descriptor_length(),
159            self.blocks_wide,
160            self.blocks_high,
161        ]
162    }
163
164    /// Dimensions of a single block descriptor, viewed as a 3d array.
165    /// Innermost dimension is histogram bin, then horizontal cell location, then
166    /// vertical cell location.
167    fn block_internal_lengths(&self) -> [usize; 3] {
168        [
169            self.options.orientations,
170            self.options.block_side,
171            self.options.block_side,
172        ]
173    }
174
175    /// Area of an image cell in pixels.
176    fn cell_area(&self) -> usize {
177        self.options.cell_side * self.options.cell_side
178    }
179}
180
181/// Number of blocks required to cover `num_cells` cells when each block is
182/// `block_side` long and blocks are staggered by `block_stride`. Assumes that
183/// options are compatible.
184fn num_blocks(num_cells: usize, block_side: usize, block_stride: usize) -> usize {
185    (num_cells + block_stride - block_side) / block_stride
186}
187
188/// Computes the HoG descriptor of an image, or None if the provided
189/// options are incompatible with the image size.
190// TODO: support color images by taking the channel with maximum gradient at each point
191pub fn hog(image: &GrayImage, options: HogOptions) -> Result<Vec<f32>, String> {
192    match HogSpec::from_options(image.width(), image.height(), options) {
193        Err(e) => Err(e),
194        Ok(spec) => {
195            let mut grid: Array3d<f32> = cell_histograms(image, spec);
196            let grid_view = grid.view_mut();
197            let descriptor = hog_descriptor_from_hist_grid(grid_view, spec);
198            Ok(descriptor)
199        }
200    }
201}
202
203/// Computes the HoG descriptor of an image. Assumes that the spec and grid
204/// dimensions are consistent.
205fn hog_descriptor_from_hist_grid(grid: View3d<'_, f32>, spec: HogSpec) -> Vec<f32> {
206    let mut descriptor = Array3d::new(spec.block_grid_lengths());
207    {
208        let mut block_view = descriptor.view_mut();
209
210        for by in 0..spec.blocks_high {
211            for bx in 0..spec.blocks_wide {
212                let block_data = block_view.inner_slice_mut(bx, by);
213                let mut block = View3d::from_raw(block_data, spec.block_internal_lengths());
214
215                for iy in 0..spec.options.block_side {
216                    let cy = by * spec.options.block_stride + iy;
217                    for ix in 0..spec.options.block_side {
218                        let cx = bx * spec.options.block_stride + ix;
219                        let slice = block.inner_slice_mut(ix, iy);
220                        let hist = grid.inner_slice(cx, cy);
221                        copy(hist, slice);
222                    }
223                }
224            }
225        }
226
227        for by in 0..spec.blocks_high {
228            for bx in 0..spec.blocks_wide {
229                let norm = block_norm(&block_view, bx, by);
230                if norm > 0f32 {
231                    let block_mut = block_view.inner_slice_mut(bx, by);
232                    for i in 0..block_mut.len() {
233                        block_mut[i] /= norm;
234                    }
235                }
236            }
237        }
238    }
239
240    descriptor.data
241}
242
243/// L2 norm of the block descriptor at given location within an image descriptor.
244fn block_norm(view: &View3d<'_, f32>, bx: usize, by: usize) -> f32 {
245    let block_data = view.inner_slice(bx, by);
246    l2_norm(block_data)
247}
248
249fn copy<T: Copy>(from: &[T], to: &mut [T]) {
250    to.clone_from_slice(&from[..to.len()]);
251}
252
253/// Computes orientation histograms for each cell of an image. Assumes that
254/// the provided dimensions are valid.
255pub fn cell_histograms(image: &GrayImage, spec: HogSpec) -> Array3d<f32> {
256    let (width, height) = image.dimensions();
257    let mut grid = Array3d::new(spec.cell_grid_lengths());
258    let cell_area = spec.cell_area() as f32;
259    let cell_side = spec.options.cell_side as f32;
260    let horizontal = horizontal_sobel(image);
261    let vertical = vertical_sobel(image);
262    let interval = orientation_bin_width(spec.options);
263    let range = direction_range(spec.options);
264
265    for y in 0..height {
266        let mut grid_view = grid.view_mut();
267        let y_inter = Interpolation::from_position(y as f32 / cell_side);
268
269        for x in 0..width {
270            let x_inter = Interpolation::from_position(x as f32 / cell_side);
271
272            let h = horizontal.get_pixel(x, y)[0] as f32;
273            let v = vertical.get_pixel(x, y)[0] as f32;
274            let m = (h.powi(2) + v.powi(2)).sqrt();
275
276            let mut d = v.atan2(h);
277            if d < 0f32 {
278                d += range;
279            }
280            if !spec.options.signed && d >= f32::consts::PI {
281                d -= f32::consts::PI;
282            }
283
284            let o_inter =
285                Interpolation::from_position_wrapping(d / interval, spec.options.orientations);
286
287            for iy in 0..2usize {
288                let py = y_inter.indices[iy];
289
290                for ix in 0..2usize {
291                    let px = x_inter.indices[ix];
292
293                    for io in 0..2usize {
294                        let po = o_inter.indices[io];
295                        if contains_outer(&grid_view, px, py) {
296                            let wy = y_inter.weights[iy];
297                            let wx = x_inter.weights[ix];
298                            let wo = o_inter.weights[io];
299                            let up = wy * wx * wo * m / cell_area;
300                            let current = *grid_view.at_mut([po, px, py]);
301                            *grid_view.at_mut([po, px, py]) = current + up;
302                        }
303                    }
304                }
305            }
306        }
307    }
308
309    grid
310}
311
312/// True if the given outer two indices into a view are within bounds.
313fn contains_outer<T>(view: &View3d<'_, T>, u: usize, v: usize) -> bool {
314    u < view.lengths[1] && v < view.lengths[2]
315}
316
317/// Width of an orientation histogram bin in radians.
318fn orientation_bin_width(options: HogOptions) -> f32 {
319    direction_range(options) / (options.orientations as f32)
320}
321
322/// Length of the range of possible directions in radians.
323fn direction_range(options: HogOptions) -> f32 {
324    if options.signed {
325        2f32 * f32::consts::PI
326    } else {
327        f32::consts::PI
328    }
329}
330
331/// Indices and weights for an interpolated value.
332#[derive(Debug, Copy, Clone, PartialEq)]
333struct Interpolation {
334    indices: [usize; 2],
335    weights: [f32; 2],
336}
337
338impl Interpolation {
339    /// Creates new interpolation with provided indices and weights.
340    fn new(indices: [usize; 2], weights: [f32; 2]) -> Interpolation {
341        Interpolation { indices, weights }
342    }
343
344    /// Interpolates between two indices, without wrapping.
345    fn from_position(pos: f32) -> Interpolation {
346        let fraction = pos - pos.floor();
347        Self::new(
348            [pos as usize, pos as usize + 1],
349            [1f32 - fraction, fraction],
350        )
351    }
352
353    /// Interpolates between two indices, wrapping the right index.
354    /// Assumes that the left index is within bounds.
355    fn from_position_wrapping(pos: f32, length: usize) -> Interpolation {
356        let mut right = (pos as usize) + 1;
357        if right >= length {
358            right = 0;
359        }
360        let fraction = pos - pos.floor();
361        Self::new([pos as usize, right], [1f32 - fraction, fraction])
362    }
363}
364
365/// Visualises an array of orientation histograms.
366/// The dimensions of the provided Array3d are orientation bucket,
367/// horizontal location of the cell, then vertical location of the cell.
368/// Note that we ignore block-level aggregation or normalisation here.
369/// Each rendered star has side length `star_side`, so the image will have
370/// width grid.lengths[1] * `star_side` and height grid.lengths[2] * `star_side`.
371pub fn render_hist_grid(star_side: u32, grid: &View3d<'_, f32>, signed: bool) -> Image<Luma<u8>> {
372    let width = grid.lengths[1] as u32 * star_side;
373    let height = grid.lengths[2] as u32 * star_side;
374    let mut out = ImageBuffer::new(width, height);
375
376    for y in 0..grid.lengths[2] {
377        let y_window = y as u32 * star_side;
378        for x in 0..grid.lengths[1] {
379            let x_window = x as u32 * star_side;
380            let mut window = out.sub_image(x_window, y_window, star_side, star_side);
381            let hist = grid.inner_slice(x, y);
382            draw_star_mut(window.inner_mut(), hist, signed);
383        }
384    }
385
386    out
387}
388
389/// Draws a ray from the center of an image in place, in a direction theta radians
390/// clockwise from the y axis (recall that image coordinates increase from
391/// top left to bottom right).
392fn draw_ray_mut<I>(image: &mut I, theta: f32, color: I::Pixel)
393where
394    I: GenericImage,
395{
396    use crate::drawing::draw_line_segment_mut;
397    use std::cmp;
398
399    let (width, height) = image.dimensions();
400    let scale = cmp::max(width, height) as f32 / 2f32;
401    let start_x = (width / 2) as f32;
402    let start_y = (height / 2) as f32;
403    let start = (start_x, start_y);
404    let x_step = -scale * theta.sin();
405    let y_step = scale * theta.cos();
406    let end = (start_x + x_step, start_y + y_step);
407
408    draw_line_segment_mut(image, start, end, color);
409}
410
411/// Draws a visualisation of a histogram of edge orientation strengths as a collection of rays
412/// emanating from the centre of a square image. The intensity of each ray is
413/// proportional to the value of the bucket centred on its direction.
414fn draw_star_mut<I>(image: &mut I, hist: &[f32], signed: bool)
415where
416    I: GenericImage<Pixel = Luma<u8>>,
417{
418    let orientations = hist.len() as f32;
419    for bucket in 0..hist.len() {
420        if signed {
421            let dir = (2f32 * f32::consts::PI * bucket as f32) / orientations;
422            let intensity = Clamp::clamp(hist[bucket]);
423            draw_ray_mut(image, dir, Luma([intensity]));
424        } else {
425            let dir = (f32::consts::PI * bucket as f32) / orientations;
426            let intensity = Clamp::clamp(hist[bucket]);
427            draw_ray_mut(image, dir, Luma([intensity]));
428            draw_ray_mut(image, dir + f32::consts::PI, Luma([intensity]));
429        }
430    }
431}
432
433/// A 3d array that owns its data.
434pub struct Array3d<T> {
435    /// The owned data.
436    data: Vec<T>,
437    /// Lengths of the dimensions, from innermost (i.e. fastest-varying) to outermost.
438    lengths: [usize; 3],
439}
440
441/// A view into a 3d array.
442pub struct View3d<'a, T> {
443    /// The underlying data.
444    data: &'a mut [T],
445    /// Lengths of the dimensions, from innermost (i.e. fastest-varying) to outermost.
446    lengths: [usize; 3],
447}
448
449impl<T: Zero + Clone> Array3d<T> {
450    /// Allocates a new Array3d with the given dimensions.
451    fn new(lengths: [usize; 3]) -> Array3d<T> {
452        let data = vec![Zero::zero(); data_length(lengths)];
453        Array3d { data, lengths }
454    }
455
456    /// Provides a 3d view of the data.
457    pub fn view_mut(&mut self) -> View3d<'_, T> {
458        View3d::from_raw(&mut self.data, self.lengths)
459    }
460}
461
462impl<'a, T> View3d<'a, T> {
463    /// Constructs index from existing data and the lengths of the desired dimensions.
464    fn from_raw(data: &'a mut [T], lengths: [usize; 3]) -> View3d<'a, T> {
465        View3d { data, lengths }
466    }
467
468    /// A mutable reference from a 3d index.
469    fn at_mut(&mut self, indices: [usize; 3]) -> &mut T {
470        &mut self.data[self.offset(indices)]
471    }
472
473    /// All entries with the given outer dimensions. As the first dimension
474    /// is fastest varying, this is a contiguous slice.
475    fn inner_slice(&self, x1: usize, x2: usize) -> &[T] {
476        let offset = self.offset([0, x1, x2]);
477        &self.data[offset..offset + self.lengths[0]]
478    }
479
480    /// All entries with the given outer dimensions. As the first dimension
481    /// is fastest varying, this is a contiguous slice.
482    fn inner_slice_mut(&mut self, x1: usize, x2: usize) -> &mut [T] {
483        let offset = self.offset([0, x1, x2]);
484        &mut self.data[offset..offset + self.lengths[0]]
485    }
486
487    fn offset(&self, indices: [usize; 3]) -> usize {
488        indices[2] * self.lengths[1] * self.lengths[0] + indices[1] * self.lengths[0] + indices[0]
489    }
490}
491
492/// Length of array needed for the given dimensions.
493fn data_length(lengths: [usize; 3]) -> usize {
494    lengths[0] * lengths[1] * lengths[2]
495}
496
497#[cfg(test)]
498mod tests {
499    use super::*;
500    use crate::utils::gray_bench_image;
501    use ::test;
502
503    #[test]
504    fn test_num_blocks() {
505        // -----
506        // ***
507        //   ***
508        assert_eq!(num_blocks(5, 3, 2), 2);
509        // -----
510        // *****
511        assert_eq!(num_blocks(5, 5, 2), 1);
512        // ----
513        // **
514        //   **
515        assert_eq!(num_blocks(4, 2, 2), 2);
516        // ---
517        // *
518        //  *
519        //   *
520        assert_eq!(num_blocks(3, 1, 1), 3);
521    }
522
523    #[test]
524    fn test_hog_spec_valid_options() {
525        assert_eq!(
526            HogSpec::from_options(40, 40, HogOptions::new(8, true, 5, 2, 1))
527                .unwrap()
528                .descriptor_length(),
529            1568
530        );
531        assert_eq!(
532            HogSpec::from_options(40, 40, HogOptions::new(9, true, 4, 2, 1))
533                .unwrap()
534                .descriptor_length(),
535            2916
536        );
537        assert_eq!(
538            HogSpec::from_options(40, 40, HogOptions::new(8, true, 4, 2, 1))
539                .unwrap()
540                .descriptor_length(),
541            2592
542        );
543    }
544
545    #[test]
546    fn test_hog_spec_invalid_options() {
547        let opts = HogOptions {
548            orientations: 8,
549            signed: true,
550            cell_side: 3,
551            block_side: 4,
552            block_stride: 2,
553        };
554        let expected = "Invalid HoG options: block stride 2 does not evenly divide (cells wide 7 - block side 4), \
555			block stride 2 does not evenly divide (cells high 7 - block side 4)";
556        assert_eq!(
557            HogSpec::from_options(21, 21, opts),
558            Err(expected.to_owned())
559        );
560    }
561
562    #[test]
563    fn test_interpolation_from_position() {
564        assert_eq!(
565            Interpolation::from_position(10f32),
566            Interpolation::new([10, 11], [1f32, 0f32])
567        );
568        assert_eq!(
569            Interpolation::from_position(10.25f32),
570            Interpolation::new([10, 11], [0.75f32, 0.25f32])
571        );
572    }
573
574    #[test]
575    fn test_interpolation_from_position_wrapping() {
576        assert_eq!(
577            Interpolation::from_position_wrapping(10f32, 11),
578            Interpolation::new([10, 0], [1f32, 0f32])
579        );
580        assert_eq!(
581            Interpolation::from_position_wrapping(10.25f32, 11),
582            Interpolation::new([10, 0], [0.75f32, 0.25f32])
583        );
584        assert_eq!(
585            Interpolation::from_position_wrapping(10f32, 12),
586            Interpolation::new([10, 11], [1f32, 0f32])
587        );
588        assert_eq!(
589            Interpolation::from_position_wrapping(10.25f32, 12),
590            Interpolation::new([10, 11], [0.75f32, 0.25f32])
591        );
592    }
593
594    #[test]
595    fn test_hog_descriptor_from_hist_grid() {
596        // A grid of cells 3 wide and 2 high. Each cell contains a histogram of 2 items.
597        // There are two blocks, the left covering the leftmost 2x2 region, and the
598        // right covering the rightmost 2x2 region. These regions overlap by one cell column.
599        // There's no significance to the contents of the histograms used here, we're
600        // just checking that the values are binned and normalised correctly.
601        let opts = HogOptions {
602            orientations: 2,
603            signed: true,
604            cell_side: 5,
605            block_side: 2,
606            block_stride: 1,
607        };
608
609        let spec = HogSpec::from_options(15, 10, opts).unwrap();
610
611        let mut grid = Array3d::<f32>::new([2, 3, 2]);
612        let mut view = grid.view_mut();
613
614        {
615            let tl = view.inner_slice_mut(0, 0);
616            copy(&[1f32, 3f32, 2f32], tl);
617        }
618        {
619            let tm = view.inner_slice_mut(1, 0);
620            copy(&[2f32, 3f32, 5f32], tm);
621        }
622        {
623            let tr = view.inner_slice_mut(2, 0);
624            copy(&[0f32, 1f32, 0f32], tr);
625        }
626        {
627            let bl = view.inner_slice_mut(0, 1);
628            copy(&[5f32, 0f32, 7f32], bl);
629        }
630        {
631            let bm = view.inner_slice_mut(1, 1);
632            copy(&[3f32, 7f32, 9f32], bm);
633        }
634        {
635            let br = view.inner_slice_mut(2, 1);
636            copy(&[6f32, 1f32, 4f32], br);
637        }
638
639        let descriptor = hog_descriptor_from_hist_grid(view, spec);
640        assert_eq!(descriptor.len(), 16);
641
642        let counts = [1, 3, 2, 3, 5, 0, 3, 7, 2, 3, 0, 1, 3, 7, 6, 1];
643        let mut expected = [0f32; 16];
644
645        let left_norm = 106f32.sqrt();
646        let right_norm = 109f32.sqrt();
647
648        for i in 0..8 {
649            expected[i] = counts[i] as f32 / left_norm;
650        }
651        for i in 8..16 {
652            expected[i] = counts[i] as f32 / right_norm;
653        }
654
655        assert_eq!(descriptor, expected);
656    }
657
658    #[test]
659    fn test_direction_interpolation_within_bounds() {
660        let image = gray_image!(
661			2, 1, 0;
662			2, 1, 0;
663			2, 1, 0);
664
665        let opts_signed = HogOptions {
666            orientations: 8,
667            signed: true,
668            cell_side: 3,
669            block_side: 1,
670            block_stride: 1,
671        };
672
673        let desc_signed = hog(&image, opts_signed);
674        test::black_box(desc_signed.unwrap());
675
676        let opts_unsigned = HogOptions {
677            orientations: 8,
678            signed: false,
679            cell_side: 3,
680            block_side: 1,
681            block_stride: 1,
682        };
683
684        let desc_unsigned = hog(&image, opts_unsigned);
685        test::black_box(desc_unsigned.unwrap());
686    }
687
688    #[bench]
689    fn bench_hog(b: &mut test::Bencher) {
690        let image = gray_bench_image(88, 88);
691        let opts = HogOptions {
692            orientations: 8,
693            signed: true,
694            cell_side: 8,
695            block_side: 3,
696            block_stride: 2,
697        };
698        b.iter(|| {
699            let desc = hog(&image, opts);
700            test::black_box(desc.unwrap());
701        });
702    }
703}