From 5d95f10465e42dd6f752dc46ad78f057f05ff4d5 Mon Sep 17 00:00:00 2001 From: Jack Kinsey Date: Sat, 7 Dec 2024 00:48:30 -0500 Subject: [PATCH] Improve day 6 performance I profiled with `cargo flamegraph` and learned that the HashMap operations were taking forever. So, I replaced the HashMap with a straight Vec*, which bought a 10x improvement. Further profiling revealed complaints with re-finding the guard starting position in every single blocked-walk, so I added a way to provide the position to the constructor (since it never changes). This brought the total speedup to 15x, from ~1.5s on the release build to ~0.1s. *The memory usage (valgrind, valgrind --tool=massif, valgrind --tool=dhat) between the two data structures seems comparable. I guess if the walks were significantly sparser on the map the HashMap might win, but apparently they aren't, so we end up using basically as much memory in the HashMap anyway. --- src/day06.rs | 74 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 26 deletions(-) diff --git a/src/day06.rs b/src/day06.rs index cdd044d..be0daea 100644 --- a/src/day06.rs +++ b/src/day06.rs @@ -1,10 +1,10 @@ -use std::collections::{HashMap, HashSet}; +use std::collections::HashSet; fn input() -> &'static str { include_str!("../input/day06.txt") } -#[derive(Debug, Hash, Eq, PartialEq, Copy, Clone)] +#[derive(Debug, Eq, PartialEq, Copy, Clone)] enum Dir { N, E, @@ -12,7 +12,7 @@ enum Dir { W, } -#[derive(Debug, Hash, Eq, PartialEq, Copy, Clone)] +#[derive(Debug, Eq, PartialEq, Copy, Clone)] enum Tile { Space, Obstacle, @@ -57,22 +57,52 @@ impl<'a> GuardWalk<'a> { } } - fn new_blocked(map: &'a Vec>, block: (usize, usize)) -> Self { + fn new_blocked( + map: &'a Vec>, + block: (usize, usize), + pos: Option<(usize, usize)>, + ) -> Self { GuardWalk { map, guard: None, - pos: None, + pos, block: Some(block), } } + fn find_guard(&self) -> Option<(usize, usize)> { + if let Some(pos) = self.pos { + Some(pos) + } else { + self.map + .iter() + .zip(0..) + .flat_map(move |(v, i)| { + v.iter() + .zip(0..) + .filter(move |(&c, _)| matches!(c, Tile::Guard(_))) + .map(move |(_, j)| (i, j)) + }) + .last() + } + } + fn has_cycle(mut self) -> bool { - let mut counts: HashMap<((usize, usize), Tile), u32> = HashMap::new(); - while let Some(pos) = self.next() { - if let Some(guard) = self.guard { - let count = counts.entry((pos, guard)).or_default(); - *count += 1; - if *count >= 3 { + let n = self.map.len(); + // a straight vec is much faster than a hashmap + let mut counts: Vec = vec![0; n * n * 4]; + while let Some((i, j)) = self.next() { + if let Some(Tile::Guard(dir)) = self.guard { + let k = match dir { + Dir::N => 0, + Dir::E => 1, + Dir::S => 2, + Dir::W => 3, + }; + counts[i * n * 4 + j * 4 + k] += 1; + if counts[i * n * 4 + j * 4 + k] >= 3 { + // because the guard idles for a "turn" when it hits an obstacle we have to say + // 3 rather than 2 return true; } } @@ -81,22 +111,12 @@ impl<'a> GuardWalk<'a> { } } -impl<'a> Iterator for GuardWalk<'a> { +impl Iterator for GuardWalk<'_> { type Item = (usize, usize); fn next(&mut self) -> Option { if self.guard.is_none() || self.pos.is_none() { - self.pos = self - .map - .iter() - .zip(0..) - .flat_map(move |(v, i)| { - v.iter() - .zip(0..) - .filter(move |(&c, _)| matches!(c, Tile::Guard(_))) - .map(move |(_, j)| (i, j)) - }) - .last(); + self.pos = self.find_guard(); if let Some(pos) = self.pos { self.guard = Some(self.map[pos.0][pos.1]); if matches!(self.block, Some(block) if block == pos) { @@ -152,10 +172,12 @@ pub fn part1() { pub fn part2() { let map = parse(input()); - let n = GuardWalk::new(&map) + let walk = GuardWalk::new(&map); + let guard_pos = walk.find_guard(); + let n = walk .collect::>() .iter() - .filter(|&&p| GuardWalk::new_blocked(&map, p).has_cycle()) + .filter(|&&p| GuardWalk::new_blocked(&map, p, guard_pos).has_cycle()) .count(); println!("Day 6 Part 2: {}", n); } @@ -235,7 +257,7 @@ mod test { GuardWalk::new(&map) .collect::>() .iter() - .filter(|&&p| GuardWalk::new_blocked(&map, p).has_cycle()) + .filter(|&&p| GuardWalk::new_blocked(&map, p, None).has_cycle()) .count(), 6 ) -- 2.38.5