Skip to content

Commit 1877604

Browse files
committed
Faster SIMD approach
1 parent 5639196 commit 1877604

File tree

3 files changed

+345
-130
lines changed

3 files changed

+345
-130
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
213213
| 8 | [Handheld Halting](https://adventofcode.com/2020/day/8) | [Source](src/year2020/day08.rs) | 8 |
214214
| 9 | [Encoding Error](https://adventofcode.com/2020/day/9) | [Source](src/year2020/day09.rs) | 9 |
215215
| 10 | [Adapter Array](https://adventofcode.com/2020/day/10) | [Source](src/year2020/day10.rs) | 1 |
216-
| 11 | [Seating System](https://adventofcode.com/2020/day/11) | [Source](src/year2020/day11.rs) | 4537 |
216+
| 11 | [Seating System](https://adventofcode.com/2020/day/11) | [Source](src/year2020/day11.rs) | 831 |
217217
| 12 | [Rain Risk](https://adventofcode.com/2020/day/12) | [Source](src/year2020/day12.rs) | 12 |
218218
| 13 | [Shuttle Search](https://adventofcode.com/2020/day/13) | [Source](src/year2020/day13.rs) | 1 |
219219
| 14 | [Docking Data](https://adventofcode.com/2020/day/14) | [Source](src/year2020/day14.rs) | 83 |
@@ -226,7 +226,7 @@ Performance is reasonable even on older hardware, for example a 2011 MacBook Pro
226226
| 21 | [Allergen Assessment](https://adventofcode.com/2020/day/21) | [Source](src/year2020/day21.rs) | 45 |
227227
| 22 | [Crab Combat](https://adventofcode.com/2020/day/22) | [Source](src/year2020/day22.rs) | 5911 |
228228
| 23 | [Crab Cups](https://adventofcode.com/2020/day/23) | [Source](src/year2020/day23.rs) | 110000 |
229-
| 24 | [Lobby Layout](https://adventofcode.com/2020/day/24) | [Source](src/year2020/day24.rs) | 4320 |
229+
| 24 | [Lobby Layout](https://adventofcode.com/2020/day/24) | [Source](src/year2020/day24.rs) | 330 |
230230
| 25 | [Combo Breaker](https://adventofcode.com/2020/day/25) | [Source](src/year2020/day25.rs) | 20 |
231231

232232
## 2019

src/year2020/day11.rs

Lines changed: 189 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -6,112 +6,227 @@
66
//!
77
//! For part two we can further optimize by precalculating the locations of the nearest visible
88
//! seats only once then reusing that information for each step.
9+
//!
10+
//! The SIMD version speeds things up by calculating 32 lanes at a time.
911
use crate::util::grid::*;
1012
use crate::util::point::*;
11-
use std::mem::swap;
12-
13-
const FLOOR: u8 = b'.';
14-
const DIRECTIONS: [Point; 8] = [
15-
Point::new(-1, -1),
16-
Point::new(0, -1),
17-
Point::new(1, -1),
18-
Point::new(-1, 0),
19-
Point::new(1, 0),
20-
Point::new(-1, 1),
21-
Point::new(0, 1),
22-
Point::new(1, 1),
23-
];
24-
25-
struct Seat {
26-
index: u16,
27-
size: u8,
28-
neighbors: [u16; 8],
29-
}
3013

31-
impl Seat {
32-
#[inline]
33-
fn push(&mut self, index: u16) {
34-
self.neighbors[self.size as usize] = index;
35-
self.size += 1;
36-
}
37-
}
14+
const SEAT: u8 = b'L';
3815

3916
pub fn parse(input: &str) -> Grid<u8> {
4017
Grid::parse(input)
4118
}
4219

4320
pub fn part1(input: &Grid<u8>) -> u32 {
44-
simulate(input, true, 4)
21+
#[cfg(not(feature = "simd"))]
22+
let result = scalar::simulate(input, false, 4);
23+
24+
#[cfg(feature = "simd")]
25+
let result = simd::simulate(input, false, 4);
26+
27+
result
4528
}
4629

4730
pub fn part2(input: &Grid<u8>) -> u32 {
48-
simulate(input, false, 5)
31+
#[cfg(not(feature = "simd"))]
32+
let result = scalar::simulate(input, true, 5);
33+
34+
#[cfg(feature = "simd")]
35+
let result = simd::simulate(input, true, 5);
36+
37+
result
4938
}
5039

51-
pub fn simulate(input: &Grid<u8>, part_one: bool, limit: u8) -> u32 {
52-
let width = input.width;
53-
let height = input.height;
54-
let mut seats = Vec::new();
40+
#[cfg(not(feature = "simd"))]
41+
mod scalar {
42+
use super::*;
5543

56-
for y in 0..height {
57-
for x in 0..width {
58-
let point = Point::new(x, y);
59-
if input[point] == FLOOR {
60-
continue;
61-
}
44+
struct Seat {
45+
point: Point,
46+
size: usize,
47+
neighbors: [Point; 8],
48+
}
49+
50+
impl Seat {
51+
fn push(&mut self, index: Point) {
52+
self.neighbors[self.size] = index;
53+
self.size += 1;
54+
}
55+
}
6256

63-
let mut seat = Seat { index: (width * y + x) as u16, size: 0, neighbors: [0; 8] };
57+
pub(super) fn simulate(input: &Grid<u8>, part_two: bool, limit: u8) -> u32 {
58+
let mut seats = Vec::new();
6459

65-
for direction in DIRECTIONS {
66-
if part_one {
67-
let next = point + direction;
68-
if input.contains(next) && input[next] != FLOOR {
69-
seat.push((width * next.y + next.x) as u16);
70-
}
71-
} else {
72-
let mut next = point + direction;
73-
while input.contains(next) {
74-
if input[next] != FLOOR {
75-
seat.push((width * next.y + next.x) as u16);
76-
break;
60+
for y in 0..input.height {
61+
for x in 0..input.width {
62+
let point = Point::new(x, y);
63+
if input[point] != SEAT {
64+
continue;
65+
}
66+
67+
let mut seat = Seat { point, size: 0, neighbors: [ORIGIN; 8] };
68+
69+
for direction in DIAGONAL {
70+
if part_two {
71+
let mut next = point + direction;
72+
while input.contains(next) {
73+
if input[next] == SEAT {
74+
seat.push(next);
75+
break;
76+
}
77+
next += direction;
78+
}
79+
} else {
80+
let next = point + direction;
81+
if input.contains(next) && input[next] == SEAT {
82+
seat.push(next);
7783
}
78-
next += direction;
7984
}
8085
}
86+
87+
seats.push(seat);
88+
}
89+
}
90+
91+
let mut current = input.same_size_with(0);
92+
let mut next = input.same_size_with(0);
93+
94+
loop {
95+
for seat in &seats {
96+
let total: u8 = seat.neighbors[0..seat.size].iter().map(|&i| current[i]).sum();
97+
98+
next[seat.point] = if current[seat.point] == 0 {
99+
u8::from(total == 0)
100+
} else {
101+
u8::from(total < limit)
102+
};
81103
}
82104

83-
seats.push(seat);
105+
(current, next) = (next, current);
106+
if current == next {
107+
return current.bytes.iter().map(|&n| n as u32).sum();
108+
}
84109
}
85110
}
111+
}
86112

87-
let mut current = vec![0; (width * height) as usize];
88-
let mut next = vec![0; (width * height) as usize];
89-
let mut change = true;
113+
#[cfg(feature = "simd")]
114+
mod simd {
115+
use super::*;
116+
use std::simd::cmp::SimdPartialEq as _;
117+
use std::simd::cmp::SimdPartialOrd as _;
118+
use std::simd::*;
119+
120+
const LANE_WIDTH: usize = 32;
121+
type Vector = Simd<u8, LANE_WIDTH>;
122+
123+
pub(super) fn simulate(input: &Grid<u8>, part_two: bool, limit: u8) -> u32 {
124+
// Input grid is taller than it is wide. To make efficient use of the wide SIMD operations:
125+
// * Add an empty border to eliminate bounds checking.
126+
// * Transpose the input grid to make it wider than it is tall.
127+
// * Round width up to next multiple of LANE_WIDTH.
128+
let width = 2 + (input.height as usize).next_multiple_of(LANE_WIDTH) as i32;
129+
let height = 2 + input.width;
130+
let mut grid = Grid::new(width, height, 0);
131+
132+
for y in 0..input.height {
133+
for x in 0..input.width {
134+
let from = Point::new(x, y);
135+
let to = Point::new(y + 1, x + 1);
136+
grid[to] = u8::from(input[from] == SEAT);
137+
}
138+
}
90139

91-
while change {
92-
change = false;
140+
// Build a list of seats that are non-adjacent but visible to each other.
141+
let mut visible = Vec::new();
93142

94-
for seat in &seats {
95-
let index = seat.index as usize;
96-
let mut total = 0;
143+
if part_two {
144+
for y in 0..height {
145+
for x in 0..width {
146+
let from = Point::new(x, y);
147+
if grid[from] == 0 {
148+
continue;
149+
}
97150

98-
for i in 0..seat.size {
99-
total += current[seat.neighbors[i as usize] as usize];
100-
}
151+
for direction in DIAGONAL {
152+
if grid[from + direction] == 1 {
153+
continue;
154+
}
101155

102-
if current[index] == 0 && total == 0 {
103-
next[index] = 1;
104-
change = true;
105-
} else if current[index] == 1 && total >= limit {
106-
next[index] = 0;
107-
change = true;
108-
} else {
109-
next[index] = current[index];
156+
let mut to = from + direction * 2;
157+
while grid.contains(to) {
158+
if grid[to] == 1 {
159+
visible.push((from, to));
160+
break;
161+
}
162+
to += direction;
163+
}
164+
}
165+
}
110166
}
111167
}
112168

113-
swap(&mut current, &mut next);
169+
// Common constants.
170+
let zero: Vector = Simd::splat(0);
171+
let one: Vector = Simd::splat(1);
172+
let limit: Vector = Simd::splat(limit);
173+
174+
let mut current = grid.same_size_with(0);
175+
let mut next = grid.same_size_with(0);
176+
let mut extra = grid.same_size_with(0);
177+
178+
loop {
179+
// Add any non-adjacent seats that are visible to the total.
180+
if part_two {
181+
extra.bytes.fill(0);
182+
for &(from, to) in &visible {
183+
extra[to] += current[from];
184+
}
185+
}
186+
187+
// Process grid column by column using wide SIMD vectors.
188+
for x in (1..width - 1).step_by(LANE_WIDTH) {
189+
let mut above = horizontal_neighbors(&current, x, 0);
190+
let mut row = horizontal_neighbors(&current, x, 1);
191+
192+
for y in 1..height - 1 {
193+
let index = (width * y + x) as usize;
194+
let seats = Simd::from_slice(&grid.bytes[index..]);
195+
let occupied = Simd::from_slice(&current.bytes[index..]);
196+
let extra = Simd::from_slice(&extra.bytes[index..]);
197+
198+
let below = horizontal_neighbors(&current, x, y + 1);
199+
let total = row + above + below + extra;
200+
above = row;
201+
row = below;
202+
203+
// Empty to occupied.
204+
let first = total.simd_eq(zero).select(one, zero);
205+
// Occupied to empty.
206+
let second = total.simd_le(limit).select(occupied, zero);
207+
// Nobody sits on the floor.
208+
let result = (first + second) & seats;
209+
210+
result.copy_to_slice(&mut next.bytes[index..]);
211+
}
212+
}
213+
214+
(current, next) = (next, current);
215+
if current == next {
216+
return current.bytes.iter().map(|&b| b as u32).sum();
217+
}
218+
}
114219
}
115220

116-
current.iter().map(|&n| n as u32).sum()
221+
/// Create SIMD vector of the sum of left, right and center lanes.
222+
#[inline]
223+
fn horizontal_neighbors(grid: &Grid<u8>, x: i32, y: i32) -> Vector {
224+
let index = (grid.width * y + x) as usize;
225+
226+
let center = Simd::from_slice(&grid.bytes[index..]);
227+
let left = center.shift_elements_left::<1>(grid.bytes[index + LANE_WIDTH]);
228+
let right = center.shift_elements_right::<1>(grid.bytes[index - 1]);
229+
230+
center + left + right
231+
}
117232
}

0 commit comments

Comments
 (0)