Deps
Last parallel sytems
course on rayon: dependencies, prefix.
In case it does not stream, here is a direct link to the video.
Slides are on the parallel system website.
Written prefix code is here:
use rayon::prelude::*;
use std::collections::LinkedList;
// replace each element of s
// by the sum of the whole subslice up to s
fn prefix(s: &mut [u64]) {
s.iter_mut().fold(0, |c, e| {
*e += c;
*e
});
}
fn par_prefix_1(s: &mut [u64]) {
if s.len() < 100_000 {
prefix(s)
} else {
let n = s.len();
let (left, right) = s.split_at_mut(n / 2);
rayon::join(|| par_prefix_1(left), || par_prefix_1(right));
let u = left.last().copied().unwrap_or(0);
right.par_iter_mut().for_each(|e| *e += u);
}
}
fn par_prefix_2(s: &mut [u64]) {
let slices: LinkedList<_> = rayon::iter::split(s, |s| {
let n = s.len();
if n < 4_000_000 {
(s, None)
} else {
let (left, right) = s.split_at_mut(n / 2);
(left, Some(right))
}
})
.map(|s| {
prefix(s);
s
})
.collect();
let mut to_update = slices.into_iter();
let first_slice = to_update.next();
let value = first_slice.and_then(|s| s.last().copied()).unwrap_or(0);
to_update.fold(value, |v, s| {
s.par_iter_mut().for_each(|e| *e += v);
s.last().copied().unwrap_or(v)
});
}
const SIZE: usize = 10_000_000;
fn main() {
let mut v = vec![1; SIZE];
fast_tracer::svg("par1.svg", || {
let start = std::time::Instant::now();
par_prefix_1(&mut v);
println!("par1: {:?}", start.elapsed());
})
.unwrap();
assert!(v.iter().copied().eq(1..=SIZE as u64));
let mut v = vec![1; SIZE];
fast_tracer::svg("par2.svg", || {
let start = std::time::Instant::now();
par_prefix_2(&mut v);
println!("par2: {:?}", start.elapsed());
})
.unwrap();
assert!(v.iter().copied().eq(1..=SIZE as u64));
let mut v = vec![1; SIZE];
let start = std::time::Instant::now();
prefix(&mut v);
println!("seq: {:?}", start.elapsed());
assert!(v.iter().copied().eq(1..=SIZE as u64));
}