How does rayon::split allow custom parallel recursion patterns beyond par_iter()?

rayon::iter::split provides fine-grained control over parallel execution by letting you define a custom divide-and-conquer strategy. Unlike par_iter(), which processes collection elements in parallel with automatic work distribution, split takes your data plus a single splitter closure that decides how to divide the work and when to stop splitting; it returns a ParallelIterator over the resulting pieces, so processing and combining happen through ordinary adaptors like map, sum, and reduce. This enables recursive algorithms like parallel quicksort, tree traversals, and divide-and-conquer patterns that don't fit the element-wise iterator model.

Basic Split Usage

use rayon::iter::split;
use rayon::prelude::*;
 
/// Demonstrates rayon::iter::split: recursively halve a Vec until pieces
/// are small, sum each piece in parallel, and combine with a parallel sum.
fn basic_split() {
    let data = vec![1, 2, 3, 4, 5, 6, 7, 8];

    // `split` takes the data plus ONE splitter closure. The closure either
    // divides its input — returning `(left, Some(right))` — or stops
    // dividing by returning `(piece, None)`.
    let total: i32 = split(data, |mut vec| {
        if vec.len() > 4 {
            // How to split the data: hand the upper half to a new piece.
            let right = vec.split_off(vec.len() / 2);
            (vec, Some(right))
        } else {
            // Don't split further — `None` is the stop signal.
            (vec, None)
        }
    })
    // Processing and combining are expressed on the returned parallel
    // iterator, not as extra arguments to `split`.
    .map(|piece| piece.iter().sum::<i32>())
    .sum();

    println!("Sum: {}", total);
}

split takes the data plus one splitter closure that returns `(piece, Option<piece>)` — `Some` to keep dividing, `None` to stop. Processing and combining are then expressed with `map`, `sum`, or `reduce` on the parallel iterator it returns.

Comparison with par_iter

use rayon::iter::split;
use rayon::prelude::*;
 
/// Contrasts par_iter's automatic work distribution with split's
/// explicit divide-and-conquer control.
fn comparison() {
    let data: Vec<i32> = (1..=100).collect();

    // par_iter: automatic parallel processing
    let sum: i32 = data.par_iter().sum();

    // split: custom division control — one splitter closure; per-piece
    // work and combination happen on the returned parallel iterator.
    let total: i32 = split(data, |mut vec| {
        if vec.len() > 10 {
            let right = vec.split_off(vec.len() / 2);
            (vec, Some(right))
        } else {
            (vec, None) // small enough: stop splitting
        }
    })
    .map(|piece| piece.iter().sum::<i32>())
    .sum();

    // par_iter is simpler for element-wise operations;
    // split is better for recursive structures.
    let _ = (sum, total); // silence unused-variable warnings in this demo
}

par_iter is best for collections; split excels at custom recursion.

Parallel Quicksort with Split

use rayon::iter::split;
use rayon::prelude::*;
 
/// Sorts `data` in place with a parallel quicksort.
///
/// Small slices fall back to a sequential sort; larger slices are
/// partitioned around a pivot and the two halves are sorted concurrently
/// with `rayon::join`.
fn parallel_quicksort(data: &mut [i32]) {
    // Base case: sequential sort for small arrays.
    // (This also covers len <= 1, so no separate check is needed.)
    if data.len() <= 32 {
        data.sort_unstable(); // unstable sort: faster, and stability is moot for i32
        return;
    }

    // Partition around a pivot; elements <= pivot end up left of pivot_index.
    let pivot_index = partition(data);
    let (left, right) = data.split_at_mut(pivot_index);

    // Sort the two halves as parallel tasks; rayon's work-stealing
    // scheduler balances the (possibly uneven) partition sizes.
    rayon::join(
        || parallel_quicksort(left),
        || parallel_quicksort(right),
    );
}
 
/// Lomuto partition: uses the last element as the pivot, moves every
/// element <= pivot before the returned index, places the pivot at that
/// index, and returns it.
fn partition(data: &mut [i32]) -> usize {
    let last = data.len() - 1;
    let pivot = data[last];
    let mut boundary = 0;

    // Sweep everything except the pivot itself; `boundary` tracks where
    // the next small element belongs.
    for idx in 0..last {
        if data[idx] <= pivot {
            data.swap(boundary, idx);
            boundary += 1;
        }
    }

    // Drop the pivot between the two partitions.
    data.swap(boundary, last);
    boundary
}
 
// Alternative using split directly
/// Quicksort expressed with rayon::iter::split: the splitter partitions
/// around a pivot, each small piece is sorted sequentially, and `reduce`
/// concatenates the sorted pieces back together in order.
fn quicksort_with_split(data: Vec<i32>) -> Vec<i32> {
    if data.len() <= 1 {
        return data;
    }

    split(data, |data| {
        if data.len() <= 32 {
            (data, None) // small enough: sorted sequentially below
        } else {
            // Partition: everything <= pivot goes left, the rest right.
            let pivot = data[data.len() - 1];
            let (less, greater): (Vec<i32>, Vec<i32>) =
                data.into_iter().partition(|&item| item <= pivot);
            // Progress guard: on all-equal input `greater` is empty and the
            // split made no progress, so stop instead of looping forever.
            if greater.is_empty() {
                (less, None)
            } else {
                (less, Some(greater))
            }
        }
    })
    .map(|mut piece| {
        piece.sort_unstable();
        piece
    })
    // Pieces arrive in order (left <= pivot < right), so in-order
    // concatenation yields a fully sorted vector.
    .reduce(Vec::new, |mut left, right| {
        left.extend(right);
        left
    })
}

split naturally models divide-and-conquer algorithms like quicksort.

Custom Split Thresholds

use rayon::iter::split;
use rayon::prelude::*;
 
/// Sums `data` in parallel, halving pieces until they are at most
/// `threshold` elements long.
fn sum_with_threshold(data: Vec<i32>, threshold: usize) -> i32 {
    // `move` captures `threshold` by value so the splitter is 'static.
    split(data, move |mut vec| {
        if vec.len() > threshold {
            let right = vec.split_off(vec.len() / 2);
            (vec, Some(right))
        } else {
            (vec, None) // signal: don't split further
        }
    })
    .map(|piece| piece.iter().sum::<i32>())
    .sum()
}
 
/// Sums in parallel with a split threshold derived from the thread-pool
/// size, aiming for roughly one piece per CPU.
fn adaptive_threshold(data: Vec<i32>) -> i32 {
    let cpu_count = rayon::current_num_threads();
    // Guard with max(1): if data.len() < cpu_count the division yields 0,
    // and a zero threshold would keep "splitting" single-element pieces
    // forever without making progress.
    let threshold = (data.len() / cpu_count).max(1);

    split(data, move |mut vec| {
        if vec.len() > threshold {
            let right = vec.split_off(vec.len() / 2);
            (vec, Some(right))
        } else {
            (vec, None)
        }
    })
    .map(|piece| piece.iter().sum::<i32>())
    .sum()
}

Control when parallelism is beneficial with custom thresholds.

Recursive Tree Traversal

use rayon::iter::split;
use rayon::prelude::*;
 
/// A simple n-ary tree node.
///
/// `Clone` is derived because the tree-processing examples in this file
/// clone child subtrees before recursing into them.
#[derive(Debug, Clone)]
struct TreeNode {
    value: i32,            // payload carried by this node
    children: Vec<TreeNode>, // zero or more subtrees
}
 
impl TreeNode {
    fn new(value: i32) -> Self {
        TreeNode { value, children: Vec::new() }
    }
    
    fn with_children(value: i32, children: Vec<TreeNode>) -> Self {
        TreeNode { value, children }
    }
}
 
fn sum_tree(tree: TreeNode) -> i32 {
    if tree.children.is_empty() {
        return tree.value;
    }
    
    // Split children into two groups
    let mid = tree.children.len() / 2;
    let left_children: Vec<TreeNode> = tree.children.into_iter().take(mid).collect();
    let right_children: Vec<TreeNode> = tree.children.into_iter().skip(mid).collect();
    
    if left_children.is_empty() {
        tree.value + right_children.into_iter().map(sum_tree).sum::<i32>()
    } else {
        let (left_sum, right_sum) = rayon::join(
            || left_children.into_iter().map(sum_tree).sum::<i32>(),
            || right_children.into_iter().map(sum_tree).sum::<i32>(),
        );
        tree.value + left_sum + right_sum
    }
}
 
// Using split directly with tree
/// Sums a tree with rayon::iter::split: nodes with a large fan-out are
/// divided by handing half their children to a synthetic zero-valued node;
/// each resulting piece is summed and the piece sums are added together.
fn sum_tree_split(tree: TreeNode) -> i32 {
    split(tree, |mut node| {
        if node.children.len() > 2 {
            // Keep `node` — with its own value and the first half of the
            // children — as the left piece, so no value is lost. (The
            // original built two fresh zero-valued nodes and silently
            // dropped `node.value` from the total.)
            let right_children = node.children.split_off(node.children.len() / 2);
            (node, Some(TreeNode::with_children(0, right_children)))
        } else {
            (node, None) // small fan-out: don't split
        }
    })
    .map(|node| {
        // Sum this piece: its own value plus its (at most two) subtrees.
        // Children are consumed by value, so no cloning is needed.
        node.value + node.children.into_iter().map(sum_tree_split).sum::<i32>()
    })
    .sum()
}

split handles irregular tree structures that don't fit iterator patterns.

Parallel Binary Search

use rayon::iter::split; // note: this example actually uses rayon::join, not split
 
/// Finds an index of `target` in `data`, probing the midpoint and then
/// searching both halves concurrently with `rayon::join`.
///
/// NOTE(review): despite the name, this searches BOTH halves after the
/// midpoint probe, so it does not exploit sortedness — it behaves as a
/// parallel exhaustive search. Confirm whether a true O(log n) binary
/// search was intended.
fn parallel_binary_search(data: &[i32], target: i32) -> Option<usize> {
    match data.len() {
        0 => None,
        // Sequential scan for small slices.
        1..=64 => data.iter().position(|&x| x == target),
        len => {
            let mid = len / 2;
            if data[mid] == target {
                return Some(mid);
            }

            // Search both sides in parallel; right-half hits are shifted
            // back into the original slice's index space.
            let (in_left, in_right) = rayon::join(
                || parallel_binary_search(&data[..mid], target),
                || parallel_binary_search(&data[mid + 1..], target).map(|i| i + mid + 1),
            );

            in_left.or(in_right)
        }
    }
}

This "parallel binary search" probes the midpoint and then searches both halves in parallel — a parallel exhaustive search that also works on unsorted data, not a logarithmic binary search; it trades extra work for parallelism.

Parallel Merge Sort

use rayon::iter::split;
use rayon::prelude::*;
 
fn parallel_merge_sort(data: Vec<i32>) -> Vec<i32> {
    if data.len() <= 1 {
        return data;
    }
    
    split(
        data,
        |mut data| {
            if data.len() > 32 {
                let mid = data.len() / 2;
                let right = data.split_off(mid);
                (data, right)
            } else {
                (data, Vec::new())
            }
        },
        |mut data| {
            data.sort();
            data
        },
        |left, right| merge(left, right),
    )
}
 
/// Merges two sorted vectors into one sorted vector.
///
/// Stable with respect to the inputs: on ties the element from `left`
/// is emitted first.
fn merge(left: Vec<i32>, right: Vec<i32>) -> Vec<i32> {
    let mut merged = Vec::with_capacity(left.len() + right.len());
    let mut lhs = left.into_iter().peekable();
    let mut rhs = right.into_iter().peekable();

    // Pull the smaller head while both sides still have elements.
    while let (Some(&l), Some(&r)) = (lhs.peek(), rhs.peek()) {
        if l <= r {
            merged.push(lhs.next().expect("peeked value present"));
        } else {
            merged.push(rhs.next().expect("peeked value present"));
        }
    }

    // At most one side still has elements; drain whatever remains.
    merged.extend(lhs);
    merged.extend(rhs);

    merged
}

Merge sort naturally fits the divide-and-conquer pattern.

Matrix Operations

use rayon::iter::split;
use rayon::prelude::*;
 
type Matrix = Vec<Vec<f64>>;
 
/// Multiplies two square n×n matrices, parallelizing over bands of result
/// rows by splitting the row range.
///
/// Each piece computes its own band of rows, and the bands are scattered
/// back into the result by starting row index. (The original version
/// computed `local_result` but never wrote it into `result`, so it always
/// returned the zero matrix.)
fn parallel_matrix_multiply(a: &Matrix, b: &Matrix) -> Matrix {
    let n = a.len();
    // Stop splitting once a band is at most n/4 rows; max(1) guarantees
    // the splitter always makes progress on small matrices.
    let min_rows = (n / 4).max(1);

    // Each piece is (starting_row, rows computed for that band).
    let bands: Vec<(usize, Vec<Vec<f64>>)> = split((0, n), move |(start, end)| {
        if end - start > min_rows {
            let mid = (start + end) / 2;
            ((start, mid), Some((mid, end)))
        } else {
            ((start, end), None) // band small enough: don't split
        }
    })
    .map(|(start, end)| {
        let mut rows = vec![vec![0.0; n]; end - start];
        for i in start..end {
            for j in 0..n {
                for k in 0..n {
                    rows[i - start][j] += a[i][k] * b[k][j];
                }
            }
        }
        (start, rows)
    })
    .collect();

    // Scatter each band into place. The order in which pieces were
    // collected is irrelevant because every band carries its start row.
    let mut result = vec![Vec::new(); n];
    for (start, rows) in bands {
        for (offset, row) in rows.into_iter().enumerate() {
            result[start + offset] = row;
        }
    }
    result
}

split allows parallelizing over arbitrary ranges, not just collections.

Parallel Prefix Sum (Scan)

use rayon::iter::split; // note: this example actually uses rayon::join, not split
 
/// Exclusive prefix sum (scan): result[i] = data[0] + … + data[i-1], so
/// result[0] == 0 and the final input element never appears in any output.
/// Large inputs are scanned as two halves in parallel via rayon::join.
fn parallel_prefix_sum(data: Vec<i32>) -> Vec<i32> {
    if data.is_empty() {
        return data;
    }
    
    if data.len() <= 32 {
        // Sequential base case. The running sum is pushed BEFORE the
        // current element is added — i.e. an EXCLUSIVE scan.
        let mut result = Vec::with_capacity(data.len());
        let mut sum = 0;
        for x in data {
            result.push(sum);
            sum += x;
        }
        return result;
    }
    
    let mid = data.len() / 2;
    let left = data[..mid].to_vec();
    let right = data[mid..].to_vec();
    
    // Scan the two halves independently in parallel.
    let (left_result, right_result) = rayon::join(
        || parallel_prefix_sum(left),
        || parallel_prefix_sum(right),
    );
    
    // Total of the left half: the last exclusive-scan entry equals
    // sum(data[..mid-1]), so adding data[mid-1] yields sum(data[..mid]).
    // mid >= 1 here because data.len() > 32, so the indexing is safe;
    // `data` itself was only borrowed above (to_vec copies), so it is
    // still available here. unwrap_or(0) is a defensive default — the
    // left result is never empty on this path.
    let left_sum: i32 = left_result.last().copied().unwrap_or(0)
        + data[mid - 1];
    
    // Offset every right-half entry by the left-half total, turning two
    // local exclusive scans into one global exclusive scan.
    let mut result = left_result;
    for val in right_result {
        result.push(val + left_sum);
    }
    
    result
}

Prefix sum requires combining partial results in a specific way.

Parallel Map-Reduce with Split

use rayon::iter::split;
use rayon::prelude::*;
 
fn parallel_map_reduce<T, U, M, R>(
    data: Vec<T>,
    threshold: usize,
    map_fn: M,
    reduce_fn: R,
) -> U
where
    T: Send,
    U: Send,
    M: Fn(T) -> U + Send + Sync,
    R: Fn(U, U) -> U + Send + Sync,
{
    split(
        data,
        |vec| {
            if vec.len() > threshold {
                let mid = vec.len() / 2;
                (vec[..mid].to_vec(), vec[mid..].to_vec())
            } else {
                (vec, Vec::new())
            }
        },
        |vec| {
            vec.into_iter().map(&map_fn).fold(
                // Need an identity/reduce logic here
                panic!("Need initial value"),
                |acc, item| reduce_fn(acc, item),
            )
        },
        |a, b| reduce_fn(a, b),
    )
}

split enables custom map-reduce patterns with full control.

Split vs join vs par_iter

use rayon::{iter::split, join, prelude::*};
 
/// Side-by-side comparison of the three rayon parallelism primitives on
/// the same summation task.
fn comparison_approaches() {
    let data: Vec<i32> = (1..=1000).collect();

    // par_iter: Simple, automatic
    let sum1: i32 = data.par_iter().sum();

    // join: Manual division, two branches. The tuple annotation pins the
    // closures' return types — `sum()` alone leaves them ambiguous.
    let (left, right) = data.split_at(data.len() / 2);
    let (sum2_left, sum2_right): (i32, i32) = join(
        || left.par_iter().sum(),
        || right.par_iter().sum(),
    );
    let sum2 = sum2_left + sum2_right;

    // split: Recursive, custom division — one splitter closure, with the
    // per-piece work and combination on the returned parallel iterator.
    let sum3: i32 = split(data, |mut vec| {
        if vec.len() > 100 {
            let right = vec.split_off(vec.len() / 2);
            (vec, Some(right))
        } else {
            (vec, None)
        }
    })
    .map(|piece| piece.iter().sum::<i32>())
    .sum();

    // par_iter: use for element-wise operations
    // join: use for two-way explicit parallelism
    // split: use for recursive divide-and-conquer
    let _ = (sum1, sum2, sum3);
}

Choose based on your parallelism needs.

Work Stealing with Split

use rayon::iter::split;
use rayon::prelude::*;
 
/// Shows that uneven splits are fine: rayon's work-stealing scheduler
/// rebalances pieces across threads automatically, so the splitter is
/// free to divide the data however is most natural.
fn work_stealing_example(data: Vec<i32>) -> i32 {
    split(data, |mut vec| {
        if vec.len() > 10 {
            // Deliberately uneven 1/3 : 2/3 split — work stealing copes.
            let right = vec.split_off(vec.len() / 3);
            (vec, Some(right))
        } else {
            (vec, None)
        }
    })
    .map(|piece| piece.iter().sum::<i32>())
    .sum()
}

Rayon's scheduler balances work across threads automatically.

Handling Empty Splits

use rayon::iter::split;
use rayon::prelude::*;
 
/// With the real `split` API, "don't split" is expressed by returning
/// `None` as the second half — no empty placeholder vectors are needed.
fn split_with_empties() {
    let data = vec![1, 2, 3];

    let result: i32 = split(data, |mut vec| {
        if vec.len() <= 2 {
            // Stop splitting. This branch also covers an initially empty
            // input, which simply flows through as a single empty piece.
            (vec, None)
        } else {
            let right = vec.split_off(vec.len() / 2);
            (vec, Some(right))
        }
    })
    .map(|piece| {
        // Summing an empty slice yields 0, so an empty piece is handled
        // gracefully without a special case.
        piece.iter().sum::<i32>()
    })
    .sum();

    println!("Result: {}", result);
}

Return `None` from the splitter to stop subdividing; the processing step should still cope with an initially empty input, which arrives as a single empty piece.

Nested Parallelism

use rayon::iter::split;
use rayon::prelude::*;
 
/// Nests parallelism: `split` divides the matrix into row bands, and each
/// band then sums its rows with a nested `par_iter`.
fn nested_parallelism(matrix: Vec<Vec<i32>>) -> i32 {
    split(matrix, |mut rows| {
        if rows.len() > 4 {
            let lower = rows.split_off(rows.len() / 2);
            (rows, Some(lower))
        } else {
            (rows, None)
        }
    })
    .map(|rows| {
        // Each band processes its rows in parallel too.
        rows.into_par_iter()
            .map(|row| row.into_iter().sum::<i32>())
            .sum::<i32>()
    })
    .sum()
}

split can be combined with par_iter for nested parallelism.

When to Use Split vs par_iter

use rayon::{iter::split, prelude::*};
 
// Use par_iter for: element-wise operations
/// Element-wise work is par_iter's sweet spot — a one-line parallel sum.
fn element_wise(data: &[i32]) -> i32 {
    data.par_iter().copied().sum()
}
 
// Use split for: recursive algorithms
/// Divide-and-conquer summation via rayon::iter::split: halve until
/// pieces are at most 100 elements, then sum and combine.
fn recursive(data: Vec<i32>) -> i32 {
    split(data, |mut v| {
        if v.len() > 100 {
            let right = v.split_off(v.len() / 2);
            (v, Some(right))
        } else {
            (v, None)
        }
    })
    .map(|piece| piece.iter().sum::<i32>())
    .sum()
}
 
// Use join for: explicit two-way parallelism
fn two_way(data: &[i32]) -> (i32, i32) {
    let mid = data.len() / 2;
    rayon::join(
        || data[..mid].par_iter().sum(),
        || data[mid..].par_iter().sum(),
    )
}

Choose the right tool for your parallelism pattern.

Synthesis

rayon::split provides custom parallel recursion that goes beyond par_iter():

Key differences from par_iter:

Feature par_iter split
Granularity Element-level Chunk-level
Division control Automatic Custom
Algorithm fit Map/filter/fold Divide-and-conquer
Result combination Standard folds Custom combine
Structure Linear Recursive

Use split when:

  • Implementing divide-and-conquer algorithms (quicksort, mergesort)
  • Processing recursive data structures (trees)
  • Need custom split thresholds or strategies
  • Combining results needs special logic

Use par_iter when:

  • Processing collections element-by-element
  • Standard map/filter/reduce operations
  • Simple parallel iteration suffices

Key insight: split gives you control over the parallel execution strategy—it's a building block for parallel algorithms where you define how work is divided, processed, and combined. par_iter abstracts this away for common cases; split exposes it when you need fine-grained control.