How does serde::ser::Serializer::collect_seq simplify serializing iterators without intermediate collections?

collect_seq allows serializers to consume iterators directly without requiring the caller to collect the iterator into an intermediate container such as a Vec. This is particularly valuable when serializing large datasets or when the source data is naturally an iterator rather than a collection. The method takes anything that implements IntoIterator and serializes each element on the fly, avoiding the allocation overhead of collecting into a temporary Vec first.

The Problem Without collect_seq

use serde::{Serialize, Serializer};
 
// Without collect_seq, an iterator has to be drained into a container first.
/// Gathers every element of `items`, in iteration order, into a newly
/// allocated `Vec` so the result can be serialized as a collection.
fn serialize_iterator_items<T>(items: impl Iterator<Item = T>) -> Vec<T>
where
    T: Serialize,
{
    // This buffer is the intermediate allocation that collect_seq makes unnecessary.
    let mut buffered = Vec::new();
    buffered.extend(items);
    buffered
}
 
/// Serializable payload whose sequence field is an owned `Vec<i32>`.
#[derive(Serialize)]
struct Data {
    // The field is a concrete collection, so an iterator has to be collected
    // into a `Vec` before a `Data` value can be built.
    items: Vec<i32>,
}
 
/// Demonstrates the collect-then-serialize workflow that `collect_seq` avoids.
fn main() {
    // Step 1: drain the range into a Vec (the allocation we would like to avoid).
    let items: Vec<i32> = (0..1000).collect();

    // Step 2: wrap the Vec and serialize it.
    let data = Data { items };
    let json = serde_json::to_string(&data).unwrap();

    println!("{}", json);
}

Without collect_seq, iterators must be collected before serialization.

Understanding collect_seq

use serde::{Serialize, Serializer, ser::SerializeSeq};
 
// The Serializer trait defines collect_seq:
// 
// fn collect_seq<I>(self, iter: I) -> Result<Self::Ok, Self::Error>
// where
//     I: IntoIterator,
//     <I as IntoIterator>::Item: Serialize,
// {
//     // Default implementation: serialize each item
// }
 
// Key insight: The iterator is consumed directly
// No intermediate Vec allocation required
 
/// Serializes a range to JSON by handing the iterator straight to
/// `Serializer::collect_seq`, without collecting it into a `Vec` first.
fn main() {
    let numbers = 0..10;

    // `&mut serde_json::Serializer<W>` implements `serde::Serializer`, so the
    // range can be passed to `collect_seq` as-is; elements are written into
    // `buffer` one at a time as the iterator is advanced.
    let mut buffer = Vec::new();
    let mut serializer = serde_json::Serializer::new(&mut buffer);
    (&mut serializer).collect_seq(numbers).unwrap();

    // serde_json only ever emits valid UTF-8, so this conversion cannot fail.
    let json = String::from_utf8(buffer).expect("serde_json output is valid UTF-8");
    println!("{}", json);
    // [0,1,2,3,4,5,6,7,8,9]
}

collect_seq exists specifically to avoid the collect-to-vec pattern.

Custom Serialize Implementation

use serde::{Serialize, Serializer, ser::SerializeSeq};
 
/// A request for the leading terms of the Fibonacci sequence.
struct Fibonacci {
    /// How many terms to emit when the value is serialized.
    count: usize,
}
 
impl Serialize for Fibonacci {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Use collect_seq to serialize the iterator directly
        let fib_iter = (0..self.count).scan((0, 1), |state, _| {
            let (a, b) = *state;
            *state = (b, a + b);
            Some(a)
        });
        
        serializer.collect_seq(fib_iter)
    }
}
 
/// Prints the first ten Fibonacci numbers as a JSON array.
fn main() {
    // The terms are produced lazily while the JSON text is being written.
    let json = serde_json::to_string(&Fibonacci { count: 10 }).unwrap();
    println!("{}", json);
    // Prints: [0,1,1,2,3,5,8,13,21,34]
}

Custom Serialize implementations can use collect_seq for iterator-based data.

Serialization Without Intermediate Allocation

use serde::{Serialize, Serializer};
 
// Large dataset that we don't want to fully materialize
/// Describes a run of consecutive `u64` values; only the description is
/// stored, not the values themselves.
struct LargeSequence {
    /// First value of the run.
    start: u64,
    /// Number of values in the run.
    count: usize,
}
 
impl Serialize for LargeSequence {
    /// Serializes `count` consecutive integers beginning at `start` as a
    /// sequence, producing each value only when the serializer asks for it.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // An open-ended range costs nothing to create; `take` bounds it to the
        // requested length, so no Vec of values is ever built.
        let unbounded = self.start..;
        serializer.collect_seq(unbounded.take(self.count))
    }
}
 
/// Serializes one million generated integers without first collecting them.
fn main() {
    // Only the description of the run is stored here; no values exist yet.
    let large = LargeSequence { start: 0, count: 1_000_000 };

    // Each integer is generated and written in turn; no Vec<u64> of inputs is built.
    let json = serde_json::to_string(&large).unwrap();

    // The collect-first alternative would have required:
    // let vec: Vec<u64> = (0..1_000_000).collect();  // Huge allocation!
    // serde_json::to_string(&vec)
}

Large sequences benefit most from avoiding intermediate allocations.

Streaming Data Sources

use serde::{Serialize, Serializer};
use std::io::{BufRead, Lines};
 
// Serialize lines from a file without loading all into memory
/// Wraps a line iterator over some buffered reader `R`.
struct FileLines<R> {
    /// The not-yet-consumed lines of the underlying reader.
    lines: Lines<R>,
}
 
impl<R: BufRead> Serialize for FileLines<R> {
    /// Placeholder implementation: always panics via `unimplemented!`.
    ///
    /// It exists to illustrate a limitation rather than to be called.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The obstacle: `serialize` only receives `&self`, while `collect_seq`
        // needs an iterator it can consume. `self.lines` cannot be moved out
        // of a shared reference, and (per the original note) `Lines` is not
        // `Clone`, so there is no owned iterator to hand over from here.

        // The pattern one would like to write is:
        // serializer.collect_seq(self.lines.map(|l| l.unwrap()))
        // Besides not compiling against `&self`, that `unwrap` would panic on
        // any read error instead of reporting it to the caller.

        // NOTE(review): a working version presumably needs a different design,
        // e.g. interior mutability around the iterator or a one-shot API that
        // takes the value by ownership. Confirm the intended strategy.

        unimplemented!("See note about iterator requirements")
    }
}
 
// More practical example with owned data
/// Holds a partially or fully unconsumed owning iterator over `i32` values.
struct StreamingNumbers {
    /// The remaining values, owned by the iterator.
    values: std::vec::IntoIter<i32>,
}
 
impl Serialize for StreamingNumbers {
    /// Serializes the values that have not yet been consumed from `values`
    /// as a sequence, leaving the iterator itself untouched.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `vec::IntoIter::clone` would copy the whole remaining buffer into a
        // fresh allocation, which is exactly the intermediate collection this
        // example is meant to avoid. `as_slice` borrows the remaining elements
        // in place, and `&[i32]` is itself an `IntoIterator`.
        serializer.collect_seq(self.values.as_slice())
    }
}

Streaming sources can be serialized without full materialization.

Practical Example: Database Results

use serde::{Serialize, Serializer};
 
// Imagine this represents database rows
/// A single user record; serializable through the derived `Serialize` impl.
#[derive(Debug, Clone, Serialize)]
struct User {
    /// Numeric identifier of the user.
    id: u64,
    /// Display name of the user.
    name: String,
}
 
/// Cursor that generates users with ids in the half-open range `current..max`.
///
/// `Clone` is derived because serialization only has shared access to the
/// cursor and needs an independent copy to iterate; a copy is just two `u64`s.
#[derive(Debug, Clone)]
struct UserIterator {
    /// Id of the next user to produce.
    current: u64,
    /// Exclusive upper bound on the ids produced.
    max: u64,
}
 
impl Iterator for UserIterator {
    type Item = User;

    /// Produces the user for the current id and advances the cursor, or
    /// returns `None` once the cursor has reached `max`.
    fn next(&mut self) -> Option<Self::Item> {
        // Guard clause: nothing left to generate.
        if self.current >= self.max {
            return None;
        }

        let id = self.current;
        self.current += 1;

        Some(User {
            id,
            name: format!("User {}", id),
        })
    }
}
 
/// Response body whose content is generated by a `UserIterator`.
struct UsersResponse {
    /// Source of the users to emit when the response is serialized.
    users: UserIterator,
}
 
impl Serialize for UsersResponse {
    /// Serializes the response as a sequence of users pulled from a copy of
    /// the stored iterator.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `serialize` only has `&self`, but iterating needs an owned cursor,
        // hence the clone. This requires `UserIterator` to implement `Clone`.
        let rows = self.users.clone();
        serializer.collect_seq(rows)
    }
}
 
/// Serializes one hundred generated users to JSON and prints the result.
fn main() {
    // Ids 0 through 99 are produced on demand during serialization.
    let users = UserIterator { current: 0, max: 100 };
    let json = serde_json::to_string(&UsersResponse { users }).unwrap();

    println!("{}", json);
}

API responses often benefit from iterator serialization.

Comparison: Manual vs collect_seq

use serde::{Serialize, Serializer, ser::SerializeSeq};
 
/// Stands for the integers `0..count`, which are generated at serialization time.
struct Numbers {
    /// Number of integers in the sequence.
    count: usize,
}
 
// Approach 1: drive the sequence serializer by hand (verbose but instructive)
impl Serialize for Numbers {
    /// Serializes `0..count` by opening a sequence with a known length,
    /// feeding it one element at a time, and closing it.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The length is known up front, so it is passed along as a hint.
        let mut seq = serializer.serialize_seq(Some(self.count))?;

        // Stops at, and propagates, the first element that fails to serialize.
        (0..self.count).try_for_each(|i| seq.serialize_element(&i))?;

        seq.end()
    }
}
 
// Approach 2: let collect_seq do the work (concise)
//
// This is an ALTERNATIVE to the manual impl shown earlier in this snippet:
// Rust allows only one `impl Serialize for Numbers`, so keep whichever of the
// two approaches you prefer and delete the other before compiling.
impl Serialize for Numbers {
    /// Serializes `0..count` as a sequence by handing the range to `collect_seq`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let indices = 0..self.count;
        serializer.collect_seq(indices)
    }
}
 
// Both produce the same output, but collect_seq is cleaner

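collect_seq is a convenience over manual sequence serialization: by default it performs the serialize_seq / serialize_element / end steps for you, and individual serializers may override it with a more efficient implementation.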

Memory Efficiency Comparison

use serde::{Serialize, Serializer};
 
// Scenario: Serializing a generated sequence
/// Stands for a sequence of `count` values that are computed at serialization time.
struct GeneratedSequence {
    /// Number of values to generate.
    count: usize,
}
 
impl Serialize for GeneratedSequence {
    /// Serializes the doubled indices `0, 2, 4, ...` (`count` values in total)
    /// as a sequence.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The adaptor is lazy: each doubled value is computed only when the
        // serializer pulls it, so the inputs are never stored in a collection.
        let doubled = (0..self.count).map(|index| index * 2);
        serializer.collect_seq(doubled)
    }
}
 
// Without collect_seq (hypothetical):
// impl Serialize for GeneratedSequence {
//     fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
//     where
//         S: Serializer,
//     {
//         // Must collect first: O(n) memory
//         let values: Vec<_> = (0..self.count).map(|x| x * 2).collect();
//         values.serialize(serializer)
//     }
// }
 
/// Serializes a million generated values and prints the start of the JSON.
fn main() {
    let seq = GeneratedSequence { count: 1_000_000 };

    // Input-side memory:
    // - with collect_seq: values are generated and written one by one
    // - without it: a Vec of one million integers is allocated first

    let json = serde_json::to_string(&seq).unwrap();

    // Show at most the first 50 bytes of the output.
    let shown = json.len().min(50);
    let preview = &json[..shown];
    println!("First 50 chars: {}...", preview);
}

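collect_seq keeps the input side at constant memory for generated sequences: no collection of source values is built. The serialized output still occupies memory when it is gathered into a String, as serde_json::to_string does; write to a streaming writer if the output must not be buffered either.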

Serializer Implementation Details

use serde::{Serialize, Serializer, ser::SerializeSeq};
use std::marker::PhantomData;
 
// Understanding what collect_seq does internally:
 
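// Default implementation in the Serializer trait (simplified):
// 
// fn collect_seq<I>(self, iter: I) -> Result<Self::Ok, Self::Error>
// where
//     I: IntoIterator,
//     <I as IntoIterator>::Item: Serialize,
// {
//     let iter = iter.into_iter();
//     // The length hint is Some(n) only when size_hint() reports an exact
//     // length (lower bound == upper bound); otherwise it is None.
//     let len = match iter.size_hint() {
//         (lo, Some(hi)) if lo == hi => Some(lo),
//         _ => None,
//     };
//     let mut seq = self.serialize_seq(len)?;
//     for item in iter {
//         seq.serialize_element(&item)?;
//     }
//     seq.end()
// }
//
// Consequence: adaptors such as `filter` lose the exact length, so formats
// that require the length up front may reject such iterators.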
 
// Some serializers override this for efficiency:
// - JSON: Can write items directly to output
// - Binary formats: Might know exact size needed
 
// Example custom serializer that uses iterator hints
/// Sketch of a serializer that emits its output into `writer`.
struct MySerializer<W> {
    /// Destination for the serialized output.
    writer: W,
}
 
impl<W: std::io::Write> Serializer for MySerializer<W> {
    type Ok = ();
    type Error = serde_json::Error;
    type SerializeSeq = SeqSerializer<W>;
    // ... other associated types ...
    
    /// Serializes every item of `iter` as one sequence.
    ///
    /// The iterator's `size_hint` is inspected first: when it reports an exact
    /// length, that length is forwarded to `serialize_seq`; otherwise `None`
    /// is passed. Items are then serialized one at a time, and the first
    /// error aborts the sequence and is returned.
    fn collect_seq<I>(self, iter: I) -> Result<Self::Ok, Self::Error>
    where
        I: IntoIterator,
        <I as IntoIterator>::Item: Serialize,
    {
        let iter = iter.into_iter();

        // Only trust the hint when lower and upper bound agree. Passing `None`
        // unconditionally would throw away a length that the trait's default
        // implementation would otherwise have provided.
        let len = match iter.size_hint() {
            (lower, Some(upper)) if lower == upper => Some(upper),
            _ => None,
        };

        let mut seq = self.serialize_seq(len)?;
        for item in iter {
            seq.serialize_element(&item)?;
        }
        seq.end()
    }
    
    // ... other trait methods ...
}
 
/// In-progress state of a sequence being written to `writer`.
struct SeqSerializer<W> {
    /// Destination for the serialized output.
    writer: W,
    /// `true` until the first element has been written; used to decide
    /// whether a separating comma is needed.
    first: bool,
}
 
impl<W: std::io::Write> SerializeSeq for SeqSerializer<W> {
    type Ok = ();
    type Error = serde_json::Error;
    
    fn serialize_element<T: ?Sized + Serialize>(&mut self, value: &T) -> Result<(), Self::Error> {
        if !self.first {
            write!(self.writer, ",")?;
        }
        self.first = false;
        value.serialize(MySerializer { writer: &mut self.writer })?;
        Ok(())
    }
    
    fn end(self) -> Result<Self::Ok, Self::Error> {
        write!(self.writer, "]")?;
        Ok(())
    }
}
 
/// Writes the UTF-8 bytes of `s` to `w` in full.
///
/// Returns whatever error `write_all` reports if the writer fails.
fn write<W: std::io::Write>(w: &mut W, s: &str) -> std::io::Result<()> {
    let bytes = s.as_bytes();
    w.write_all(bytes)
}

Understanding the implementation helps optimize custom serializers.

Working with Transformed Iterators

use serde::{Serialize, Serializer};
 
/// Wraps an arbitrary source `I` so it can be given a `Serialize` impl.
struct TransformedData<T, I> {
    /// The wrapped source value.
    source: I,
    /// Zero-sized marker that ties the otherwise unused parameter `T` to the type.
    _marker: std::marker::PhantomData<T>,
}
 
impl<T, I> Serialize for TransformedData<T, I>
where
    I: Iterator<Item = T> + Clone,
    T: Serialize,
{
    /// Serializes every item of a clone of `source` as a sequence, leaving
    /// the stored iterator untouched.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Iterating consumes the iterator, and only `&self` is available, so
        // an independent copy is walked instead.
        let items = self.source.clone();
        serializer.collect_seq(items)
    }
}
 
// More practical: transforming one type to another during serialization
/// Owns integers that are serialized as their doubles.
struct DoubledValues {
    /// The original, un-doubled values.
    values: Vec<i32>,
}
 
impl Serialize for DoubledValues {
    /// Serializes each stored value multiplied by two, as a sequence.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The doubling happens lazily, element by element, while serializing;
        // no second Vec holding the doubled values is created.
        let doubled = self.values.iter().map(|&value| value * 2);
        serializer.collect_seq(doubled)
    }
}
 
// Or with filtering
/// Owns integers of which only a subset is serialized.
struct FilteredValues {
    /// All stored values, including the ones that will be filtered out.
    values: Vec<i32>,
}
 
impl Serialize for FilteredValues {
    /// Serializes only the strictly positive values, in their stored order.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `i32` is `Copy`, so the values are copied out of the slice and the
        // non-positive ones are dropped on the fly.
        let positives = self.values.iter().copied().filter(|&value| value > 0);
        serializer.collect_seq(positives)
    }
}
 
/// Prints the JSON produced by the doubling and the filtering wrappers.
fn main() {
    let json = serde_json::to_string(&DoubledValues { values: vec![1, 2, 3, 4, 5] }).unwrap();
    println!("Doubled: {}", json); // Doubled: [2,4,6,8,10]

    let json =
        serde_json::to_string(&FilteredValues { values: vec![-2, -1, 0, 1, 2, 3] }).unwrap();
    println!("Filtered: {}", json); // Filtered: [1,2,3]
}

Transform and filter during serialization without intermediate collections.

Custom Sequence Type

use serde::{Serialize, Serializer};
 
// A custom container that serializes as a struct whose `items` field is a sequence
/// One page of results together with its pagination metadata.
struct PaginatedResponse<T> {
    /// The items held for this page.
    items: Vec<T>,
    /// Serialized under the key "total".
    total: usize,
    /// Serialized under the key "page".
    page: usize,
    /// Serialized under the key "per_page".
    per_page: usize,
}
 
impl<T: Serialize> Serialize for PaginatedResponse<T> {
    /// Serializes the response as a four-field struct named
    /// "PaginatedResponse": `total`, `page`, `per_page`, then `items`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        use serde::ser::SerializeStruct;

        // The wrapper routes the items through `collect_seq` when the field
        // is serialized.
        let items = ItemsWrapper(&self.items);

        let mut state = serializer.serialize_struct("PaginatedResponse", 4)?;
        state.serialize_field("total", &self.total)?;
        state.serialize_field("page", &self.page)?;
        state.serialize_field("per_page", &self.per_page)?;
        state.serialize_field("items", &items)?;
        state.end()
    }
}
 
// Helper wrapper just for serialization
/// Borrowed view of a slice of items; exists only to carry a `Serialize` impl.
struct ItemsWrapper<'a, T>(&'a [T]);
 
impl<T: Serialize> Serialize for ItemsWrapper<'_, T> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.collect_seq(self.0.iter())
    }
}

Combine collect_seq with struct serialization for complex types.

Common Patterns and Best Practices

use serde::{Serialize, Serializer};
 
// Pattern 1: Simple wrapper
/// Newtype around any value `T`; it is given a sequence-style `Serialize`
/// impl when `T` is iterable.
struct Wrapper<T>(T);
 
impl<T: Serialize + Clone + IntoIterator> Serialize for Wrapper<T>
where
    <T as IntoIterator>::Item: Serialize,
{
    /// Serializes the items of a clone of the wrapped value as a sequence.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // `into_iter` consumes its receiver and only `&self` is available, so
        // the inner value is cloned first. How expensive that is depends
        // entirely on `T`'s `Clone` impl.
        let owned: T = self.0.clone();
        serializer.collect_seq(owned)
    }
}
 
// Pattern 2: Calculated values
/// A pair of `i32` bounds; the values in between are computed on demand.
struct Range {
    /// First value (inclusive).
    start: i32,
    /// End bound (exclusive).
    end: i32,
}
 
impl Serialize for Range {
    /// Serializes every integer in `start..end` as a sequence.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let span = self.start..self.end;
        serializer.collect_seq(span)
    }
}
 
// Pattern 3: Transform external data
/// Borrowed ids plus a prefix that is attached to each id when serializing.
struct ExternalIds<'a> {
    /// The raw ids, borrowed from the caller.
    ids: &'a [String],
    /// Text placed before each id, separated from it by a colon.
    prefix: &'a str,
}
 
impl Serialize for ExternalIds<'_> {
    /// Serializes each id as the string `"<prefix>:<id>"`, in slice order.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let prefix = self.prefix;
        // Each qualified id is formatted only when the serializer pulls it.
        let qualified = self.ids.iter().map(|id| format!("{}:{}", prefix, id));
        serializer.collect_seq(qualified)
    }
}
 
// Pattern 4: Conditional inclusion
/// Borrowed items that are serialized only when `include` is set.
struct OptionalItems<'a, T> {
    /// The candidate items, borrowed from the caller.
    items: &'a [T],
    /// When `false`, an empty sequence is serialized instead of the items.
    include: bool,
}
 
impl<T: Serialize> Serialize for OptionalItems<'_, T> {
    /// Serializes the items as a sequence when `include` is `true`, and an
    /// empty sequence otherwise.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // Choose the slice first so that a single `collect_seq` call covers
        // both cases.
        let visible: &[T] = if self.include { self.items } else { &[] };
        serializer.collect_seq(visible.iter())
    }
}

Various patterns leverage collect_seq for different serialization needs.

Synthesis

Quick reference:

use serde::{Serialize, Serializer};
 
// Basic usage: hand an iterator straight to the serializer
impl Serialize for MyType {
    /// Serializes the elements of `values`, by reference, as a sequence.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let elements = self.values.iter();
        serializer.collect_seq(elements)
    }
}
 
// With transformation
impl Serialize for Doubled {
    /// Serializes each element of `values` multiplied by two, as a sequence.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        // The multiplication runs per element while the sequence is written.
        let doubled = self.values.iter().map(|x| x * 2);
        serializer.collect_seq(doubled)
    }
}
 
// With filtering
impl Serialize for Positives {
    /// Serializes, by reference, only the elements of `values` greater than zero.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let positives = self.values.iter().filter(|x| **x > 0);
        serializer.collect_seq(positives)
    }
}
 
// Generated sequence
impl Serialize for Sequence {
    /// Serializes the integers `0..count` as a sequence without storing them.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let indices = 0..self.count;
        serializer.collect_seq(indices)
    }
}
 
// Key benefits:
// 1. No intermediate Vec allocation
// 2. Memory efficient for large sequences
// 3. Lazy evaluation - compute during serialization
// 4. Works with any IntoIterator
 
// Compare to alternative:
// let vec: Vec<_> = iter.collect();  // Allocates!
// vec.serialize(serializer)
 
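// Serializer default implementation (simplified):
// fn collect_seq<I>(self, iter: I) -> Result<...>
// where I: IntoIterator, I::Item: Serialize
// {
//     let iter = iter.into_iter();
//     // Some(n) only when size_hint() reports an exact length, else None
//     let len = match iter.size_hint() {
//         (lo, Some(hi)) if lo == hi => Some(lo),
//         _ => None,
//     };
//     let mut seq = self.serialize_seq(len)?;
//     for item in iter {
//         seq.serialize_element(&item)?;
//     }
//     seq.end()
// }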

Key insight: collect_seq bridges the gap between Rust's iterator-centric design and serde's serialization model. Without it, you'd be forced to collect every iterator into a Vec before serializing—defeating the memory efficiency of lazy iteration. With collect_seq, serializers consume iterators directly, generating and writing each element without buffering the entire sequence. This is essential for serializing large datasets, generated sequences, or transformed data where intermediate allocation would be wasteful or impossible. The method handles the standard serialize_seq pattern internally, so you get clean code without sacrificing control.