Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/lerobot/configs/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,14 @@ class TrainPipelineConfig(HubMixin):
batch_size: int = 8
prefetch_factor: int = 4
persistent_workers: bool = True
# Multiprocessing start method used for DataLoader worker processes
# (e.g. "spawn", "fork" or "forkserver"); only applied when num_workers > 0.
# We default to "spawn" rather than "fork", including on Linux, because
# spawned workers do not inherit fork-time state from the parent. "fork" can
# crash when the parent has already loaded libraries that are not fork-safe
# (e.g. PyAV / torchcodec / ffmpeg), with errors like
# `multiprocessing.context.AuthenticationError: digest received was wrong`,
# `Pin memory thread exited unexpectedly`, or random worker segfaults.
# See https://github.com/huggingface/lerobot/issues/2488.
dataloader_multiprocessing_context: str = "spawn"
steps: int = 100_000
eval_freq: int = 20_000
log_freq: int = 200
Expand Down
1 change: 1 addition & 0 deletions src/lerobot/scripts/lerobot_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,6 +411,7 @@ def train(cfg: TrainPipelineConfig, accelerator: "Accelerator | None" = None):
drop_last=False,
prefetch_factor=cfg.prefetch_factor if cfg.num_workers > 0 else None,
persistent_workers=cfg.persistent_workers and cfg.num_workers > 0,
multiprocessing_context=cfg.dataloader_multiprocessing_context if cfg.num_workers > 0 else None,
)

# Prepare everything with accelerator
Expand Down