The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.
SafeMapper manages sessions and checkpoints automatically, but provides tools for inspection and control when needed. This guide covers configuration, session management, and cleanup.
# Default behavior: calling s_configure() with no arguments uses the defaults
# Default configuration:
# - batch_size = 100
# - retry_attempts = 3
# - auto_recover = TRUE
# Customize settings
s_configure(
batch_size = 50, # Items per checkpoint
retry_attempts = 5, # Retries for failed batches
auto_recover = TRUE # Enable automatic recovery
)

┌─────────────────────────────────────────────────────────────────────────────┐
│ Configuration Options │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ batch_size (default: 100) │
│ ┌───────────────────────────────────────────────────────────────────┐ │
│ │ Controls how often checkpoints are saved │ │
│ │ │ │
│ │ Small batch_size (10-50): │ │
│ │ ✅ More frequent saves, less work lost on failure │ │
│ │ ❌ Higher I/O overhead │ │
│ │ Use for: Slow operations, unstable environments │ │
│ │ │ │
│ │ Large batch_size (200-500): │ │
│ │ ✅ Less I/O overhead, faster overall │ │
│ │ ❌ More work lost on failure │ │
│ │ Use for: Fast operations, stable environments │ │
│ └───────────────────────────────────────────────────────────────────┘ │
│ │
│ retry_attempts (default: 3) │
│ ┌───────────────────────────────────────────────────────────────────┐ │
│ │ How many times to retry a failed batch │ │
│ │ │ │
│ │ Low (1-2): For persistent errors (local computation) │ │
│ │ Medium (3-5): For transient errors (network, APIs) │ │
│ │ High (5-10): For very unreliable operations │ │
│ └───────────────────────────────────────────────────────────────────┘ │
│ │
│ auto_recover (default: TRUE) │
│ ┌───────────────────────────────────────────────────────────────────┐ │
│ │ Whether to automatically resume from checkpoints │ │
│ │ │ │
│ │ TRUE: Re-running same code resumes automatically │ │
│ │ FALSE: Always start fresh (useful for debugging) │ │
│ └───────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
# For API calls (slow, potentially unstable)
s_configure(
batch_size = 20, # Save frequently
retry_attempts = 5 # Handle transient errors
)
# For local computation (fast, stable)
s_configure(
batch_size = 500, # Reduce I/O overhead
retry_attempts = 1 # Errors are usually persistent
)
# For development/debugging
s_configure(
batch_size = 10, # Easy to test recovery
retry_attempts = 1, # Fail fast
auto_recover = FALSE # Start fresh each run
)
# Reset to defaults
s_configure()

┌─────────────────────────────────────────────────────────────────────────────┐
│ Session ID Strategies │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ Automatic (Default) │
│ ┌───────────────────────────────────────────────────────────────────┐ │
│ │ session_id = fingerprint(data_characteristics) │ │
│ │ │ │
│ │ Pros: │ │
│ │ ✅ Zero configuration needed │ │
│ │ ✅ Same data automatically resumes │ │
│ │ ✅ Different data gets different session │ │
│ │ │ │
│ │ Cons: │ │
│ │ ⚠️ Similar data might share session unexpectedly │ │
│ │ ⚠️ Code changes don't create new session │ │
│ └───────────────────────────────────────────────────────────────────┘ │
│ │
│ Manual (Explicit .session_id) │
│ ┌───────────────────────────────────────────────────────────────────┐ │
│ │ session_id = "my_custom_id_v2" │ │
│ │ │ │
│ │ Pros: │ │
│ │ ✅ Full control over session identity │ │
│ │ ✅ Can version your computations │ │
│ │ ✅ Predictable behavior │ │
│ │ │ │
│ │ Cons: │ │
│ │ ⚠️ Must manage IDs yourself │ │
│ │ ⚠️ Must remember to update ID when needed │ │
│ └───────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
# Define example data and functions
data <- 1:20
algo_v1 <- function(x) x^2
algo_v2 <- function(x) x^3
# Scenario 1: Versioned computation
result_v1 <- s_map(data, algo_v1, .session_id = "analysis_v1")
#> [5%] Processing items 1-20 of 20
#> Completed 20 items
result_v2 <- s_map(data, algo_v2, .session_id = "analysis_v2")
#> [5%] Processing items 1-20 of 20
#> Completed 20 items
# Scenario 2: Named experiments
func <- function(x) x * 2
result_a <- s_map(data, func, .session_id = "experiment_baseline")
#> [5%] Processing items 1-20 of 20
#> Completed 20 items
result_b <- s_map(data, func, .session_id = "experiment_treatment")
#> [5%] Processing items 1-20 of 20
#> Completed 20 items
# Scenario 3: Date-based sessions
today <- format(Sys.Date(), "%Y%m%d")
result <- s_map(data, func, .session_id = paste0("daily_job_", today))
#> [5%] Processing items 1-20 of 20
#> Completed 20 items

┌─────────────────────────────────────────────────────────────────────────────┐
│ Checkpoint File Contents │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ checkpoint_file.rds │
│ │ │
│ ├── results │
│ │ └── List of computed results (up to completed items) │
│ │ │
│ └── metadata │
│ ├── session_id : "map_abc123def456" │
│ ├── total_items : 1000 │
│ ├── completed_items : 500 │
│ ├── mode : "map" │
│ ├── created : "2026-01-23 10:30:00" │
│ └── last_updated : "2026-01-23 10:45:00" │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
# Clean by age
s_clean_sessions(older_than_days = 30) # Remove sessions > 30 days old
# Clean specific sessions
s_clean_sessions(session_ids = c("old_experiment", "failed_job"))
# Clean by status
s_clean_sessions(status_filter = "failed") # Only failed sessions
s_clean_sessions(status_filter = "corrupted") # Only corrupted sessions

┌─────────────────────────────────────────────────────────────────────────────┐
│ Checkpoint Lifecycle │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ 1. Creation │
│ s_map() starts ──► Checkpoint created │
│ │
│ 2. Updates │
│ Each batch complete ──► Checkpoint updated │
│ │
│ 3. Automatic Deletion (on success) │
│ All items complete ──► Checkpoint deleted automatically │
│ │
│ 4. Manual Cleanup (for interrupted sessions) │
│ Task abandoned ──► Use s_clean_sessions() │
│ │
│ Recommended Schedule: │
│ ┌───────────────────────────────────────────────────────────────────┐ │
│ │ Daily: s_clean_sessions(older_than_days = 7) │ │
│ │ Weekly: s_clean_sessions(status_filter = "failed") │ │
│ │ Monthly: Check total disk usage of checkpoint directory │ │
│ └───────────────────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
# daily_job.R
library(SafeMapper)
# Configure for production
s_configure(
batch_size = 100,
retry_attempts = 3
)
# Use date-based session ID for predictable behavior
job_id <- paste0("daily_process_", format(Sys.Date(), "%Y%m%d"))
# Run the job
results <- s_map(
large_dataset,
process_record,
.session_id = job_id
)
# Clean up old sessions at the end
s_clean_sessions(older_than_days = 7)

# development.R
library(SafeMapper)
# Configure for debugging
s_configure(
batch_size = 10,
retry_attempts = 1,
auto_recover = FALSE # Always start fresh
)
# Test with small dataset
test_data <- head(full_data, 50)
# Run and iterate
results <- s_map(test_data, my_function)
# When ready for production, change config
s_configure(
batch_size = 100,
retry_attempts = 3,
auto_recover = TRUE
)

┌─────────────────────────────────────────────────────────────────────────────┐
│ Session Management Best Practices │
├─────────────────────────────────────────────────────────────────────────────┤
│ │
│ 1. Configuration │
│ ├── Set config at script start for consistency │
│ ├── Use environment-specific configs (dev/prod) │
│ └── Document your configuration choices │
│ │
│ 2. Session IDs │
│ ├── Use auto-generated IDs for one-off scripts │
│ ├── Use manual IDs for repeatable jobs │
│ ├── Include version numbers when algorithms change │
│ └── Include dates for time-sensitive jobs │
│ │
│ 3. Cleanup │
│ ├── Run cleanup regularly (daily or weekly) │
│ ├── Monitor checkpoint directory size │
│ ├── Clean failed sessions after investigation │
│ └── Don't clean sessions you might need to resume │
│ │
│ 4. Monitoring │
│ ├── Use session IDs that are easy to identify │
│ ├── Log session IDs for tracking │
│ └── Set up alerts for sessions that never complete │
│ │
└─────────────────────────────────────────────────────────────────────────────┘
# Session not resuming from its checkpoint? Possible causes:
# 1. auto_recover is FALSE
s_configure(auto_recover = TRUE)
# 2. Data changed (different fingerprint)
# Use an explicit .session_id to force the same session
result <- s_map(data, func, .session_id = "fixed_session")
# 3. Checkpoint was deleted
# Check if the file exists in the checkpoint directory

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.