Memory Management and Workflow Tools

The hardware and bandwidth for this mirror are donated by METANET, the Webhosting and Full Service-Cloud Provider.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]metanet.ch.

Introduction

gooseR’s memory system revolutionizes how you manage R objects and workflows. Save any R object - data frames, models, lists, plots - and retrieve them instantly, even across sessions. Combined with workflow tools, you can seamlessly continue work across days, hand off projects, and maintain organized research.

Memory Basics

Saving Objects

# Save any R object
# Example object: a linear regression of mpg on wt and cyl, fitted on mtcars.
my_model <- lm(mpg ~ wt + cyl, data = mtcars)

# Store the model in the "models" category with three descriptive tags.
# The retrieval example in this vignette loads it back as "my_model",
# i.e. under the name of the variable passed here.
goose_save(
  my_model,
  category = "models",
  tags = c("mtcars", "regression", "fuel_efficiency")
)

# Save data frames
# Keep only rows with mpg > 20 and the four columns of interest.
# NOTE(review): %>%, filter() and select() are presumably dplyr's; no
# library() call is shown in this snippet -- confirm dplyr is attached.
clean_data <- mtcars %>%
  filter(mpg > 20) %>%
  select(mpg, wt, cyl, hp)

goose_save(
  clean_data,
  category = "datasets",
  tags = c("filtered", "mtcars", "high_mpg")
)

# Save multiple objects at once
# Bundle the model, the filtered data and the model summary in one named
# list so that a single goose_save() call stores all three together.
results <- list(
  model = my_model,
  data = clean_data,
  summary = summary(my_model)
)

goose_save(
  results,
  category = "analysis",
  tags = c("complete_analysis", "2024_q4")
)

Retrieving Objects

# List what's saved
# Called without arguments, so no category or tag filter is applied.
goose_list()

# List by category
goose_list(category = "models")

# List by tags
goose_list(tags = "mtcars")

# Load an object
# The object is addressed by its saved name, given as a string; the result
# can be bound to any variable name.
my_saved_model <- goose_load("my_model")

# Load with full metadata
# With include_metadata = TRUE the result is read through its $metadata
# element below; created_at is the field printed here.
obj_with_meta <- goose_load("my_model", include_metadata = TRUE)
print(obj_with_meta$metadata$created_at)

Organization with Categories and Tags

# Categories: Broad classifications
# - "models", "datasets", "plots", "reports", "temp"

# Tags: Specific descriptors
# - "production", "test", "client_a", "2024_q4", "regression"

# Example: Organizing a project
# Every object carries the shared "client_a" tag plus tags describing its
# stage or version, so the project can be filtered as a whole or by stage.
# NOTE: raw_data, model_v1, model_v2 and final_plot are placeholders that are
# not created in this snippet; they must already exist in the workspace.
goose_save(raw_data, category = "datasets", tags = c("raw", "client_a", "2024"))
goose_save(clean_data, category = "datasets", tags = c("clean", "client_a", "2024"))
goose_save(model_v1, category = "models", tags = c("v1", "client_a", "baseline"))
goose_save(model_v2, category = "models", tags = c("v2", "client_a", "improved"))
goose_save(final_plot, category = "plots", tags = c("final", "client_a", "presentation"))

Bulk Operations

Backup and Restore

# Backup everything before major changes
goose_backup()
# Creates timestamped backup: backup_20241204_143022

# Work on your analysis...
# If something goes wrong:

# List available backups
# list.files() returns the entry names found in the given directory.
# NOTE(review): the path below is the location this example assumes for
# backups -- confirm it matches your gooseR configuration.
list.files(path = "~/.config/goose/memory/backups")

# Restore from backup
# The argument is the backup's name, in the same form as the timestamped
# example name shown after goose_backup() above.
goose_restore("backup_20241204_143022")

Cleaning by Tags

# Remove all test objects
# A character vector passes several tags in a single call.
goose_clear_tags(c("test", "temp"))

# Remove draft versions
# A single tag is passed as a plain string.
goose_clear_tags("draft")

# Clean up after experimentation
goose_clear_tags(c("experiment", "sandbox"))

Session Management

# Work in a temporary session that auto-cleans
# The braced block is passed as the first argument of with_goose_session();
# cleanup is the second, named argument.
with_goose_session({
  
  # Experimental work
  # Add a squared-mpg column to a copy of mtcars
  # (assumes dplyr is attached for %>% and mutate() -- confirm).
  test_data <- mtcars %>% 
    mutate(mpg_squared = mpg^2)
  
  test_model <- lm(mpg_squared ~ wt + cyl, data = test_data)
  
  # Save temporarily
  goose_save(test_model, category = "session_temp", tags = "experiment")
  
  # Do analysis
  print(summary(test_model))
  
}, cleanup = TRUE)  # Everything in session_temp is deleted after

# For persistent session work
# Same wrapper, but cleanup = FALSE so the saved object is kept afterwards.
with_goose_session({
  
  # Production work
  final_model <- lm(mpg ~ wt + cyl + hp, data = mtcars)
  goose_save(final_model, category = "production", tags = "final")
  
}, cleanup = FALSE)  # Keeps everything

Workflow Tools

Sharing Data Context

# Load your data
# "complex_dataset.csv" is a placeholder path, resolved relative to the
# current working directory.
my_data <- read.csv("complex_dataset.csv")

# Share a sample with goose for context
# n = 10 requests a sample of size 10.
goose_give_sample(my_data, n = 10)

# Now goose understands your data structure
# The question is passed as a single string; the reply is kept in `advice`.
advice <- goose_ask("What's the best way to handle the missing values in this dataset?")

Getting Analysis Plans

# Share your data first
# Called without n here, so the function's default sample size applies.
goose_give_sample(my_data)

# Get an exploratory analysis plan
# The plan type is selected by a string; "exploratory", "predictive" and
# "diagnostic" are the three values used in this example.
exploratory_plan <- goose_make_a_plan("exploratory")
cat(exploratory_plan)

# Output:
# Based on your data structure, here's an exploratory analysis plan:
# 
# 1. Data Overview
#    - Check dimensions: 1000 rows × 15 columns
#    - Examine variable types
#    - Missing value analysis
# 
# 2. Univariate Analysis
#    - Distribution of continuous variables
#    - Frequency tables for categorical
# ...

# Get a predictive modeling plan
predictive_plan <- goose_make_a_plan("predictive")

# Get a diagnostic plan
diagnostic_plan <- goose_make_a_plan("diagnostic")

Creating Handoffs

# After completing analysis
# Collect the final artifacts in one named list.
# NOTE: final_model, model_metrics, residual_plot and prediction_plot are
# placeholders that must already exist in the workspace.
# NOTE(review): `results` is neither saved nor passed to goose_handoff() in
# this snippet -- confirm whether it should be stored with goose_save() first.
results <- list(
  model = final_model,
  performance = model_metrics,
  plots = list(residual_plot, prediction_plot)
)

# Create comprehensive handoff documentation
handoff <- goose_handoff()

# This generates:
# - Summary of work completed
# - Key findings
# - Code snippets for reproduction
# - List of saved objects
# - Next steps recommendations

# Save the handoff
# writeLines() writes each element of `handoff` as one line of the file.
writeLines(handoff, "project_handoff.md")

Continuation Prompts

# At the end of your work session
# The result is not assigned here, so at the console it is simply shown.
goose_continuation_prompt()

# This creates a prompt you can use tomorrow:
# "Continue analysis of customer churn model. Last session:
#  - Completed data cleaning (saved as 'clean_data')
#  - Built baseline model (saved as 'baseline_model', AUC=0.72)
#  - Identified class imbalance issue
#  Next: Try SMOTE for balancing, feature engineering on date fields"

# Save it
# Capture the prompt and write it to a file named after today's date,
# e.g. "continue_2024-12-04.txt" (paste0() converts the Date to text).
prompt <- goose_continuation_prompt()
writeLines(prompt, paste0("continue_", Sys.Date(), ".txt"))

Session Summaries

# Summarize what you've done
# The result is stored as `session_summary` rather than `summary`, so the
# variable does not mask base R's summary() function in the workspace.
session_summary <- goose_summarize_session()
cat(session_summary)

# Output:
# Session Summary - 2024-12-04
# 
# Objects Created:
# - clean_data (datasets): 5000 rows × 12 columns
# - model_v1 (models): Linear regression, R² = 0.84
# - model_v2 (models): Random forest, R² = 0.91
# - comparison_plot (plots): Model comparison visualization
# 
# Key Activities:
# - Data cleaning and preprocessing
# - Feature engineering (3 new features)
# - Model comparison (linear vs. tree-based)
# 
# Recommendations:
# - model_v2 shows better performance
# - Consider ensemble approach
# - Validate on holdout set

Real-World Workflow Examples

Daily Analysis Workflow

# Morning: Continue from yesterday
# readLines() returns one element per line of the saved prompt. cat() joins
# its arguments with a single space by default, which would run the whole
# prompt together on one line, so print the elements newline-separated.
yesterday_prompt <- readLines("continue_2024-12-03.txt")
cat(yesterday_prompt, sep = "\n")

# Load saved objects
# Objects are addressed by the names they were saved under.
my_data <- goose_load("clean_data")
my_model <- goose_load("baseline_model")

# Work on improvements
# NOTE: improve_model() is a placeholder for your own function; it is not
# defined in this vignette.
improved_model <- improve_model(my_model, my_data)

# Get feedback
goose_honk(severity = "moderate")

# Save progress
goose_save(improved_model, category = "models", tags = c("improved", "day2"))

# End of day
# Neither result is assigned here; see the continuation-prompt and
# session-summary sections for capturing them and writing them to a file.
goose_continuation_prompt()
goose_summarize_session()

Team Collaboration Workflow

# Team member A completes initial analysis
# NOTE: build_model() and `data` are placeholders for your own modelling
# function and dataset; neither is defined in this vignette.
model_a <- build_model(data)
goose_save(model_a, category = "models", tags = c("team_a", "initial"))

# Create handoff for Team member B
handoff <- goose_handoff()
writeLines(handoff, "handoff_to_team_b.md")

# Team member B picks up work
# Find what team A saved via its tag, then load the model by its saved name.
goose_list(tags = "team_a")
model_a <- goose_load("model_a")

# Continue work
# NOTE: enhance_model() is likewise a placeholder for your own function.
model_b <- enhance_model(model_a)
goose_save(model_b, category = "models", tags = c("team_b", "enhanced"))

Project Organization Pattern

#' Save a project's standard artifacts under consistent categories and tags
#'
#' Stores the raw data, cleaned data, baseline model, plots and report with
#' `goose_save()`, tagging every object with `project_name` so that the whole
#' project can later be listed by that tag.
#'
#' The objects `raw_data`, `clean_data`, `model_v1`, `plots` and `report` are
#' not arguments: they are looked up in the function's enclosing environment
#' (normally the global workspace) and must exist before the call.
#'
#' @param project_name Character string used as the first tag of every object.
#' @return The value returned by the last `goose_save()` call (the report).
project_setup <- function(project_name) {
  # c() on character values and a Date silently coerces the Date to its
  # underlying day count (e.g. "20061"), so convert it to "YYYY-MM-DD" text
  # first. Computing it once also keeps all date tags identical.
  today <- format(Sys.Date())

  # Save raw data
  goose_save(
    raw_data,
    category = "data_raw",
    tags = c(project_name, "raw", today)
  )

  # Save cleaned data
  goose_save(
    clean_data,
    category = "data_clean",
    tags = c(project_name, "clean", today)
  )

  # Save models with versioning
  goose_save(
    model_v1,
    category = "models",
    tags = c(project_name, "v1", "baseline")
  )

  # Save visualizations
  goose_save(
    plots,
    category = "visualizations",
    tags = c(project_name, "final")
  )

  # Save reports
  goose_save(
    report,
    category = "reports",
    tags = c(project_name, "final", today)
  )
}

# List everything for a project
# NOTE(review): `project_name` exists only as the argument of project_setup()
# above; at top level it must be bound first (for example to the same string
# that was passed to project_setup()) or this line fails with
# "object 'project_name' not found".
goose_list(tags = project_name)

Advanced Memory Features

Memory Statistics

# Get memory usage statistics
# The result is kept in `stats` and shown with print().
stats <- goose_memory_stats()
print(stats)

# Output:
# Memory Statistics:
# Total objects: 47
# Total size: 15.3 MB
# By category:
#   - models: 12 objects (8.1 MB)
#   - datasets: 20 objects (5.2 MB)
#   - plots: 15 objects (2.0 MB)

Selective Operations

# Remove a single object, addressed by its saved name
goose_delete("old_model")

# Remove every saved object whose name starts with "test_":
# fetch the full listing, keep only the matching rows, then delete each
# matching object by name.
objects <- goose_list()
old_objects <- filter(objects, grepl("^test_", name))

for (obj in old_objects[["name"]]) {
  goose_delete(obj)
}

Export and Import

# Export memory for sharing
# The argument is the archive file to write; a relative path like this one
# resolves against the current working directory.
goose_export_memory("project_memory.zip")

# On another machine
# Copy the archive over first; the same file name is then passed to import.
goose_import_memory("project_memory.zip")

Best Practices

Use Meaningful Names: customer_churn_model not model1
Tag Consistently: Develop a tagging convention
- Version tags: “v1”, “v2”, “final”
- Status tags: “draft”, “review”, “production”
- Time tags: “2024_q4”, “december”
Regular Backups: Before major changes or experiments
Clean Regularly: Remove test and temporary objects
Document with Handoffs: Create handoffs for future you
Use Sessions for Experiments: Isolate experimental work

Troubleshooting

Issue: Can’t Find Saved Object

# Check if it exists
# grep() treats "my_object" as a regular expression, so partial name matches
# are found too; value = TRUE returns the matching names themselves instead
# of their positions.
all_objects <- goose_list()
grep("my_object", all_objects$name, value = TRUE)

# Check with different category
goose_list(category = "models")

# Check metadata
# "object_name" is a placeholder for the saved name you are looking for.
meta <- goose_get_metadata("object_name")

Issue: Memory Getting Large

# Check what's taking space
# Inspect `stats` (e.g. with print(stats)) to see the breakdown.
stats <- goose_memory_stats()

# Clean old backups
# keep_last = 3 asks for the three most recent backups to be retained.
goose_clean_backups(keep_last = 3)

# Remove by tags
goose_clear_tags(c("old", "deprecated", "test"))

Conclusion

gooseR’s memory and workflow tools transform how you work in R. No more losing work, struggling to remember where you left off, or manually managing file versions. The intelligent memory system combined with workflow tools creates a seamless, productive environment for data science.

For more information about gooseR’s capabilities, see the other vignettes in the package documentation.

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.