Last updated on 2026-01-17 07:50:47 CET.
| Package | ERROR | NOTE | OK |
|---|---|---|---|
| glow | 2 | 11 | |
| qs | 9 | 4 | |
| qs2 | 8 | 5 | |
| seqtrie | 3 | 10 | |
| stringfish | 8 | 5 |
Current CRAN status: NOTE: 2, OK: 11
Version: 0.13.0
Check: installed package size
Result: NOTE
installed size is 11.5Mb
sub-directories of 1Mb or more:
doc 1.3Mb
libs 9.9Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Current CRAN status: ERROR: 9, NOTE: 4
Version: 0.27.3
Check: compiled code
Result: WARN
File ‘qs/libs/qs.so’:
Found non-API calls to R: ‘ATTRIB’, ‘CLOENV’, ‘ENCLOS’, ‘FRAME’,
‘HASHTAB’, ‘IS_S4_OBJECT’, ‘LEVELS’, ‘OBJECT’, ‘PRENV’,
‘Rf_allocSExp’, ‘SETLEVELS’, ‘SET_ATTRIB’, ‘SET_CLOENV’,
‘SET_ENCLOS’, ‘SET_FRAME’, ‘SET_HASHTAB’, ‘SET_OBJECT’,
‘SET_PRENV’, ‘SET_S4_OBJECT’, ‘SET_TRUELENGTH’
These entry points may be removed soon:
‘SET_FRAME’, ‘SET_HASHTAB’, ‘SET_ENCLOS’, ‘SET_S4_OBJECT’, ‘FRAME’, ‘HASHTAB’, ‘IS_S4_OBJECT’, ‘CLOENV’, ‘ENCLOS’, ‘OBJECT’, ‘SET_CLOENV’, ‘LEVELS’, ‘SETLEVELS’, ‘SET_TRUELENGTH’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [174s/198s]
Running ‘qattributes_testing.R’ [40s/48s]
Running ‘qsavemload_testing.R’ [2s/3s]
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01839 s
strings: 1, 0.008072 s
strings: 2, 0.01173 s
strings: 4, 0.01626 s
strings: 8, 0.003859 s
strings: 31, 0.01184 s
strings: 33, 0.01285 s
strings: 32, 0.02049 s
strings: 255, 0.006437 s
strings: 257, 0.006623 s
strings: 256, 0.004237 s
strings: 65535, 0.013 s
strings: 65537, 0.00507 s
strings: 65536, 0.002029 s
strings: 1e+06, 0.04803 s
Character Vectors: 0, 0.003748 s
Character Vectors: 1, 0.0003254 s
Character Vectors: 2, 0.001876 s
Character Vectors: 4, 0.001978 s
Character Vectors: 8, 0.002263 s
Character Vectors: 31, 0.003037 s
Character Vectors: 33, 0.0002697 s
Character Vectors: 32, 0.000201 s
Character Vectors: 255, 0.002719 s
Character Vectors: 257, 0.00323 s
Character Vectors: 256, 0.001819 s
Character Vectors: 65535, 0.005419 s
Character Vectors: 65537, 0.004439 s
Character Vectors: 65536, 0.005115 s
Stringfish: 0, 0.001407 s
Stringfish: 1, 0.0001713 s
Stringfish: 2, 0.001469 s
Stringfish: 4, 0.001578 s
Stringfish: 8, 0.001775 s
Stringfish: 31, 0.0001021 s
Stringfish: 33, 0.0001938 s
Stringfish: 32, 0.002267 s
Stringfish: 255, 0.002511 s
Stringfish: 257, 0.004013 s
Stringfish: 256, 0.003525 s
Stringfish: 65535, 0.005733 s
Stringfish: 65537, 0.004328 s
Stringfish: 65536, 0.004117 s
Integers: 0, 0.01026 s
Integers: 1, 0.006976 s
Integers: 2, 0.001725 s
Integers: 4, 0.005686 s
Integers: 8, 0.005032 s
Integers: 31, 0.0006406 s
Integers: 33, 0.002851 s
Integers: 32, 0.0008873 s
Integers: 255, 0.004486 s
Integers: 257, 0.003478 s
Integers: 256, 0.005483 s
Integers: 65535, 0.002463 s
Integers: 65537, 0.01357 s
Integers: 65536, 0.02322 s
Integers: 1e+06, 0.06291 s
Numeric: 0, 0.01436 s
Numeric: 1, 0.01319 s
Numeric: 2, 0.0006604 s
Numeric: 4, 0.00541 s
Numeric: 8, 0.005254 s
Numeric: 31, 0.00342 s
Numeric: 33, 0.01085 s
Numeric: 32, 0.005772 s
Numeric: 255, 0.006672 s
Numeric: 257, 0.003254 s
Numeric: 256, 0.006108 s
Numeric: 65535, 0.04796 s
Numeric: 65537, 0.007727 s
Numeric: 65536, 0.01155 s
Numeric: 1e+06, 0.07049 s
Logical: 0, 0.007845 s
Logical: 1, 0.003068 s
Logical: 2, 0.006737 s
Logical: 4, 0.0167 s
Logical: 8, 0.003755 s
Logical: 31, 0.009884 s
Logical: 33, 0.005334 s
Logical: 32, 0.00472 s
Logical: 255, 0.001809 s
Logical: 257, 0.005332 s
Logical: 256, 0.007711 s
Logical: 65535, 0.005529 s
Logical: 65537, 0.002591 s
Logical: 65536, 0.01223 s
Logical: 1e+06, 0.07937 s
List: 0, 0.01634 s
List: 1, 0.02941 s
List: 2, 0.009864 s
List: 4, 0.008147 s
List: 8, 0.003481 s
List: 31, 0.003627 s
List: 33, 0.003339 s
List: 32, 0.005137 s
List: 255, 0.006929 s
List: 257, 0.001076 s
List: 256, 0.007838 s
List: 65535, 0.06138 s
List: 65537, 0.0421 s
List: 65536, 0.03048 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-debian-clang
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [163s/180s]
Running ‘qattributes_testing.R’ [36s/42s]
Running ‘qsavemload_testing.R’ [1s/2s]
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.003366 s
strings: 1, 0.006078 s
strings: 2, 0.002462 s
strings: 4, 0.007143 s
strings: 8, 0.006568 s
strings: 31, 0.001753 s
strings: 33, 0.0006116 s
strings: 32, 0.003418 s
strings: 255, 0.007762 s
strings: 257, 0.007367 s
strings: 256, 0.003505 s
strings: 65535, 0.001628 s
strings: 65537, 0.001234 s
strings: 65536, 0.002806 s
strings: 1e+06, 0.005042 s
Character Vectors: 0, 0.005396 s
Character Vectors: 1, 0.002526 s
Character Vectors: 2, 0.004985 s
Character Vectors: 4, 0.003548 s
Character Vectors: 8, 0.002762 s
Character Vectors: 31, 0.004007 s
Character Vectors: 33, 0.0001039 s
Character Vectors: 32, 0.006719 s
Character Vectors: 255, 0.0001298 s
Character Vectors: 257, 0.00372 s
Character Vectors: 256, 0.002715 s
Character Vectors: 65535, 0.004003 s
Character Vectors: 65537, 0.002703 s
Character Vectors: 65536, 0.004312 s
Stringfish: 0, 0.002836 s
Stringfish: 1, 0.004479 s
Stringfish: 2, 0.001024 s
Stringfish: 4, 0.002719 s
Stringfish: 8, 0.003321 s
Stringfish: 31, 0.004043 s
Stringfish: 33, 0.001406 s
Stringfish: 32, 0.006943 s
Stringfish: 255, 0.001567 s
Stringfish: 257, 0.0001264 s
Stringfish: 256, 0.006053 s
Stringfish: 65535, 0.004709 s
Stringfish: 65537, 0.004464 s
Stringfish: 65536, 0.002047 s
Integers: 0, 0.004562 s
Integers: 1, 0.006745 s
Integers: 2, 0.00537 s
Integers: 4, 0.0009223 s
Integers: 8, 0.001773 s
Integers: 31, 0.00795 s
Integers: 33, 0.004393 s
Integers: 32, 0.003989 s
Integers: 255, 0.005397 s
Integers: 257, 0.001542 s
Integers: 256, 0.01382 s
Integers: 65535, 0.002929 s
Integers: 65537, 0.003344 s
Integers: 65536, 0.005255 s
Integers: 1e+06, 0.1002 s
Numeric: 0, 0.004781 s
Numeric: 1, 0.005236 s
Numeric: 2, 0.009243 s
Numeric: 4, 0.003752 s
Numeric: 8, 0.006089 s
Numeric: 31, 0.007162 s
Numeric: 33, 0.0041 s
Numeric: 32, 0.0005991 s
Numeric: 255, 0.003393 s
Numeric: 257, 0.007607 s
Numeric: 256, 0.00305 s
Numeric: 65535, 0.03274 s
Numeric: 65537, 0.002001 s
Numeric: 65536, 0.02028 s
Numeric: 1e+06, 0.0414 s
Logical: 0, 0.001272 s
Logical: 1, 0.004496 s
Logical: 2, 0.007358 s
Logical: 4, 0.004575 s
Logical: 8, 0.007692 s
Logical: 31, 0.004639 s
Logical: 33, 0.00254 s
Logical: 32, 0.009129 s
Logical: 255, 0.007413 s
Logical: 257, 0.002104 s
Logical: 256, 0.002505 s
Logical: 65535, 0.005103 s
Logical: 65537, 0.0006406 s
Logical: 65536, 0.006392 s
Logical: 1e+06, 0.06665 s
List: 0, 0.009021 s
List: 1, 0.009541 s
List: 2, 0.004886 s
List: 4, 0.003415 s
List: 8, 0.004859 s
List: 31, 0.00671 s
List: 33, 0.001373 s
List: 32, 0.003241 s
List: 255, 0.01052 s
List: 257, 0.00526 s
List: 256, 0.008909 s
List: 65535, 0.01696 s
List: 65537, 0.01381 s
List: 65536, 0.03153 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-debian-gcc
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [250s/225s]
Running ‘qattributes_testing.R’ [51s/49s]
Running ‘qsavemload_testing.R’
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.009356 s
strings: 1, 0.002449 s
strings: 2, 0.002229 s
strings: 4, 0.000997 s
strings: 8, 0.002572 s
strings: 31, 0.001246 s
strings: 33, 0.001198 s
strings: 32, 0.002173 s
strings: 255, 0.003878 s
strings: 257, 0.001953 s
strings: 256, 0.005335 s
strings: 65535, 0.003488 s
strings: 65537, 0.003043 s
strings: 65536, 0.003477 s
strings: 1e+06, 0.01308 s
Character Vectors: 0, 0.002339 s
Character Vectors: 1, 0.0002013 s
Character Vectors: 2, 0.0009305 s
Character Vectors: 4, 0.0008303 s
Character Vectors: 8, 0.0007999 s
Character Vectors: 31, 0.001693 s
Character Vectors: 33, 0.0009114 s
Character Vectors: 32, 0.0002013 s
Character Vectors: 255, 0.002958 s
Character Vectors: 257, 0.001645 s
Character Vectors: 256, 0.001059 s
Character Vectors: 65535, 0.003764 s
Character Vectors: 65537, 0.003825 s
Character Vectors: 65536, 0.003812 s
Stringfish: 0, 0.00225 s
Stringfish: 1, 0.002262 s
Stringfish: 2, 0.001155 s
Stringfish: 4, 0.001724 s
Stringfish: 8, 0.004565 s
Stringfish: 31, 0.001398 s
Stringfish: 33, 0.000667 s
Stringfish: 32, 0.001145 s
Stringfish: 255, 0.0001657 s
Stringfish: 257, 0.001605 s
Stringfish: 256, 0.001853 s
Stringfish: 65535, 0.005438 s
Stringfish: 65537, 0.004375 s
Stringfish: 65536, 0.004673 s
Integers: 0, 0.001083 s
Integers: 1, 0.002678 s
Integers: 2, 0.005478 s
Integers: 4, 0.002213 s
Integers: 8, 0.001302 s
Integers: 31, 0.001575 s
Integers: 33, 0.003069 s
Integers: 32, 0.002477 s
Integers: 255, 0.00388 s
Integers: 257, 0.005076 s
Integers: 256, 0.001208 s
Integers: 65535, 0.005897 s
Integers: 65537, 0.01503 s
Integers: 65536, 0.01505 s
Integers: 1e+06, 0.01591 s
Numeric: 0, 0.00152 s
Numeric: 1, 0.002921 s
Numeric: 2, 0.001574 s
Numeric: 4, 0.001808 s
Numeric: 8, 0.003116 s
Numeric: 31, 0.003723 s
Numeric: 33, 0.002097 s
Numeric: 32, 0.0006739 s
Numeric: 255, 0.003141 s
Numeric: 257, 0.001247 s
Numeric: 256, 0.002273 s
Numeric: 65535, 0.02083 s
Numeric: 65537, 0.0081 s
Numeric: 65536, 0.004382 s
Numeric: 1e+06, 0.05257 s
Logical: 0, 0.003636 s
Logical: 1, 0.001219 s
Logical: 2, 0.005997 s
Logical: 4, 0.002916 s
Logical: 8, 0.003631 s
Logical: 31, 0.002444 s
Logical: 33, 0.003901 s
Logical: 32, 0.002308 s
Logical: 255, 0.00233 s
Logical: 257, 0.00264 s
Logical: 256, 0.004361 s
Logical: 65535, 0.01185 s
Logical: 65537, 0.01826 s
Logical: 65536, 0.008167 s
Logical: 1e+06, 0.07363 s
List: 0, 0.001513 s
List: 1, 0.0008422 s
List: 2, 0.002466 s
List: 4, 0.002926 s
List: 8, 0.001764 s
List: 31, 0.00161 s
List: 33, 0.003572 s
List: 32, 0.001719 s
List: 255, 0.002041 s
List: 257, 0.002272 s
List: 256, 0.002196 s
List: 65535, 0.03369 s
List: 65537, 0.03046 s
List: 65536, 0.05288 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-fedora-clang
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [206s/174s]
Running ‘qattributes_testing.R’ [38s/36s]
Running ‘qsavemload_testing.R’
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01194 s
strings: 1, 0.002643 s
strings: 2, 0.007076 s
strings: 4, 0.003403 s
strings: 8, 0.001719 s
strings: 31, 0.001956 s
strings: 33, 0.001135 s
strings: 32, 0.00106 s
strings: 255, 0.001057 s
strings: 257, 0.0008467 s
strings: 256, 0.001502 s
strings: 65535, 0.0009313 s
strings: 65537, 0.00268 s
strings: 65536, 0.002145 s
strings: 1e+06, 0.004885 s
Character Vectors: 0, 0.001691 s
Character Vectors: 1, 0.0006568 s
Character Vectors: 2, 0.0001735 s
Character Vectors: 4, 0.0001791 s
Character Vectors: 8, 0.000978 s
Character Vectors: 31, 0.0009282 s
Character Vectors: 33, 0.0003964 s
Character Vectors: 32, 0.001456 s
Character Vectors: 255, 0.0002793 s
Character Vectors: 257, 0.0009149 s
Character Vectors: 256, 0.0006732 s
Character Vectors: 65535, 0.003101 s
Character Vectors: 65537, 0.003297 s
Character Vectors: 65536, 0.003144 s
Stringfish: 0, 0.0005085 s
Stringfish: 1, 0.0004885 s
Stringfish: 2, 0.0002534 s
Stringfish: 4, 0.0001135 s
Stringfish: 8, 0.000385 s
Stringfish: 31, 0.0008083 s
Stringfish: 33, 0.0001378 s
Stringfish: 32, 0.0008851 s
Stringfish: 255, 0.0002503 s
Stringfish: 257, 0.0003552 s
Stringfish: 256, 0.0003926 s
Stringfish: 65535, 0.002723 s
Stringfish: 65537, 0.002931 s
Stringfish: 65536, 0.003515 s
Integers: 0, 0.002813 s
Integers: 1, 0.002871 s
Integers: 2, 0.0008322 s
Integers: 4, 0.002346 s
Integers: 8, 0.001182 s
Integers: 31, 0.001842 s
Integers: 33, 0.001729 s
Integers: 32, 0.001899 s
Integers: 255, 0.00248 s
Integers: 257, 0.002224 s
Integers: 256, 0.0008977 s
Integers: 65535, 0.006918 s
Integers: 65537, 0.008868 s
Integers: 65536, 0.007175 s
Integers: 1e+06, 0.05394 s
Numeric: 0, 0.002302 s
Numeric: 1, 0.0009944 s
Numeric: 2, 0.001397 s
Numeric: 4, 0.0006198 s
Numeric: 8, 0.001272 s
Numeric: 31, 0.001344 s
Numeric: 33, 0.0009165 s
Numeric: 32, 0.00151 s
Numeric: 255, 0.001325 s
Numeric: 257, 0.001497 s
Numeric: 256, 0.001297 s
Numeric: 65535, 0.005763 s
Numeric: 65537, 0.02266 s
Numeric: 65536, 0.003875 s
Numeric: 1e+06, 0.03541 s
Logical: 0, 0.001405 s
Logical: 1, 0.001444 s
Logical: 2, 0.001954 s
Logical: 4, 0.002081 s
Logical: 8, 0.0009522 s
Logical: 31, 0.001546 s
Logical: 33, 0.001771 s
Logical: 32, 0.001382 s
Logical: 255, 0.001244 s
Logical: 257, 0.00146 s
Logical: 256, 0.004053 s
Logical: 65535, 0.008552 s
Logical: 65537, 0.01698 s
Logical: 65536, 0.0465 s
Logical: 1e+06, 0.01381 s
List: 0, 0.001939 s
List: 1, 0.001472 s
List: 2, 0.001095 s
List: 4, 0.001241 s
List: 8, 0.002116 s
List: 31, 0.001408 s
List: 33, 0.0009129 s
List: 32, 0.0007467 s
List: 255, 0.001243 s
List: 257, 0.00207 s
List: 256, 0.00139 s
List: 65535, 0.03636 s
List: 65537, 0.03048 s
List: 65536, 0.02125 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-linux-x86_64-fedora-gcc
Version: 0.27.3
Check: compiled code
Result: WARN
File 'qs/libs/x64/qs.dll':
Found non-API calls to R: 'ATTRIB', 'CLOENV', 'ENCLOS', 'FRAME',
'HASHTAB', 'IS_S4_OBJECT', 'LEVELS', 'OBJECT', 'PRENV',
'Rf_allocSExp', 'SETLEVELS', 'SET_ATTRIB', 'SET_CLOENV',
'SET_ENCLOS', 'SET_FRAME', 'SET_HASHTAB', 'SET_OBJECT',
'SET_PRENV', 'SET_S4_OBJECT', 'SET_TRUELENGTH'
These entry points may be removed soon:
'SET_FRAME', 'SET_HASHTAB', 'SET_ENCLOS', 'SET_S4_OBJECT', 'FRAME', 'HASHTAB', 'IS_S4_OBJECT', 'CLOENV', 'ENCLOS', 'OBJECT', 'SET_CLOENV', 'LEVELS', 'SETLEVELS', 'SET_TRUELENGTH'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-devel-windows-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running 'correctness_testing.R' [148s]
Running 'qattributes_testing.R' [37s]
Running 'qsavemload_testing.R' [2s]
Running the tests in 'tests/qattributes_testing.R' failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.007045 s
strings: 1, 0.005368 s
strings: 2, 0.03614 s
strings: 4, 0.004735 s
strings: 8, 0.002034 s
strings: 31, 0.001958 s
strings: 33, 0.002105 s
strings: 32, 0.001634 s
strings: 255, 0.001448 s
strings: 257, 0.00773 s
strings: 256, 0.00128 s
strings: 65535, 0.001813 s
strings: 65537, 0.003859 s
strings: 65536, 0.002622 s
strings: 1e+06, 0.01715 s
Character Vectors: 0, 0.0003627 s
Character Vectors: 1, 0.001174 s
Character Vectors: 2, 0.0004533 s
Character Vectors: 4, 0.001174 s
Character Vectors: 8, 0.0005004 s
Character Vectors: 31, 0.001013 s
Character Vectors: 33, 0.0006286 s
Character Vectors: 32, 5e-04 s
Character Vectors: 255, 0.001065 s
Character Vectors: 257, 0.0004953 s
Character Vectors: 256, 0.000275 s
Character Vectors: 65535, 0.003052 s
Character Vectors: 65537, 0.004204 s
Character Vectors: 65536, 0.003428 s
Stringfish: 0, 0.0006653 s
Stringfish: 1, 0.000616 s
Stringfish: 2, 0.0004481 s
Stringfish: 4, 0.005405 s
Stringfish: 8, 0.000869 s
Stringfish: 31, 0.0007344 s
Stringfish: 33, 0.001042 s
Stringfish: 32, 0.0001877 s
Stringfish: 255, 0.002173 s
Stringfish: 257, 0.00203 s
Stringfish: 256, 0.00226 s
Stringfish: 65535, 0.004204 s
Stringfish: 65537, 0.004292 s
Stringfish: 65536, 0.004566 s
Integers: 0, 0.0106 s
Integers: 1, 0.001837 s
Integers: 2, 0.002172 s
Integers: 4, 0.003867 s
Integers: 8, 0.008659 s
Integers: 31, 0.002925 s
Integers: 33, 0.001387 s
Integers: 32, 0.002219 s
Integers: 255, 0.001377 s
Integers: 257, 0.001872 s
Integers: 256, 0.003394 s
Integers: 65535, 0.0112 s
Integers: 65537, 0.007672 s
Integers: 65536, 0.007096 s
Integers: 1e+06, 0.02508 s
Numeric: 0, 0.004996 s
Numeric: 1, 0.001698 s
Numeric: 2, 0.003982 s
Numeric: 4, 0.003566 s
Numeric: 8, 0.003144 s
Numeric: 31, 0.002834 s
Numeric: 33, 0.002203 s
Numeric: 32, 0.0116 s
Numeric: 255, 0.004314 s
Numeric: 257, 0.001745 s
Numeric: 256, 0.0009637 s
Numeric: 65535, 0.005668 s
Numeric: 65537, 0.01298 s
Numeric: 65536, 0.02057 s
Numeric: 1e+06, 0.1287 s
Logical: 0, 0.00346 s
Logical: 1, 0.002184 s
Logical: 2, 0.005274 s
Logical: 4, 0.001993 s
Logical: 8, 0.002455 s
Logical: 31, 0.00852 s
Logical: 33, 0.007839 s
Logical: 32, 0.00138 s
Logical: 255, 0.00227 s
Logical: 257, 0.002899 s
Logical: 256, 0.00185 s
Logical: 65535, 0.003061 s
Logical: 65537, 0.01665 s
Logical: 65536, 0.01143 s
Logical: 1e+06, 0.0432 s
List: 0, 0.009176 s
List: 1, 0.004033 s
List: 2, 0.002831 s
List: 4, 0.002344 s
List: 8, 0.004451 s
List: 31, 0.003965 s
List: 33, 0.001461 s
List: 32, 0.002055 s
List: 255, 0.0007574 s
List: 257, 0.002736 s
List: 256, 0.002682 s
List: 65535, 0.03116 s
List: 65537, 0.01978 s
List: 65536, 0.01712 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-devel-windows-x86_64
Version: 0.27.3
Check: compiled code
Result: NOTE
File ‘qs/libs/qs.so’:
Found non-API calls to R: ‘CLOENV’, ‘ENCLOS’, ‘FRAME’, ‘HASHTAB’,
‘IS_S4_OBJECT’, ‘LEVELS’, ‘OBJECT’, ‘PRENV’, ‘Rf_allocSExp’,
‘SETLEVELS’, ‘SET_CLOENV’, ‘SET_ENCLOS’, ‘SET_FRAME’,
‘SET_HASHTAB’, ‘SET_PRENV’, ‘SET_S4_OBJECT’, ‘SET_TRUELENGTH’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-patched-linux-x86_64, r-release-linux-x86_64, r-release-macos-arm64, r-release-macos-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [187s/215s]
Running ‘qattributes_testing.R’ [39s/49s]
Running ‘qsavemload_testing.R’ [2s/3s]
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.008335 s
strings: 1, 0.001589 s
strings: 2, 0.005328 s
strings: 4, 0.004526 s
strings: 8, 0.009332 s
strings: 31, 0.002916 s
strings: 33, 0.003724 s
strings: 32, 0.02566 s
strings: 255, 0.02332 s
strings: 257, 0.009152 s
strings: 256, 0.01067 s
strings: 65535, 0.01152 s
strings: 65537, 0.004959 s
strings: 65536, 0.005337 s
strings: 1e+06, 0.005188 s
Character Vectors: 0, 0.002716 s
Character Vectors: 1, 0.003842 s
Character Vectors: 2, 0.001494 s
Character Vectors: 4, 0.002481 s
Character Vectors: 8, 0.003608 s
Character Vectors: 31, 0.001442 s
Character Vectors: 33, 0.0003881 s
Character Vectors: 32, 0.002203 s
Character Vectors: 255, 0.001411 s
Character Vectors: 257, 0.001467 s
Character Vectors: 256, 0.002894 s
Character Vectors: 65535, 0.00561 s
Character Vectors: 65537, 0.004125 s
Character Vectors: 65536, 0.00882 s
Stringfish: 0, 0.003949 s
Stringfish: 1, 0.0001089 s
Stringfish: 2, 0.004158 s
Stringfish: 4, 0.001421 s
Stringfish: 8, 0.000139 s
Stringfish: 31, 0.001419 s
Stringfish: 33, 0.000296 s
Stringfish: 32, 0.0005629 s
Stringfish: 255, 0.0002111 s
Stringfish: 257, 0.001542 s
Stringfish: 256, 0.001908 s
Stringfish: 65535, 0.005994 s
Stringfish: 65537, 0.002662 s
Stringfish: 65536, 0.007634 s
Integers: 0, 0.01164 s
Integers: 1, 0.008142 s
Integers: 2, 0.003641 s
Integers: 4, 0.009088 s
Integers: 8, 0.002116 s
Integers: 31, 0.006626 s
Integers: 33, 0.004682 s
Integers: 32, 0.001446 s
Integers: 255, 0.008514 s
Integers: 257, 0.002121 s
Integers: 256, 0.01267 s
Integers: 65535, 0.01274 s
Integers: 65537, 0.003096 s
Integers: 65536, 0.01467 s
Integers: 1e+06, 0.1545 s
Numeric: 0, 0.0009878 s
Numeric: 1, 0.007404 s
Numeric: 2, 0.009199 s
Numeric: 4, 0.03211 s
Numeric: 8, 0.005347 s
Numeric: 31, 0.002715 s
Numeric: 33, 0.003168 s
Numeric: 32, 0.005318 s
Numeric: 255, 0.004453 s
Numeric: 257, 0.003457 s
Numeric: 256, 0.002395 s
Numeric: 65535, 0.01618 s
Numeric: 65537, 0.01845 s
Numeric: 65536, 0.03002 s
Numeric: 1e+06, 0.08273 s
Logical: 0, 0.01116 s
Logical: 1, 0.008653 s
Logical: 2, 0.01326 s
Logical: 4, 0.01134 s
Logical: 8, 0.02738 s
Logical: 31, 0.003159 s
Logical: 33, 0.002186 s
Logical: 32, 0.005624 s
Logical: 255, 0.001965 s
Logical: 257, 0.002383 s
Logical: 256, 0.003695 s
Logical: 65535, 0.007632 s
Logical: 65537, 0.009059 s
Logical: 65536, 0.009762 s
Logical: 1e+06, 0.09779 s
List: 0, 0.01469 s
List: 1, 0.002958 s
List: 2, 0.01393 s
List: 4, 0.008963 s
List: 8, 0.000686 s
List: 31, 0.01696 s
List: 33, 0.003877 s
List: 32, 0.02441 s
List: 255, 0.0223 s
List: 257, 0.001793 s
List: 256, 0.007287 s
List: 65535, 0.0275 s
List: 65537, 0.03482 s
List: 65536, 0.03385 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-patched-linux-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running ‘correctness_testing.R’ [202s/215s]
Running ‘qattributes_testing.R’ [39s/50s]
Running ‘qsavemload_testing.R’ [2s/3s]
Running the tests in ‘tests/qattributes_testing.R’ failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01837 s
strings: 1, 0.02204 s
strings: 2, 0.0189 s
strings: 4, 0.01109 s
strings: 8, 0.008238 s
strings: 31, 0.01218 s
strings: 33, 0.002444 s
strings: 32, 0.0006002 s
strings: 255, 0.006365 s
strings: 257, 0.00689 s
strings: 256, 0.01648 s
strings: 65535, 0.003615 s
strings: 65537, 0.0128 s
strings: 65536, 0.002887 s
strings: 1e+06, 0.01393 s
Character Vectors: 0, 0.002719 s
Character Vectors: 1, 0.005453 s
Character Vectors: 2, 0.003163 s
Character Vectors: 4, 0.001487 s
Character Vectors: 8, 0.003421 s
Character Vectors: 31, 0.004097 s
Character Vectors: 33, 0.006272 s
Character Vectors: 32, 0.00529 s
Character Vectors: 255, 0.002763 s
Character Vectors: 257, 0.001655 s
Character Vectors: 256, 0.0001852 s
Character Vectors: 65535, 0.006806 s
Character Vectors: 65537, 0.002654 s
Character Vectors: 65536, 0.005361 s
Stringfish: 0, 0.000152 s
Stringfish: 1, 0.002049 s
Stringfish: 2, 0.0002274 s
Stringfish: 4, 0.004925 s
Stringfish: 8, 0.003838 s
Stringfish: 31, 0.001834 s
Stringfish: 33, 0.0007126 s
Stringfish: 32, 0.00272 s
Stringfish: 255, 0.002622 s
Stringfish: 257, 0.0001994 s
Stringfish: 256, 0.004349 s
Stringfish: 65535, 0.005938 s
Stringfish: 65537, 0.005578 s
Stringfish: 65536, 0.009392 s
Integers: 0, 0.08769 s
Integers: 1, 0.03224 s
Integers: 2, 0.0169 s
Integers: 4, 0.002134 s
Integers: 8, 0.03312 s
Integers: 31, 0.008729 s
Integers: 33, 0.007833 s
Integers: 32, 0.005369 s
Integers: 255, 0.01256 s
Integers: 257, 0.004854 s
Integers: 256, 0.006501 s
Integers: 65535, 0.01974 s
Integers: 65537, 0.004936 s
Integers: 65536, 0.01949 s
Integers: 1e+06, 0.1438 s
Numeric: 0, 0.002764 s
Numeric: 1, 0.004437 s
Numeric: 2, 0.004791 s
Numeric: 4, 0.007426 s
Numeric: 8, 0.004446 s
Numeric: 31, 0.002528 s
Numeric: 33, 0.0007096 s
Numeric: 32, 0.007749 s
Numeric: 255, 0.004651 s
Numeric: 257, 0.007333 s
Numeric: 256, 0.008164 s
Numeric: 65535, 0.02643 s
Numeric: 65537, 0.01394 s
Numeric: 65536, 0.04406 s
Numeric: 1e+06, 0.3293 s
Logical: 0, 0.02515 s
Logical: 1, 0.005586 s
Logical: 2, 0.01097 s
Logical: 4, 0.006527 s
Logical: 8, 0.01524 s
Logical: 31, 0.001132 s
Logical: 33, 0.005832 s
Logical: 32, 0.009877 s
Logical: 255, 0.005539 s
Logical: 257, 0.01808 s
Logical: 256, 0.007023 s
Logical: 65535, 0.007277 s
Logical: 65537, 0.004248 s
Logical: 65536, 0.01257 s
Logical: 1e+06, 0.08052 s
List: 0, 0.00292 s
List: 1, 0.01351 s
List: 2, 0.005626 s
List: 4, 0.004891 s
List: 8, 0.008193 s
List: 31, 0.008447 s
List: 33, 0.004609 s
List: 32, 0.003241 s
List: 255, 0.007959 s
List: 257, 0.004824 s
List: 256, 0.003692 s
List: 65535, 0.05713 s
List: 65537, 0.03506 s
List: 65536, 0.03895 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-release-linux-x86_64
Version: 0.27.3
Check: compiled code
Result: NOTE
File 'qs/libs/x64/qs.dll':
Found non-API calls to R: 'CLOENV', 'ENCLOS', 'FRAME', 'HASHTAB',
'IS_S4_OBJECT', 'LEVELS', 'OBJECT', 'PRENV', 'Rf_allocSExp',
'SETLEVELS', 'SET_CLOENV', 'SET_ENCLOS', 'SET_FRAME',
'SET_HASHTAB', 'SET_PRENV', 'SET_S4_OBJECT', 'SET_TRUELENGTH'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-release-windows-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running 'correctness_testing.R' [151s]
Running 'qattributes_testing.R' [40s]
Running 'qsavemload_testing.R' [2s]
Running the tests in 'tests/qattributes_testing.R' failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.008561 s
strings: 1, 0.00171 s
strings: 2, 0.005249 s
strings: 4, 0.003056 s
strings: 8, 0.002309 s
strings: 31, 0.004016 s
strings: 33, 0.009999 s
strings: 32, 0.001743 s
strings: 255, 0.004769 s
strings: 257, 0.002047 s
strings: 256, 0.002198 s
strings: 65535, 0.005093 s
strings: 65537, 0.003199 s
strings: 65536, 0.002931 s
strings: 1e+06, 0.005997 s
Character Vectors: 0, 0.002318 s
Character Vectors: 1, 0.0008759 s
Character Vectors: 2, 0.001524 s
Character Vectors: 4, 0.001809 s
Character Vectors: 8, 0.0004604 s
Character Vectors: 31, 0.0006019 s
Character Vectors: 33, 0.00248 s
Character Vectors: 32, 0.0007733 s
Character Vectors: 255, 0.001336 s
Character Vectors: 257, 0.0007767 s
Character Vectors: 256, 0.002071 s
Character Vectors: 65535, 0.003541 s
Character Vectors: 65537, 0.004614 s
Character Vectors: 65536, 0.004839 s
Stringfish: 0, 0.000276 s
Stringfish: 1, 0.0005003 s
Stringfish: 2, 0.001844 s
Stringfish: 4, 0.001393 s
Stringfish: 8, 0.000484 s
Stringfish: 31, 0.000774 s
Stringfish: 33, 0.000422 s
Stringfish: 32, 0.0006622 s
Stringfish: 255, 0.0003684 s
Stringfish: 257, 0.001382 s
Stringfish: 256, 0.001017 s
Stringfish: 65535, 0.002679 s
Stringfish: 65537, 0.002414 s
Stringfish: 65536, 0.00363 s
Integers: 0, 0.003778 s
Integers: 1, 0.007276 s
Integers: 2, 0.005214 s
Integers: 4, 0.005893 s
Integers: 8, 0.001517 s
Integers: 31, 0.007046 s
Integers: 33, 0.003158 s
Integers: 32, 0.004362 s
Integers: 255, 0.001664 s
Integers: 257, 0.0008713 s
Integers: 256, 0.001821 s
Integers: 65535, 0.01888 s
Integers: 65537, 0.006801 s
Integers: 65536, 0.005494 s
Integers: 1e+06, 0.08203 s
Numeric: 0, 0.005115 s
Numeric: 1, 0.004033 s
Numeric: 2, 0.003572 s
Numeric: 4, 0.002733 s
Numeric: 8, 0.001977 s
Numeric: 31, 0.002539 s
Numeric: 33, 0.001925 s
Numeric: 32, 0.001103 s
Numeric: 255, 0.002537 s
Numeric: 257, 0.001347 s
Numeric: 256, 0.001884 s
Numeric: 65535, 0.01425 s
Numeric: 65537, 0.01205 s
Numeric: 65536, 0.0118 s
Numeric: 1e+06, 0.1535 s
Logical: 0, 0.008744 s
Logical: 1, 0.006554 s
Logical: 2, 0.002355 s
Logical: 4, 0.004763 s
Logical: 8, 0.006495 s
Logical: 31, 0.0024 s
Logical: 33, 0.002526 s
Logical: 32, 0.006323 s
Logical: 255, 0.002705 s
Logical: 257, 0.002795 s
Logical: 256, 0.002018 s
Logical: 65535, 0.01797 s
Logical: 65537, 0.02041 s
Logical: 65536, 0.01181 s
Logical: 1e+06, 0.3472 s
List: 0, 0.003481 s
List: 1, 0.00426 s
List: 2, 0.004317 s
List: 4, 0.002355 s
List: 8, 0.00327 s
List: 31, 0.001282 s
List: 33, 0.00444 s
List: 32, 0.004178 s
List: 255, 0.004183 s
List: 257, 0.006318 s
List: 256, 0.003825 s
List: 65535, 0.02745 s
List: 65537, 0.02311 s
List: 65536, 0.05368 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-release-windows-x86_64
Version: 0.27.3
Check: installed package size
Result: NOTE
installed size is 9.2Mb
sub-directories of 1Mb or more:
doc 1.1Mb
libs 7.8Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Version: 0.27.3
Check: tests
Result: ERROR
Running 'correctness_testing.R' [212s]
Running 'qattributes_testing.R' [46s]
Running 'qsavemload_testing.R' [2s]
Running the tests in 'tests/qattributes_testing.R' failed.
Complete output:
> total_time <- Sys.time()
>
> suppressMessages(library(Rcpp))
> suppressMessages(library(dplyr))
> suppressMessages(library(data.table))
> suppressMessages(library(qs))
> suppressMessages(library(stringfish))
> options(warn = 1)
>
> do_gc <- function() {
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ gc(full = TRUE)
+ } else {
+ gc()
+ }
+ }
>
> # because sourceCpp uses setwd, we need absolute path to R_TESTS when run within R CMD check
> R_TESTS <- Sys.getenv("R_TESTS") # startup.Rs
> if (nzchar(R_TESTS)) {
+ R_TESTS_absolute <- normalizePath(R_TESTS)
+ Sys.setenv(R_TESTS = R_TESTS_absolute)
+ }
> sourceCpp(code="#include <Rcpp.h>
+ using namespace Rcpp;
+ // [[Rcpp::plugins(cpp11)]]
+ // [[Rcpp::export(rng=false)]]
+ CharacterVector splitstr(std::string x, std::vector<double> cuts){
+ CharacterVector ret(cuts.size() - 1);
+ for(uint64_t i=1; i<cuts.size(); i++) {
+ ret[i-1] = x.substr(std::round(cuts[i-1])-1, std::round(cuts[i])-std::round(cuts[i-1]));
+ }
+ return ret;
+ }
+ // [[Rcpp::export(rng=false)]]
+ int setlev(SEXP x, int i) {
+ return SETLEVELS(x,i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ void setobj(SEXP x, int i) {
+ return SET_OBJECT(x, i);
+ }
+ // [[Rcpp::export(rng=false)]]
+ List generateList(std::vector<int> list_elements){
+ auto randchar = []() -> char
+ {
+ const char charset[] =
+ \"0123456789\"
+ \"ABCDEFGHIJKLMNOPQRSTUVWXYZ\"
+ \"abcdefghijklmnopqrstuvwxyz\";
+ const size_t max_index = (sizeof(charset) - 1);
+ return charset[ rand() % max_index ];
+ };
+ List ret(list_elements.size());
+ std::string str(10,0);
+ for(size_t i=0; i<list_elements.size(); i++) {
+ switch(list_elements[i]) {
+ case 1:
+ ret[i] = R_NilValue;
+ break;
+ case 2:
+ std::generate_n( str.begin(), 10, randchar );
+ ret[i] = str;
+ break;
+ case 3:
+ ret[i] = rand();
+ break;
+ case 4:
+ ret[i] = static_cast<double>(rand());
+ break;
+ }
+ }
+ return ret;
+ }")
> if (nzchar(R_TESTS)) Sys.setenv(R_TESTS = R_TESTS)
>
> args <- commandArgs(T)
> if (nzchar(R_TESTS) || ((length(args) > 0) && args[1] == "check")) { # do fewer tests within R CMD check so it completes within a reasonable amount of time
+ reps <- 2
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6)
+ test_points_slow <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16) # for Character Vector, stringfish and list
+ max_size <- 1e6
+ } else {
+ reps <- 3
+ test_points <- c(0, 1, 2, 4, 8, 2^5 - 1, 2^5 + 1, 2^5, 2^8 - 1, 2^8 + 1, 2^8, 2^16 - 1, 2^16 + 1, 2^16, 1e6, 1e7)
+ test_points_slow <- test_points
+ max_size <- 1e7
+ }
> myfile <- tempfile()
>
> obj_size <- 0
> get_obj_size <- function() {
+ get("obj_size", envir = globalenv())
+ }
> set_obj_size <- function(x) {
+ assign("obj_size", get_obj_size() + as.numeric(object.size(x)), envir = globalenv())
+ return(get_obj_size());
+ }
> random_object_generator <- function(N, with_envs = FALSE) { # additional input: global obj_size, max_size
+ if (sample(3, 1) == 1) {
+ ret <- as.list(1:N)
+ } else if (sample(2, 1) == 1) {
+ ret <- as.pairlist(1:N)
+ } else {
+ ret <- as.pairlist(1:N)
+ setlev(ret, sample(2L^12L, 1L) - 1L)
+ setobj(ret, 1L)
+ }
+
+ for (i in 1:N) {
+ if (get_obj_size() > get("max_size", envir = globalenv())) break;
+ otype <- sample(12, size = 1)
+ z <- NULL
+ is_attribute <- ifelse(i == 1, F, sample(c(F, T), size = 1))
+ if (otype == 1) {z <- rnorm(1e4); set_obj_size(z);}
+ else if (otype == 2) { z <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (otype == 3) { z <- sample(c(T, F, NA), size = 1e4, replace = T); set_obj_size(z); }
+ else if (otype == 4) { z <- (sample(256, size = 1e4, replace = T) - 1) %>% as.raw; set_obj_size(z); }
+ else if (otype == 5) { z <- replicate(sample(1e4, size = 1), {rep(letters, length.out = sample(10, size = 1)) %>% paste(collapse = "")}); set_obj_size(z); }
+ else if (otype == 6) { z <- rep(letters, length.out = sample(1e4, size = 1)) %>% paste(collapse = ""); set_obj_size(z); }
+ else if (otype == 7) { z <- as.formula("y ~ a + b + c : d", env = globalenv()); attr(z, "blah") <- sample(1e4) - 5e2; set_obj_size(z); }
+ else if (with_envs && otype %in% c(8, 9)) { z <- function(x) {x + runif(1)} }
+ # else if(with_envs && otype %in% c(10,11)) { z <- new.env(); z$x <- random_object_generator(N, with_envs); makeActiveBinding("y", function() runif(1), z) }
+ else { z <- random_object_generator(N, with_envs) }
+ if (is_attribute) {
+ attr(ret[[i - 1]], runif(1) %>% as.character()) <- z
+ } else {
+ ret[[i]] <- z
+ }
+ }
+ return(ret)
+ }
>
> rand_strings <- function(n) {
+ s <- sample(0:100, size = n, replace = T)
+ x <- lapply(unique(s), function(si) {
+ stringfish::random_strings(sum(s == si), si, vector_mode = "normal")
+ }) %>% unlist %>% sample
+ x[sample(n, size = n/10)] <- NA
+ return(x)
+ }
>
> nested_tibble <- function() {
+ sub_tibble <- function(nr = 600, nc = 4) {
+ z <- lapply(1:nc, function(i) rand_strings(nr)) %>%
+ setNames(make.unique(paste0(sample(letters, nc), rand_strings(nc)))) %>%
+ bind_cols %>%
+ as_tibble
+ }
+ tibble(
+ col1 = rand_strings(100),
+ col2 = rand_strings(100),
+ col3 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col4 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4)),
+ col5 = lapply(1:100, function(i) sub_tibble(nr = 600, nc = 4))
+ ) %>% setNames(make.unique(paste0(sample(letters, 5), rand_strings(5))))
+ }
>
> printCarriage <- function(x) {
+ cat(x, "\r")
+ }
>
> attributes_serialize_identical <- function(attributes, full_object) {
+ identical(serialize(attributes(full_object), NULL), serialize(attributes, NULL))
+ }
>
> attributes_identical <- function(attributes, full_object) {
+ identical(attributes, attributes(full_object))
+ }
>
> ################################################################################################
>
> qsave_rand <- function(x, file) {
+ alg <- sample(c("lz4", "zstd", "lz4hc", "zstd_stream", "uncompressed"), 1)
+ # alg <- "zstd_stream"
+ nt <- sample(5,1)
+ sc <- sample(0:15,1)
+ cl <- sample(10,1)
+ ch <- sample(c(T,F),1)
+ qsave(x, file = file, preset = "custom", algorithm = alg,
+ compress_level = cl, shuffle_control = sc, nthreads = nt, check_hash = ch)
+ }
>
> qattributes_rand <- function(file) {
+ # ar <- sample(c(T,F),1)
+ # don't use altrep to avoid serialization differences
+ # attributes_serialize_identical won't pass with ALTREP
+ ar <- FALSE
+ nt <- sample(5,1)
+ qattributes(file, use_alt_rep = ar, nthreads = nt, strict = T)
+ }
>
> ################################################################################################
>
> for (q in 1:reps) {
+ cat("Rep", q, "of", reps, "\n")
+ # String correctness
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rep(letters, length.out = tp) %>% paste(collapse = "")
+ x1 <- c(NA, "", x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("strings: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Character vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ # qs_use_alt_rep(F)
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Character Vectors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # stringfish character vectors -- require R > 3.5.0
+ if (utils::compareVersion(as.character(getRversion()), "3.5.0") != -1) {
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- rep(as.raw(sample(255)), length.out = tp*10) %>% rawToChar
+ cuts <- sample(tp*10, tp + 1) %>% sort %>% as.numeric
+ x1 <- splitstr(x1, cuts)
+ x1 <- c(NA, "", x1)
+ x1 <- stringfish::convert_to_sf(x1)
+ qsave_rand(x1, file = myfile)
+ time[i] <- Sys.time()
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Stringfish: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+ }
+
+ # Integers
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- sample(1:tp, replace = T)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Integers: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Doubles
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ x1 <- rnorm(tp)
+ x1 <- c(NA, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Numeric: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Logical
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+
+ x1 <- sample(c(T, F, NA), replace = T, size = tp)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Logical: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ # List
+ time <- vector("numeric", length = 3)
+ for (tp in test_points_slow) {
+ for (i in 1:3) {
+ x1 <- generateList(sample(1:4, replace = T, size = tp))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("List: %s, %s s",tp, signif(mean(time),4)))
+ }
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.frame(str = x1,num = runif(1:1000), stringsAsFactors = F)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Data.frame test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- data.table(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_serialize_identical(z, x1))
+ }
+ cat("Data.table test")
+ cat("\n")
+
+ for (i in 1:3) {
+ x1 <- rep( replicate(1000, { rep(letters, length.out = 2^7 + sample(10, size = 1)) %>% paste(collapse = "") }), length.out = 1e6 )
+ x1 <- tibble(str = x1,num = runif(1:1e6))
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ cat("Tibble test")
+ cat("\n")
+
+ # Encoding test
+ if (Sys.info()[['sysname']] != "Windows") {
+ for (i in 1:3) {
+ x1 <- "己所不欲,勿施于人" # utf 8
+ x2 <- x1
+ Encoding(x2) <- "latin1"
+ x3 <- x1
+ Encoding(x3) <- "bytes"
+ x4 <- rep(x1, x2, length.out = 1e4) %>% paste(collapse = ";")
+ x1 <- c(x1, x2, x3, x4)
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage("Encoding test")
+ } else {
+ printCarriage("(Encoding test not run on windows)")
+ }
+ cat("\n")
+
+ # complex vectors
+ time <- vector("numeric", length = 3)
+ for (tp in test_points) {
+ for (i in 1:3) {
+ re <- rnorm(tp)
+ im <- runif(tp)
+ x1 <- complex(real = re, imaginary = im)
+ x1 <- c(NA_complex_, x1)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Complex: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # factors
+ for (tp in test_points) {
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- factor(rep(letters, length.out = tp), levels = sample(letters), ordered = TRUE)
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Factors: %s, %s s",tp, signif(mean(time), 4)))
+ }
+ cat("\n")
+
+ # Random objects
+ time <- vector("numeric", length = 8)
+ for (i in 1:8) {
+ # qs_use_alt_rep(sample(c(T, F), size = 1))
+ obj_size <- 0
+ x1 <- random_object_generator(12)
+ printCarriage(sprintf("Random objects: %s bytes", object.size(x1) %>% as.numeric))
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Random objects: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # nested attributes
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- as.list(1:26)
+ attr(x1[[26]], letters[26]) <- rnorm(100)
+ for (i in 25:1) {
+ attr(x1[[i]], letters[i]) <- x1[[i + 1]]
+ }
+ time[i] <- Sys.time()
+ for(j in 1:length(x1)) {
+ qsave_rand(x1[[j]], file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1[[j]]))
+ }
+ }
+ printCarriage(sprintf("Nested attributes: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ # alt-rep -- should serialize the unpacked object
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- 1:max_size
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ stopifnot(attributes_identical(z, x1))
+ }
+ printCarriage(sprintf("Alt rep integer: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+
+ # Environment test
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- new.env()
+ x1[["a"]] <- 1:max_size
+ x1[["b"]] <- runif(max_size)
+ x1[["c"]] <- stringfish::random_strings(1e4, vector_mode = "normal")
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z[["a"]], x1[["a"]]))
+ stopifnot(attributes_identical(z[["b"]], x1[["b"]]))
+ stopifnot(attributes_identical(z[["c"]], x1[["c"]]))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("Environment test: %s s", signif(mean(time), 4)))
+ cat("\n")
+
+ time <- vector("numeric", length = 3)
+ for (i in 1:3) {
+ x1 <- nested_tibble()
+ time[i] <- Sys.time()
+ qsave_rand(x1, file = myfile)
+ z <- qattributes_rand(file = myfile)
+ stopifnot(attributes_identical(z, x1))
+ time[i] <- Sys.time() - time[i]
+ do_gc()
+ }
+ printCarriage(sprintf("nested tibble test: %s s", signif(mean(time), 4)))
+ cat("\n")
+ }
Rep 1 of 2
strings: 0, 0.01174 s
strings: 1, 0.003947 s
strings: 2, 0.003776 s
strings: 4, 0.01145 s
strings: 8, 0.003445 s
strings: 31, 0.002419 s
strings: 33, 0.00523 s
strings: 32, 0.004462 s
strings: 255, 0.003632 s
strings: 257, 0.00594 s
strings: 256, 0.001785 s
strings: 65535, 0.005208 s
strings: 65537, 0.004357 s
strings: 65536, 0.004274 s
strings: 1e+06, 0.008871 s
Character Vectors: 0, 0.001218 s
Character Vectors: 1, 0.0005983 s
Character Vectors: 2, 0.003309 s
Character Vectors: 4, 0.001579 s
Character Vectors: 8, 0.000471 s
Character Vectors: 31, 0.001272 s
Character Vectors: 33, 0.002278 s
Character Vectors: 32, 0.00241 s
Character Vectors: 255, 0.0004561 s
Character Vectors: 257, 0.001495 s
Character Vectors: 256, 0.00061 s
Character Vectors: 65535, 0.00509 s
Character Vectors: 65537, 0.004777 s
Character Vectors: 65536, 0.003014 s
Stringfish: 0, 0.0003951 s
Stringfish: 1, 0.0004636 s
Stringfish: 2, 0.002055 s
Stringfish: 4, 0.0004384 s
Stringfish: 8, 0.0008453 s
Stringfish: 31, 0.001457 s
Stringfish: 33, 0.0004164 s
Stringfish: 32, 0.001762 s
Stringfish: 255, 0.001164 s
Stringfish: 257, 0.0004313 s
Stringfish: 256, 0.0001803 s
Stringfish: 65535, 0.003583 s
Stringfish: 65537, 0.002838 s
Stringfish: 65536, 0.004577 s
Integers: 0, 0.007214 s
Integers: 1, 0.01507 s
Integers: 2, 0.00458 s
Integers: 4, 0.002087 s
Integers: 8, 0.002797 s
Integers: 31, 0.002248 s
Integers: 33, 0.003257 s
Integers: 32, 0.001511 s
Integers: 255, 0.003347 s
Integers: 257, 0.003918 s
Integers: 256, 0.003399 s
Integers: 65535, 0.008494 s
Integers: 65537, 0.0109 s
Integers: 65536, 0.008564 s
Integers: 1e+06, 0.1231 s
Numeric: 0, 0.004419 s
Numeric: 1, 0.002441 s
Numeric: 2, 0.004929 s
Numeric: 4, 0.003651 s
Numeric: 8, 0.006104 s
Numeric: 31, 0.001874 s
Numeric: 33, 0.001147 s
Numeric: 32, 0.004684 s
Numeric: 255, 0.004772 s
Numeric: 257, 0.001331 s
Numeric: 256, 0.002012 s
Numeric: 65535, 0.002955 s
Numeric: 65537, 0.02928 s
Numeric: 65536, 0.00912 s
Numeric: 1e+06, 0.07685 s
Logical: 0, 0.007746 s
Logical: 1, 0.001581 s
Logical: 2, 0.004394 s
Logical: 4, 0.00419 s
Logical: 8, 0.005615 s
Logical: 31, 0.01182 s
Logical: 33, 0.001253 s
Logical: 32, 0.00464 s
Logical: 255, 0.004296 s
Logical: 257, 0.001352 s
Logical: 256, 0.002164 s
Logical: 65535, 0.01136 s
Logical: 65537, 0.0221 s
Logical: 65536, 0.004366 s
Logical: 1e+06, 0.04932 s
List: 0, 0.00815 s
List: 1, 0.006197 s
List: 2, 0.003685 s
List: 4, 0.003249 s
List: 8, 0.002093 s
List: 31, 0.00579 s
List: 33, 0.001918 s
List: 32, 0.003809 s
List: 255, 0.0009727 s
List: 257, 0.009939 s
List: 256, 0.006806 s
List: 65535, 0.03 s
List: 65537, 0.01992 s
List: 65536, 0.02768 s
Data.frame test
Error: attributes_serialize_identical(z, x1) is not TRUE
Execution halted
Flavor: r-oldrel-windows-x86_64
Current CRAN status: NOTE: 8, OK: 5
Version: 0.1.6
Check: compiled code
Result: NOTE
File ‘qs2/libs/qs2.so’:
Found non-API calls to R: ‘ATTRIB’, ‘SET_ATTRIB’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc
Version: 0.1.6
Check: compiled code
Result: NOTE
File 'qs2/libs/x64/qs2.dll':
Found non-API calls to R: 'ATTRIB', 'SET_ATTRIB'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-devel-windows-x86_64
Version: 0.1.6
Check: installed package size
Result: NOTE
installed size is 8.8Mb
sub-directories of 1Mb or more:
libs 8.6Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Version: 0.1.6
Check: for GNU extensions in Makefiles
Result: NOTE
GNU make is a SystemRequirements.
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64
Current CRAN status: NOTE: 3, OK: 10
Version: 0.3.5
Check: installed package size
Result: NOTE
installed size is 6.0Mb
sub-directories of 1Mb or more:
data 1.1Mb
libs 4.4Mb
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64
Version: 0.3.5
Check: for GNU extensions in Makefiles
Result: NOTE
GNU make is a SystemRequirements.
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64
Version: 0.3.5
Check: package dependencies
Result: NOTE
Package suggested but not available for checking: ‘pwalign’
Flavor: r-oldrel-macos-x86_64
Current CRAN status: NOTE: 8, OK: 5
Version: 0.17.0
Check: compiled code
Result: NOTE
File ‘stringfish/libs/stringfish.so’:
Found non-API call to R: ‘ATTRIB’
Compiled code should not call non-API entry points in R.
See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual,
and section ‘Moving into C API compliance’ for issues with the use of
non-API entry points.
Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc
Version: 0.17.0
Check: compiled code
Result: NOTE
File 'stringfish/libs/x64/stringfish.dll':
Found non-API call to R: 'ATTRIB'
Compiled code should not call non-API entry points in R.
See 'Writing portable packages' in the 'Writing R Extensions' manual,
and section 'Moving into C API compliance' for issues with the use of
non-API entry points.
Flavor: r-devel-windows-x86_64
Version: 0.17.0
Check: for GNU extensions in Makefiles
Result: NOTE
GNU make is a SystemRequirements.
Flavors: r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.