summary

John Mount

2017-05-13

replyr_summary example.

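replyr_summary works on various data sources (local data frames as well as database-backed tables), counts the NA values in each column, and returns its result as a data.frame (instead of printed text).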


# Build a tiny local data frame with one missing value in each column:
# x is a character column, y is a numeric column.
d <- data.frame(
  x = c(NA, "b"),
  y = c(1, NA),
  stringsAsFactors = FALSE
)

# Base summary() prints a text table; here only the numeric column y
# reports its NA count.
summary(d)
 #        x                   y    
 #   Length:2           Min.   :1  
 #   Class :character   1st Qu.:1  
 #   Mode  :character   Median :1  
 #                      Mean   :1  
 #                      3rd Qu.:1  
 #                      Max.   :1  
 #                      NA's   :1

# replyr_summary() gives one row per column, with the NA count in nna.
replyr::replyr_summary(d)
 #    column index     class nrows nna nunique min max mean sd lexmin lexmax
 #  1      x     1 character     2   1      NA  NA  NA   NA NA      b      b
 #  2      y     2   numeric     2   1      NA   1   1    1 NA   <NA>   <NA>

# Open an in-memory SQLite source and copy the local frame d into it, so the
# same summaries can be tried on a database-backed table.
my_db <- dplyr::src_sqlite(path = ":memory:", create = TRUE)
dbData <- dplyr::copy_to(dest = my_db, df = d)

# Base summary() describes the components of the table handle (src, ops),
# not the columns of the data.
summary(dbData)
 #      Length Class          Mode
 #  src 2      src_sqlite     list
 #  ops 3      op_base_remote list

# replyr_summary() reports the same per-column table as for the local frame.
replyr::replyr_summary(dbData)
 #    column index     class nrows nna nunique min max mean sd lexmin lexmax
 #  1      x     1 character     2   1      NA  NA  NA   NA NA      b      b
 #  2      y     2   numeric     2   1      NA   1   1    1 NA   <NA>   <NA>

# glimpse works more like str or head
dplyr::glimpse(dbData)
 #  Observations: NA
 #  Variables: 2
 #  $ src <S3: src_sqlite> NA, "b"
 #  $ ops <S3: op_base_remote> 1, NA
# Clean up: drop only the objects this example created, rather than wiping the
# caller's whole workspace with rm(list = ls()), then run the garbage
# collector. The figures below are the recorded output of one run and will
# differ between sessions.
# NOTE(review): presumably the gc() call is what lets the in-memory SQLite
# handle be released once nothing references it -- confirm.
rm(d, my_db, dbData)
gc()
 #            used (Mb) gc trigger  (Mb) max used  (Mb)
 #  Ncells  916280 49.0    1442291  77.1  1442291  77.1
 #  Vcells 9896221 75.6   26979071 205.9 25976122 198.2