Reducing paper results by joining keywords

Alfonso R. Reyes

2019-01-10

This is a demonstration of how the number of papers can be reduced by using additional keywords to control the number of results returned.

library(petro.One)

test #1

We start with these keywords, in three groups: water injection, water flooding; machine-learning, artificial intelligence; neural networks.

# Three keyword groups, one character vector per group. They are handed to
# join_keywords() in the order major, minor, lesser.
major  <- c("water injection", "water flooding")
minor  <- c("machine-learning", "artificial intelligence")
lesser <- "neural networks"

# get_papers = TRUE asks for the papers as well; the printed result shows a
# $keywords table and a $papers table.
result_object <- join_keywords(
  major, minor, lesser,
  get_papers = TRUE
)
result_object
#> $keywords
#> # A tibble: 4 x 6
#>   Var1    Var2      Var3    paper_count sf               url               
#>   <chr>   <chr>     <chr>         <dbl> <chr>            <chr>             
#> 1 water ~ machine-~ neural~          62 'water+injectio~ "https://www.onep~
#> 2 water ~ machine-~ neural~          34 'water+flooding~ "https://www.onep~
#> 3 water ~ artifici~ neural~         116 'water+injectio~ "https://www.onep~
#> 4 water ~ artifici~ neural~          53 'water+flooding~ "https://www.onep~
#> 
#> $papers
#> # A tibble: 265 x 7
#>    book_title      paper_id  dc_type  authors       year source keyword    
#>    <fct>           <fct>     <fct>    <chr>        <int> <fct>  <chr>      
#>  1 Selection and ~ SPE-7916~ confere~ Shokir, E.M~  2002 SPE    'water+flo~
#>  2 Dynamic Layere~ SPE-1900~ confere~ Li, Yuanjun~  2018 SPE    'water+flo~
#>  3 IOR Evaluation~ SPE-5930~ confere~ Surguchev, ~  2000 SPE    'water+flo~
#>  4 Artificial Int~ SPE-8945~ journal~ Weiss, Will~  2006 SPE    'water+flo~
#>  5 Artificial Int~ SPE-8945~ confere~ Weiss, Will~  2004 SPE    'water+flo~
#>  6 Application of~ SPWLA-20~ confere~ Alakeely, A~  2014 SPWLA  'water+flo~
#>  7 Application of~ SPE-1914~ confere~ Dang, Cuong~  2018 SPE    'water+flo~
#>  8 Video:        ~ SPE-1914~ present~ Dang, Cuong~  2018 SPE    'water+flo~
#>  9 A Methodologic~ SPE-2839~ confere~ Mohaghegh, ~  1994 SPE    'water+flo~
#> 10 A Neural Netwo~ SPE-1651~ confere~ Foroutan, S~  2013 SPE    'water+flo~
#> # ... with 255 more rows
# save findings
# Bundle the three keyword vectors and the search result into one object and
# write that object to an .rda file.
# `papers` is simply another name for the full result (it prints with both
# $keywords and $papers).
papers <- result_object
# NOTE(review): as_named_list() is reached through `:::`, which suggests it is
# not exported by petro.One. Presumably it names each list element after the
# variable passed in (the load step reads `$papers`) -- confirm before
# changing any argument here.
wat_inj_ml_1 <- petro.One:::as_named_list(major, minor, lesser, papers)
save(wat_inj_ml_1, file = paste0("wat_inj_ml_1", ".rda"))

# load previous save
# load() restores the object under the name it was saved with, wat_inj_ml_1.
load(file = paste0("wat_inj_ml_1", ".rda"))
papers <- wat_inj_ml_1$papers
papers
#> $keywords
#> # A tibble: 4 x 6
#>   Var1    Var2      Var3    paper_count sf               url               
#>   <chr>   <chr>     <chr>         <dbl> <chr>            <chr>             
#> 1 water ~ machine-~ neural~          62 'water+injectio~ "https://www.onep~
#> 2 water ~ machine-~ neural~          34 'water+flooding~ "https://www.onep~
#> 3 water ~ artifici~ neural~         116 'water+injectio~ "https://www.onep~
#> 4 water ~ artifici~ neural~          53 'water+flooding~ "https://www.onep~
#> 
#> $papers
#> # A tibble: 265 x 7
#>    book_title      paper_id  dc_type  authors       year source keyword    
#>    <fct>           <fct>     <fct>    <chr>        <int> <fct>  <chr>      
#>  1 Selection and ~ SPE-7916~ confere~ Shokir, E.M~  2002 SPE    'water+flo~
#>  2 Dynamic Layere~ SPE-1900~ confere~ Li, Yuanjun~  2018 SPE    'water+flo~
#>  3 IOR Evaluation~ SPE-5930~ confere~ Surguchev, ~  2000 SPE    'water+flo~
#>  4 Artificial Int~ SPE-8945~ journal~ Weiss, Will~  2006 SPE    'water+flo~
#>  5 Artificial Int~ SPE-8945~ confere~ Weiss, Will~  2004 SPE    'water+flo~
#>  6 Application of~ SPWLA-20~ confere~ Alakeely, A~  2014 SPWLA  'water+flo~
#>  7 Application of~ SPE-1914~ confere~ Dang, Cuong~  2018 SPE    'water+flo~
#>  8 Video:        ~ SPE-1914~ present~ Dang, Cuong~  2018 SPE    'water+flo~
#>  9 A Methodologic~ SPE-2839~ confere~ Mohaghegh, ~  1994 SPE    'water+flo~
#> 10 A Neural Netwo~ SPE-1651~ confere~ Foroutan, S~  2013 SPE    'water+flo~
#> # ... with 255 more rows

Using a new function to replace multiple commands

# Same three keyword groups, this time through run_papers_search().
paper_results <- run_papers_search(
  major, minor, lesser,
  get_papers       = TRUE,   # return with papers
  verbose          = FALSE,  # show progress? switched off for this run
  len_keywords     = 4,      # naming the data file
  allow_duplicates = FALSE   # by paper title and id
)
#>
#> NULL
# Names of the components that came back.
names(paper_results)
#> [1] "papers"          "keywords"        "search_keywords" "rda_filename"

test #2

Then, we increase the number of keywords:

water injection     water flooding
machine-learning    machine learning    intelligent
neural network      SVM                 genetic
algorithm

Reducing test 5.2

  waterflooding
  machine-learning    artificial intelligence
  algorithm
  data-mining         data-driven
# Four keyword groups this time, one character vector per group.
# NOTE: `min` is also the name of a base R function. Calls such as min(x)
# still reach the function, because R skips non-function bindings when it
# looks up a call.
maj <- "waterflooding"
min <- c("machine-learning", "artificial intelligence")
les <- "algorithm"
anr <- c("data-mining", "data-driven")

paper_results_5 <- run_papers_search(
  maj, min, les, anr,
  get_papers       = TRUE,   # return with papers
  verbose          = FALSE,  # show progress? switched off for this run
  len_keywords     = 4,      # naming the data file
  allow_duplicates = FALSE   # by paper title and id
)
#> 
#> NULL

# Paper count for each keyword combination.
paper_results_5[["keywords"]]
#> # A tibble: 4 x 7
#>   Var1    Var2     Var3   Var4  paper_count sf              url            
#>   <chr>   <chr>    <chr>  <chr>       <dbl> <chr>           <chr>          
#> 1 waterf~ machine~ algor~ data~          32 'waterflooding~ "https://www.o~
#> 2 waterf~ artific~ algor~ data~          30 'waterflooding~ "https://www.o~
#> 3 waterf~ machine~ algor~ data~          34 'waterflooding~ "https://www.o~
#> 4 waterf~ artific~ algor~ data~          23 'waterflooding~ "https://www.o~
# Keyword groups stored with the result.
paper_results_5[["search_keywords"]]
#> [[1]]
#> [1] "waterflooding"
#> 
#> [[2]]
#> [1] "machine-learning"        "artificial intelligence"
#> 
#> [[3]]
#> [1] "algorithm"
#> 
#> [[4]]
#> [1] "data-mining" "data-driven"