welch16
2019-09-10 fad2d50d4084cd610692123903bbe72cb6abc2fc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
 
library(magrittr)
library(tidyverse)
 
library(BiocParallel)
 
mp = MulticoreParam(workers = 8)
 
results_dr <- here::here("results/2019_08_15_Emca4_interactions/predictions")
 
all_files = results_dr %>% list.files(pattern = "rds", full.names = TRUE)
 
 
results <- tibble( file = all_files) %>% 
    mutate(
        aux = basename(all_files),
        type = case_when(
            str_detect(aux, "model_fit") ~ "model",
            str_detect(aux, "train_metrics") ~ "train_metrics",
            str_detect(aux, "test_metrics") ~ "test_metrics",
            str_detect(aux, "train_prob") ~ "train_prob",
            str_detect(aux, "test_prob") ~ "test_prob"
        )) 
 
results %<>% 
    mutate(
        aim = if_else(str_detect(aux,"aim1"),"aim1","aim2"),
        geno = if_else(str_detect(aux,"geno"),"genotype","risk_allele"),
        cov = str_split(aux,"_cvfold") %>% map_chr(1) %>% 
            str_split("\\_") %>% map_chr( ~ paste(.[-1],collapse = "_")),
        adaptive = if_else(str_detect(file,"adaptive"),"yes","no"),
        fit = case_when(
            str_detect(aux,"decision") ~ "decision_trees",
            str_detect(aux,"random") ~ "random_forests",
            str_detect(aux,"logis") ~ "logistic_regression",
            str_detect(aux,"elasti") ~ "elastic_nets",
            str_detect(aux,"lasso") ~ "lasso_regression",
            str_detect(aux,"ridge") ~ "ridge_regression"),
        aux = str_split(aux,"\\_"),
        time = map_chr(aux, ~ .[nchar(.) == 1]),
        cv_fold = map_chr(aux, ~ .[nchar(.) == 2]))
 
results  %<>% select(-aux)  %>% spread(type,file)
 
 
read_filelist <- function(files,mp)
{
    bplapply(files, readRDS, BPPARAM = mp)
}
 
results  %<>%   
    mutate_at(vars(contains("prob")), list( ~ read_filelist(.,mp))) %>% 
    mutate_at(vars(contains("metric")), list( ~ read_filelist(.,mp)))
 
 
auc <- function(prob_estimate,mp)
{
    estimate_df <- bplapply(prob_estimate, 
        function(x)yardstick::roc_auc(x,status,prob),BPPARAM = mp)
    map_dbl(estimate_df,".estimate")
}
 
aupr <- function(prob_estimate,mp)
{
    estimate_df <- bplapply(prob_estimate, 
        function(x)yardstick::pr_auc(x,status,prob),BPPARAM = mp)
    map_dbl(estimate_df,".estimate")
}
 
results %<>% 
    mutate_at(vars(contains("prob")), 
        list( ~ auc(.,mp),~ aupr(.,mp))) 
 
 
results  %>% saveRDS(file.path(dirname(results_dr),"2019_08_15_predictive_results.rds"))