Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

CimpleG, an R package to find (small) CpG signatures.

License

NotificationsYou must be signed in to change notification settings

CostaLab/CimpleG

Repository files navigation

Overview

CimpleG, an R package to find (small) CpG signatures.

R-CMD-check

Installation

# Install from CRAN:install.packages("CimpleG")# Install dev version from github:devtools::install_github("costalab/CimpleG")

Getting started

library("CimpleG")data(train_data)data(train_targets)data(test_data)data(test_targets)# check the train_targets table to see# what other columns can be used as targets# colnames(train_targets)# mini example with just 4 target signaturesset.seed(42)cimpleg_result<- CimpleG(train_data=train_data,train_targets=train_targets,test_data=test_data,test_targets=test_targets,method="CimpleG",has_annotation=TRUE,target_columns= c("neurons","glia","blood_cells","fibroblasts"  ))cimpleg_result$results
# check generated signaturescimpleg_result$signatures#>      neurons         glia  blood_cells  fibroblasts#> "cg24548498" "cg14501977" "cg04785083" "cg03369247"

Get signature annotation

# Get it directly from the results objectcimpleg_result$annotation#> # A tibble: 4 × 8#>   IlmnID     CHR_hg38 Start_hg38  End_hg38 UCSC_RefGene_Name  UCSC_RefGene_Group#>   <chr>      <chr>         <dbl>     <dbl> <chr>              <chr>#> 1 cg24548498 chr2      181684680 181684682 <NA>               <NA>#> 2 cg14501977 chr12     123948446 123948448 CCDC92             5'UTR#> 3 cg04785083 chr1        8971202   8971204 CA6                Body#> 4 cg03369247 chr8       20174518  20174520 SLC18A1;SLC18A1;S… Body;Body;Body;Bo…#> # ℹ 2 more variables: UCSC_CpG_Islands_Name <chr>,#> #   Relation_to_UCSC_CpG_Island <chr># or idependently through the "get_cpg_annotation" functionsignature_annotation<- get_cpg_annotation(cimpleg_result$signatures)# check signature annotationsignature_annotation#> # A tibble: 4 × 8#>   IlmnID     CHR_hg38 Start_hg38  End_hg38 UCSC_RefGene_Name  UCSC_RefGene_Group#>   <chr>      <chr>         <dbl>     <dbl> <chr>              <chr>#> 1 cg24548498 chr2      181684680 181684682 <NA>               <NA>#> 2 cg14501977 chr12     123948446 123948448 CCDC92             5'UTR#> 3 cg04785083 chr1        8971202   8971204 CA6                Body#> 4 cg03369247 chr8       20174518  20174520 SLC18A1;SLC18A1;S… Body;Body;Body;Bo…#> # ℹ 2 more variables: UCSC_CpG_Islands_Name <chr>,#> #   Relation_to_UCSC_CpG_Island <chr>

Plot generated signatures

# adjust target names to match signature names# check generated signaturesplt<- signature_plot(cimpleg_result,train_data,train_targets,sample_id_column="gsm",true_label_column="cell_type")print(plt$plot)

Difference of means vs Sum of variances (dmsv) plots

We have two different functions to produce these plots, one with asimpler interface (and arguably cleaner look) than the other. I mightunify these interfaces in the future.

basic plot

plt<- dmsv_plot(dat=train_data,target_vector=train_targets$neurons==1)print(plt)

plt<- diffmeans_sumvariance_plot(data=train_data,target_vector=train_targets$neurons==1)print(plt)

adding color, highlighting selected features

df_dmeansvar<- compute_diffmeans_sumvar(data=train_data,target_vector=train_targets$neurons==1)parab_param<-.7df_dmeansvar$is_selected<- select_features(x=df_dmeansvar$diff_means,y=df_dmeansvar$sum_variance,a=parab_param)

With the simpler interface

plt<- dmsv_plot(dat=df_dmeansvar,label_var1="Neurons",highlight_var="is_selected",display_var="is_selected",point_color="purple")print(plt)

With the more complex interface

plt<- diffmeans_sumvariance_plot(data=df_dmeansvar,label_var1="Neurons",color_all_points="purple",threshold_func=function(x,a) (a*x)^2,is_feature_selected_col="is_selected",func_factor=parab_param)print(plt)

labeling specific features

# labeling best signature found by CimpleGdf_dmeansvar$best_neuron_sig<- (df_dmeansvar$id%in%cimpleg_result$signatures["neurons"])plt<- dmsv_plot(dat=df_dmeansvar,label_var1="Neurons",highlight_var="is_selected",display_var="best_neuron_sig",point_color="red")print(plt)

Deconvolution plots

mini example with just 4 signatures

deconv_result<- run_deconvolution(cpg_obj=cimpleg_result,new_data=test_data)plt<- deconvolution_barplot(deconvoluted_data=deconv_result,meta_data=test_targets,sample_id="gsm",true_label="cell_type")print(plt$plot)

Benchmarking example

This example is a little more advanced

first lets create additional deconvolution results so that we can compare them

In this example, we’ll create two additional models made with CimpleG.One using only hypermethylated signatures, and the other using 3 CpGsper signature instead of just one. Then we will benchmark them againsteachother. This is similar to the approach that we use in the paperexcept there we use real data.

set.seed(42)cimpleg_hyper<- CimpleG(train_data=train_data,train_targets=train_targets,test_data=test_data,test_targets=test_targets,method="CimpleG",pred_type="hyper",target_columns= c("neurons","glia","blood_cells","fibroblasts"  ))#> Training for target 'neurons' with 'CimpleG' has finished.: 0.251 sec elapsed#> Training for target 'glia' with 'CimpleG' has finished.: 0.253 sec elapsed#> Training for target 'blood_cells' with 'CimpleG' has finished.: 0.29 sec elapsed#> Training for target 'fibroblasts' with 'CimpleG' has finished.: 0.268 sec elapseddeconv_hyper<- run_deconvolution(cpg_obj=cimpleg_hyper,new_data=test_data)set.seed(42)cimpleg_3sigs<- CimpleG(train_data=train_data,train_targets=train_targets,test_data=test_data,test_targets=test_targets,method="CimpleG",n_sigs=3,target_columns= c("neurons","glia","blood_cells","fibroblasts"  ))#> Training for target 'neurons' with 'CimpleG' has finished.: 0.315 sec elapsed#> Training for target 'glia' with 'CimpleG' has finished.: 0.296 sec elapsed#> Training for target 'blood_cells' with 'CimpleG' has finished.: 0.349 sec elapsed#> Training for target 'fibroblasts' with 'CimpleG' has finished.: 0.307 sec elapseddeconv_3sigs<- run_deconvolution(cpg_obj=cimpleg_3sigs,new_data=test_data)

let’s also create some simulated true values just so that we can compare all the results

remember this is just an example, the results themselves are meaningless!

deconv_3sigs$prop_3sigs<-deconv_3sigs$proportiondeconv_hyper$prop_hyper<-deconv_hyper$proportiondeconv_result$prop_cimpleg<-deconv_result$proportiondummy_deconvolution_data<-deconv_result|>dplyr::mutate(true_vals=proportion+ runif(nrow(deconv_result),min=-0.1,max=0.1))|>dplyr::select(cell_type,sample_id,prop_cimpleg,true_vals)|>dplyr::left_join(deconv_hyper|>dplyr::select(-proportion),by= c("sample_id","cell_type"))|>dplyr::left_join(deconv_3sigs|>dplyr::select(-proportion),by= c("sample_id","cell_type"))|>dplyr::mutate_if(is.numeric,function(x) {    ifelse(x<0,0,x)  })|>dplyr::mutate_if(is.numeric,function(x) {    ifelse(x>1,1,x)  })|>tibble::as_tibble()

let’s now make use of some plotting functions designed to compare deconvolution results

first we can check how the true values compare against the predicted values

scatter_plts<-CimpleG:::deconv_pred_obs_plot(deconv_df=dummy_deconvolution_data,true_values_col="true_vals",predicted_cols= c("prop_cimpleg","prop_hyper","prop_3sigs"),sample_id_col="sample_id",group_col="cell_type")scatter_panel<-scatter_plts|>patchwork::wrap_plots(ncol=1)print(scatter_panel)

now, more interestingly, we can see in detail and rank one of the measures used to evaluate the deconvolution results

rank_plts<-CimpleG:::deconv_ranking_plot(deconv_df=dummy_deconvolution_data,true_values_col="true_vals",predicted_cols= c("prop_cimpleg","prop_hyper","prop_3sigs"),sample_id_col="sample_id",group_col="cell_type",metrics="rmse")rank_panel<-list(rank_plts$perf_boxplt[[1]],rank_plts$nemenyi_plt[[1]])|>patchwork::wrap_plots()print(rank_panel)


[8]ページ先頭

©2009-2025 Movatter.jp