NR_mutation_Analysis

# Directory holding the cBioPortal breast-cancer mutation files read below.
# Defaults to the original location; set the NR_MUTATION_DATA_DIR environment
# variable to run the notebook on another machine without editing this cell.
data_dir <- Sys.getenv(
  "NR_MUTATION_DATA_DIR",
  unset = R"(D:\OneDrive - St John's National Academy of Health Sciences\Shared Folder\Snijesh\data\cbioportal\brca)"
)
setwd(data_dir)
library(maftools)
library(dplyr)
# Gene symbols that each study's gene summary is filtered on further down.
nrs <- c("AR", "ESR1", "PGR", "NR3C1", "VDR")
# genes %>% dplyr::filter(Hugo_Symbol %in% rows_to_filter)

METABRIC

Samples: 2433

# Load the METABRIC mutation file and preview the per-sample variant counts.
df <- read.maf(maf = "brca_metabric_data_mutations.txt")
samp <- getSampleSummary(df)
head(samp, n = 10)
-Reading
-Validating
-Silent variants: 4212 
-Summarizing
--Possible FLAGS among top ten genes:
  MUC16
  AHNAK2
  SYNE1
  DNAH11
-Processing clinical data
--Missing clinical data
-Finished in 0.750s elapsed (0.500s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
MB-5275 0000661500081
MB-4791 000040 600046
MB-4667 001031 801041
MTS-T1284000030 401035
MTS-T0340110025 401032
MB-4938 110025 102030
MB-0897 000022 600028
MB-3525 000024 101026
MB-4079 201020 100024
MB-3363 300016 103023
# Per-gene variant counts for the study loaded above; show the first 10 rows.
genes <- getGeneSummary(df)
head(genes, n = 10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
PIK3CA 5 53421065 01 001112975975
TP53 12140297 5101260520 885861861
MUC16 6 4 82 465 140 00 499409409
AHNAK2 4 1 02 524 51 00 537395395
SYNE1 2 1 30 314 110 60 337293293
KMT2C 6719 40 148 670 90 314277277
GATA3 5080 51 53 70810 277267267
MAP3K110075142 78 510160 336236236
CDH1 7251 70 28 630181 240233233
DNAH11 1 2 20 224 120 00 241226226
# Plot the MAF summary dashboard for the study currently held in `df`.
plotmafSummary(
  maf = df,
  rmOutlier = TRUE,
  addStat = "median",
  dashboard = TRUE,
  titvRaw = FALSE
)

png

# Keep only the gene-summary rows whose symbol is listed in `nrs`.
dplyr::filter(genes, Hugo_Symbol %in% nrs)
A data.table: 1 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
NR3C10000120000121212
# Oncoplot for three genes taken from the top of the gene summary above,
# followed by the genes in `nrs`.
sets <- c("MUC16", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf = df, genes = sts)

png

TCGA

Samples: 1066

df = read.maf(maf = "brca_tcga_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
--Removed 4243 duplicated variants
-Silent variants: 43355 
-Summarizing
--Possible FLAGS among top ten genes:
  TTN
  MUC16
  FLG
-Processing clinical data
--Missing clinical data
-Finished in 17.2s elapsed (13.2s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
TCGA-AN-A046-01 312 004472757690 05340
TCGA-AC-A23H-01 16 3 103622403773104135
TCGA-EW-A2FV-013825 0 00 19 0034 03878
TCGA-D8-A27V-012868 0 10 140 16044 13070
TCGA-BH-A18G-01 12940120 969 46133 21232
TCGA-AN-A0AK-01 13247332 808 47033 11103
TCGA-A8-A09Z-01 10945221 818 39334 21073
TCGA-D8-A1XK-01 6815 20 780 17261 0 945
TCGA-BH-A0HF-01 1 1 00 745 49027 0 823
TCGA-AO-A128-01 30 3 20 728 26223 1 815
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
PIK3CA 0 0151370 00 00386346346
TP53 4714 50216460240352346346
TTN 21 0 11259200 60308186186
GATA3 2465 10 13 40230130127127
CDH1 3432 40 15310130129127127
MUC16 15 1 00128 80 00152109109
KMT2C 2210 10 45340 30115 97 97
MAP3K13932 40 29220 40130 89 89
FLG 1 0 20 69 40 00 76 66 66
RYR2 2 0 00 74 30 40 83 65 65
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 5 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
ESR1 101061000999
AR 100051100888
PGR 010070000877
NR3C1000031000444
VDR 000020000222
sets = c("TTN", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

Alterations in PTEN and ESR1 promote clinical resistance to alpelisib plus aromatase inhibitors

Breast Cancer (MSK, Nature Cancer 2020)

REF: 32864625

Samples: 141

df = read.maf(maf = "breast_alpelisib_2020_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 55 
-Summarizing
--Mutiple centers found
NA;MSKCC-Processing clinical data
--Missing clinical data
-Finished in 0.140s elapsed (0.040s cpu) 
A data.table: 10 × 10
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><dbl>
P040-04-Post-cfDNA201054111069
P009-04-Post-cfDNA010052 80061
P040-02-Pre-cfDNA 200147 91060
P009-02-Pre-cfDNA 000027 30030
P002-02-Pre-cfDNA 220016 60026
P-0000247-T02-IM5 010017 20020
P046-04-Post-cfDNA000017 10018
P054-04-Post-cfDNA000016 00016
P-0000138-T02-IM3 000013 10014
P-0000216-T02-IM3 0000 8 100 9
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
PIK3CA0010139 000140110110
ESR1 0000 60 000 60 38 38
TP53 7110 451730 74 36 36
ARID1A2400 12 600 24 23 23
CDH1 4700 5 111 19 19 19
NF1 2000 15 920 28 17 17
APC 0000 22 200 24 12 12
BRCA2 0020 17 200 21 12 12
MTOR 0010 13 300 17 12 12
PTEN 1100 11 300 16 12 12
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 2 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
ESR1000060000603838
AR 00001200012 7 7
sets = c("ARID1A", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

Proteogenomic Landscape of Breast Cancer Tumorigenesis and Targeted Therapy

33212010 Proteogenomic landscape of breast cancer (CPTAC, Cell 2020)

Samples : 122

df = read.maf(maf = "brca_cptac_2020_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 9470 
-Summarizing
--Possible FLAGS among top ten genes:
  TTN
  MUC16
  HMCN1
  AHNAK
  OBSCN
  FLG
-Processing clinical data
--Missing clinical data
-Finished in 2.710s elapsed (1.720s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
X01BR043 8 1 2173283941108167859
X18BR003 2 0 01 891 843 15 1 997
X11BR00310018 50 623 271 23 0 797
X15BR003 3 1 00 579 584 12 1 658
X05BR038 0 0 10 332 420 6 1 382
X05BR029 4611 42 256 180 10 1 348
X11BR031 3 0 00 304 330 6 0 346
X01BR018 18 0123 256 101 8 0 308
X21BR001 5 0 10 229 180 9 1 263
X01BR027 4 0 00 149 120 6 1 172
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 80202714020535050
PIK3CA015039 0000454040
TTN 000086 3000893333
MUC16 200034 1000371515
HMCN1 000020 2020241313
AHNAK 000020 1000211313
OBSCN 000019 1000201313
ABCA13000015 4000191313
FLG 000024 0000241212
MAP3K14110 6 2020161111
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 4 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
AR 000030000333
PGR 000020000222
ESR1 000010000111
NR3C1000010000111
sets = c("TTN", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

Dynamics of genomic clones in breast cancer patient xenografts at single-cell resolution

REF : 25470049 Breast Cancer Xenografts (British Columbia, Nature 2015)

Samples: 117

# Breast Cancer Xenografts (British Columbia, Nature 2015) mutations.
# This cell previously read "brca_bccrc_data_mutations.txt", which is also the
# file read by the "British Columbia, Nature 2012" section further down; the
# two sections therefore printed identical sample and gene summaries.
# NOTE(review): the file name below follows the `<study id>_data_mutations.txt`
# pattern of the other reads in this notebook -- confirm it matches the file on
# disk. The captured output under this cell predates the change and must be
# regenerated.
df <- read.maf(maf = "brca_bccrc_xenograft_2014_data_mutations.txt")
samp <- getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 29 
-Summarizing
--Mutiple centers found
BC;--Possible FLAGS among top ten genes:
  USH2A
-Processing clinical data
--Missing clinical data
-Finished in 0.500s elapsed (0.260s cpu) 
A data.table: 10 × 10
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_Sitetotal
<fct><int><int><int><int><int><int><int><int><dbl>
SA214 00001741801193
SA106 4220116 305132
SA218 7110111 600126
SA06515060 85 901116
SA071 1000 86 104 92
SA054 2000 65 304 74
SA077 0010 64 500 70
SA031 1000 61 201 65
SA084 0100 521000 63
SA225 2020 52 700 63
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 100023803353535
PIK3CA0010 6000 7 7 7
USH2A 0000 7000 7 6 6
MYO3A 0000 5100 6 6 6
PTEN 2000 4000 6 5 5
ATR 1000 4000 5 4 4
COL6A30000 3100 4 4 4
GPR1120000 4000 4 4 4
LRP2 0000 4000 4 4 4
MDN1 0000 3100 4 4 4
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 1 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
NR3C110002000333
sets = c("USH2A", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

Sequence analysis of mutations and translocations across breast cancer subtypes

REF: 22722202 Breast Invasive Carcinoma (Broad, Nature 2012)

Samples: 103

df = read.maf(maf = "brca_broad_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 1282 
-Summarizing
--Possible FLAGS among top ten genes:
  TTN
  FLG
  MUC16
-Processing clinical data
--Missing clinical data
-Finished in 0.880s elapsed (0.560s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
BR-M-191111020321130231
BR-M-037100012620030150
BR-V-0433000113 5061128
BR-V-0270000102 8130114
BR-V-0672000 93 8020105
BR-M-0452000 89 5041101
BR-M-1165950 74 4031101
BR-V-0020000 64 9030 76
BR-V-0372000 66 2031 74
BR-M-0551100 59 4000 65
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 2000214040313030
PIK3CA0000300000302828
TTN 0000112000131212
KMT2C 1000 32010 7 7 7
AKT1 0000 60000 6 6 6
FLG 0000 60000 6 6 6
MUC16 0000 60000 6 6 6
MUC2 0010 50000 6 6 6
DMD 0300 20000 5 5 5
RYR3 0000 50000 5 4 4
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 3 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
ESR1000010000111
PGR 010000000111
VDR 000010000111
sets = c("TTN", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

The landscape of cancer genes and mutational processes in breast cancer

REF: 22722201 Breast Invasive Carcinoma (Sanger, Nature 2012) Samples: 100

df = read.maf(maf = "brca_sanger_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 1889 
-Summarizing
--Possible FLAGS among top ten genes:
  TTN
  MUC16
  SYNE1
-Processing clinical data
--Missing clinical data
-Finished in 0.690s elapsed (0.500s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
PD4203a 2100405380151462
PD4120a 1000413391 50459
PD4937a 0100319210 60347
PD4127a 3000200310 20236
PD4100a22230151 91 81197
PD4601a 1101133170 20155
PD4119a 0000133140 50152
PD4596a 1010111150 30131
PD4123a 2010100 60 40113
PD4844a 6030 95 70 20113
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 6 021206030383737
PIK3CA0 010330000343030
TTN 0 110272010322626
GATA3 11300 10010161515
MLL3 1 200 72010131111
MUC16 0 000 91000101010
CDH1 4 000 3300010 8 8
FSIP2 0 000 62000 8 7 7
SYNE1 0 000 71000 8 7 7
BIRC6 0 000 70000 7 7 7
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 2 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
AR 000010000111
PGR000010000111
sets = c("TTN", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

PIK3CA and MAP3K1 alterations imply luminal A status and are associated with clinical benefit from pan-PI3K inhibitor buparlisib and letrozole in ER+ metastatic breast cancer

REF: 31552290 Breast Cancer (MSK, NPJ Breast Cancer 2019)

Samples : 70

df = read.maf(maf = "brca_mskcc_2019_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
--Removed 1 duplicated variants
-Silent variants: 2 
-Summarizing
--Mutiple centers found
MSK-IMPACT341;MSK-IMPACT-Processing clinical data
--Missing clinical data
-Finished in 0.160s elapsed (0.070s cpu) 
A data.table: 10 × 10
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><dbl>
s_DS_bkm_057_T 01002120024
s_DS_bkm_081_T 00002310024
s_DS_bkm_067_T 02101620021
s_DS_bkm_078_T100011530019
s_DS_bkm_078_T200011530019
s_DS_bkm_065_T 21011300017
s_DS_bkm_020_T 10001140016
s_DS_bkm_061_T 00001401116
s_DS_bkm_073_T 20001400016
s_DS_bkm_076_T 00001500015
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
PIK3CA011040000423535
TP53 010020210242222
ATM 000018010191616
CDH1 4300 3510161515
MAP3K16100 8300181313
GATA3 3610 3000131212
KMT2C 4000 6300131212
NOTCH2600014000201010
ERBB2 000010000101010
MDC1 0010 7100 9 9 9
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 2 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
ESR100006000655
AR 00100000111
sets = c("ATM", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

The clonal and mutational evolution spectrum of primary triple-negative breast cancers

REF: 22495314 Breast Invasive Carcinoma (British Columbia, Nature 2012)

Samples : 65

# Load the mutation file for the British Columbia (Nature 2012) study named in
# the heading above and preview the per-sample variant counts.
df <- read.maf(maf = "brca_bccrc_data_mutations.txt")
samp <- getSampleSummary(df)
head(samp, n = 10)
-Reading
-Validating
-Silent variants: 29 
-Summarizing
--Mutiple centers found
BC;--Possible FLAGS among top ten genes:
  USH2A
-Processing clinical data
--Missing clinical data
-Finished in 0.370s elapsed (0.260s cpu) 
A data.table: 10 × 10
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_Sitetotal
<fct><int><int><int><int><int><int><int><int><dbl>
SA214 00001741801193
SA106 4220116 305132
SA218 7110111 600126
SA06515060 85 901116
SA071 1000 86 104 92
SA054 2000 65 304 74
SA077 0010 64 500 70
SA031 1000 61 201 65
SA084 0100 521000 63
SA225 2020 52 700 63
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 100023803353535
PIK3CA0010 6000 7 7 7
USH2A 0000 7000 7 6 6
MYO3A 0000 5100 6 6 6
PTEN 2000 4000 6 5 5
ATR 1000 4000 5 4 4
COL6A30000 3100 4 4 4
GPR1120000 4000 4 4 4
LRP2 0000 4000 4 4 4
MDN1 0000 3100 4 4 4
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 1 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
NR3C110002000333
sets = c("USH2A", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

Whole-Exome Sequencing Analysis of the Progression from Non-Low-Grade Ductal Carcinoma In Situ to Invasive Ductal Carcinoma

REF: 32220886 Breast Cancer (MSK, Clinical Cancer Res 2020)

Samples: 60

df = read.maf(maf = "brca_pareja_msk_2020_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 45 
-Summarizing
--Possible FLAGS among top ten genes:
  MUC16
  TTN
-Processing clinical data
--Missing clinical data
-Finished in 0.400s elapsed (0.220s cpu) 
A data.table: 10 × 10
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_Sitetotal
<fct><int><int><int><int><int><int><int><int><dbl>
30DCIS 10002653423305
30IDC 20102142613247
25DCIS110301561612189
25IDC 110201501412180
19IDC 11040112 203132
21IDC 3010110 603123
23DCIS 5140100 804122
21DCIS 4010100 503113
2IDCA 6110 95 203108
23IDC 4120 89 704107
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 442113401292929
PIK3CA 020022000242222
GATA3 3800 1102151515
MUC16 0000 9000 9 9 9
AGGF1 0000 8000 8 8 8
TTN 0000 9000 9 7 7
CDC27 0000 6200 8 7 7
CCAR1 3000 0400 7 7 7
KIF5A 0000 7000 7 7 7
ARHGAP220020 2200 6 6 6
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 0 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
sets = c("GATA3", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

The Metastatic Breast Cancer Project (Provisional, December 2021)

Samples: 379

df = read.maf(maf = "brca_mbcproject_2022_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 24952 
-Summarizing
--Possible FLAGS among top ten genes:
  TTN
  MUC16
  HMCN1
-Processing clinical data
--Missing clinical data
-Finished in 3.280s elapsed (2.510s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
MBC-MBCProject_7wCjtKIW-Tumor-SM-GQCN4 8410107612301021224
MBC-MBCProject_57iLiJIl-Tumor-SM-CGLIV 2000 650 652 84 731
RP-1156_MBCProject_JXUNUQI8_BLOOD_P_v1_Exome1010 488 600150 565
RP-1156_MBCProject_rLt0uZhz_BLOOD_P_v1_Exome1110 330 312 50 371
RP-1156_MBCProject_bBIxhQUD_BLOOD_P_v2_Exome2001 314 350 51 358
MBC-MBCProject_K7f6fdUz-Tumor-SM-AZ5MA 1000 311 281 60 347
RP-1156_MBCProject_0jUXcgsJ_BLOOD_P_v2_Exome2110 288 261 50 324
RP-1156_MBCProject_W4FBsLSx_T3_v2_Exome 3510 269 280 30 309
MBC-MBCProject_99CdCOHm-Tumor-SM-CGLF4 1200 270 21 10 277
RP-1156_MBCProject_W4FBsLSx_T1_v2_Exome 2010 242 272 11 276
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 10 740 7811040114112112
TTN 1 200 90 9000102 81 81
PIK3CA 0 110 76 0000 78 76 76
CDH1 11 500 1019072 54 54 54
IGDCC4 0 000120 0000120 50 50
ESR1 0 000 49 1000 50 43 43
MUC16 0 211 39 2010 46 40 40
HMCN1 0 300 32 1030 39 38 38
ZKSCAN1 03400 2 2000 38 37 37
GIMAP6 0 000 41 3000 44 36 36
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 4 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
ESR1 0000491000504343
PGR 0000 40000 4 4 4
NR3C10000 10010 2 2 2
VDR 0000 01000 1 1 1
sets = c("TTN", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

Mutational Profile of Metastatic Breast Cancers: A Retrospective Analysis

REF: 28027327 Metastatic Breast Cancer (INSERM, PLoS Med 2016) Samples: 216

df = read.maf(maf = "brca_igr_2015_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 7046 
-Summarizing
--Possible FLAGS among top ten genes:
  TTN
  MUC16
-Processing clinical data
--Missing clinical data
-Finished in 1.670s elapsed (1.170s cpu) 
A data.table: 10 × 10
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><dbl>
MBC_1890 80769794100870
MBC_8 0 20628741 42711
MBC_45 0 50563582130641
MBC_71 0 81396481 60460
MBC_82 1 70379500 10438
MBC_2070 33295332 45345
MBC_92 0 31296293 80340
MBC_29 0 21272381 60320
MBC_31 0101234230 71276
MBC_1450 10210162 30232
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 11134914090878484
PIK3CA0 2467 1000746565
TTN 1 1244 7000554040
GATA3 0180 5 1000242222
ESR1 0 0221 0000232222
RYR2 0 0019 3010231919
MAP3K10150 3 6000241818
FSIP2 0 2114 0000171616
CDH1 0110 1 1020151515
MUC16 0 0015 3000181414
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)
Warning message in titv(maf = maf, useSyn = TRUE, plot = FALSE):
"Non standard Ti/Tv class: 4TRUE"

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 3 × 12
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><dbl><int><int>
ESR1002210000232222
AR 010 30000 4 4 4
VDR 000 10010 2 2 2
sets = c("TTN", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

Multi-omics profiling of younger Asian breast cancers reveals distinctive molecular signatures

REF: 29713003 Breast Cancer (SMC 2018) Samples : 186

df = read.maf(maf = "brca_smc_2018_data_mutations.txt")
samp = getSampleSummary(df)
head(samp, 10)
-Reading
-Validating
-Silent variants: 22 
-Summarizing
--Possible FLAGS among top ten genes:
  TTN
  MUC16
  SYNE1
-Processing clinical data
--Missing clinical data
-Finished in 0.780s elapsed (0.540s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
brca_smc_2018_BB01_1111 01027333071316
brca_smc_2018_BR255 71169161 7040205
brca_smc_2018_BB01_1032 101152 9051171
brca_smc_2018_BB01_1122 111128 8110143
brca_smc_2018_BB01_0473 10012311021141
brca_smc_2018_BR097 6 102115 4130132
brca_smc_2018_BB01_0611 010117 9000128
brca_smc_2018_BB01_0881 200 9916100119
brca_smc_2018_BB01_0990 000108 7021118
brca_smc_2018_BR069 2 210 99 4040112
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
TP53 9 4205416040898989
PIK3CA0 20056 0000585353
TTN 1 00028 0000292323
GATA3 12000 1 0020242323
MUC16 0 00013 0000131212
CSMD3 0 000 9 001010 9 9
MAP3K12 210 5 000010 9 9
MAML3 0 008 0 0000 8 8 8
ARID1A2 300 1 2000 8 7 7
SYNE1 0 000 7 1000 8 7 7
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 1 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
AR000130000444
sets = c("TTN", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

INK4 Tumor Suppressor Proteins Mediate Resistance to CDK4/6 Kinase Inhibitors

Metastatic Breast Cancer (MSK, Cancer Discovery 2022) REF: 34544752

Samples: 1365

df = read.maf(maf = "breast_ink4_msk_2021_data_mutations.txt")
-Reading
-Validating
--Removed 24 duplicated variants
-Silent variants: 21 
-Summarizing
--Mutiple centers found
MSKCC;-Processing clinical data
--Missing clinical data
-Finished in 0.610s elapsed (0.400s cpu) 
samp = getSampleSummary(df)
head(samp, 10)
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
P-0039857-T01-IM6010067903181
P-0009602-T01-IM5100040500046
P-0011567-T02-IM6001034712045
P-0004987-T01-IM5110034404044
P-0002713-T01-IM3000037311042
P-0009364-T02-IM6120034301041
P-0027986-T01-IM6010031602040
P-0002124-T01-IM3000035400039
P-0030930-T01-IM6501028401039
P-0003233-T04-IM6000034310038
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
PIK3CA 2 0170614 00 00633533533
TP53 40 17 60240490350387373373
ESR1 1 1 60296 00 10305283283
CDH1 69 54 40 21760293256253253
GATA3 52142 21 35 60140252245245
KMT2C 37 7 20 68660 30183152152
MAP3K154 36 50 37250 50162123123
ARID1A32 19 00 20430 30117104104
AKT1 0 0 14100 10 00106104104
FOXA1 10 3192 77 10 00112103103
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 3 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
ESR111602960010305283283
AR 0020 143000 19 19 19
PGR 0000 70000 7 7 7
sets = c("CDH1", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

The Genomic Landscape of Endocrine-Resistant Advanced Breast Cancers

Breast Cancer (MSK, Cancer Cell 2018) REF: 30205045

Samples: 1918

# Load the MSK (Cancer Cell 2018) mutation file and preview the per-sample
# variant counts; `maf` is named explicitly, as in the other sections.
df <- read.maf(maf = "breast_msk_2018_data_mutations.txt")
samp <- getSampleSummary(df)
head(samp, n = 10)
-Reading
-Validating
--Removed 50 duplicated variants
-Silent variants: 46 
-Summarizing
--Mutiple centers found
MSKCC;-Processing clinical data
--Missing clinical data
-Finished in 1.020s elapsed (0.660s cpu) 
A data.table: 10 × 11
Tumor_Sample_BarcodeFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_Sitetotal
<fct><int><int><int><int><int><int><int><int><int><dbl>
P-0016773-T01-IM60000389410140444
P-0009602-T01-IM51000 40 50 00 46
P-0004987-T01-IM51100 34 40 40 44
P-0002713-T01-IM30000 37 31 10 42
P-0002124-T01-IM30000 35 40 00 39
P-0002713-T02-IM60000 29 60 01 36
P-0000138-T01-IM31000 24 70 10 33
P-0002858-T01-IM31000 29 10 10 32
P-0004555-T01-IM51000 23 70 00 31
P-0014136-T01-IM50000 29 20 00 31
genes = getGeneSummary(df)
head(genes,10)
A data.table: 10 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
PIK3CA 3 1201801 00 00826725725
TP53 90 341923851060590695680680
CDH1 80 66 70 25 860384306303303
GATA3 51168 51 47 70190298288288
ESR1 0 1 40168 10 10175164164
MAP3K169 50 50 48 350130220155155
PTEN 34 22 70 51 270140155137137
MLL3 27 9 20 55 500 50148130130
AKT1 1 0 03104 10 00109107107
ARID1A27 20 11 24 360 40113106106
plotmafSummary(maf = df, rmOutlier = TRUE, addStat = 'median', dashboard = TRUE, titvRaw = FALSE)

png

genes %>% dplyr::filter(Hugo_Symbol %in% nrs)
A data.table: 3 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
ESR101401681010175164164
AR 0021 142000 19 19 19
PGR 0000 31010 5 4 4
sets = c("CDH1", "TP53", "PIK3CA")
sts <- c(sets, nrs)
oncoplot(maf=df, genes = sts)

png

WARNING

The analysis ends here. Everything below is scratch/test code and should not be considered part of the results.

# Gene-summary rows, as a plain data.frame, for four of the symbols in `nrs`
# (VDR is not part of this check).
genes %>%
  as.data.frame() %>%
  dplyr::filter(Hugo_Symbol %in% c("ESR1", "PGR", "NR3C1", "AR"))
A data.frame: 3 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
ESR101401681010175164164
AR 0021 142000 19 19 19
PGR 0000 31010 5 4 4
# Same five symbols as `nrs`, held under a separate name for this scratch check.
rows_to_filter <- c("AR", "ESR1", "PGR", "NR3C1", "VDR")
dplyr::filter(genes, Hugo_Symbol %in% rows_to_filter)
A data.table: 3 × 13
Hugo_SymbolFrame_Shift_DelFrame_Shift_InsIn_Frame_DelIn_Frame_InsMissense_MutationNonsense_MutationNonstop_MutationSplice_SiteTranslation_Start_SitetotalMutatedSamplesAlteredSamples
<chr><int><int><int><int><int><int><int><int><int><dbl><int><int>
ESR101401681010175164164
AR 0021 142000 19 19 19
PGR 0000 31010 5 4 4
# Gene list for the oncoplot. It gets its own name so that the `genes` summary
# table returned by getGeneSummary() is not replaced by a character vector,
# which would break any later `genes %>% dplyr::filter(...)` re-run.
oncoplot_genes <- c("TP53", "PIK3CA", "GATA3", "NR3C1", "PGR", "AR", "ESR1")
oncoplot(maf = df, genes = oncoplot_genes)

png

# Toy data frame for the column-filtering experiments below: a character ID
# column ("a1" .. "a12") plus seven numeric columns SA1 .. SA7.
data <- data.frame(
  ID  = paste0("a", seq_len(12)),
  SA1 = c(9, 2, 10, 4, 0, 10, 1, 8, 5, 1, 1, 5),
  SA2 = c(10, 8, 1, 7, 0, 4, 0, 1, 9, 9, 2, 5),
  SA3 = c(9, 2, 10, 2, 3, 3, 5, 1, 5, 9, 5, 0),
  SA4 = c(5, 9, 0, 9, 2, 1, 9, 3, 9, 1, 1, 6),
  SA5 = c(8, 10, 9, 2, 8, 9, 8, 7, 10, 6, 10, 7),
  SA6 = c(0, 10, 2, 4, 0, 2, 7, 7, 4, 6, 4, 7),
  SA7 = c(2, 5, 2, 4, 1, 4, 6, 5, 7, 3, 6, 6)
)
data
# # Define the list of rows you want to filter
# rows_to_filter <- c("a2", "a5", "a6", "a9", "a11")

# # Filter the dataframe based on the specified rows
# filtered_data <- data %>% filter(ID %in% rows_to_filter)

# # Print the filtered dataframe
# filtered_data
A data.frame: 12 × 8
IDSA1SA2SA3SA4SA5SA6SA7
<chr><dbl><dbl><dbl><dbl><dbl><dbl><dbl>
a1 910 95 8 02
a2 2 8 2910105
a3 10 1100 9 22
a4 4 7 29 2 44
a5 0 0 32 8 01
a6 10 4 31 9 24
a7 1 0 59 8 76
a8 8 1 13 7 75
a9 5 9 5910 47
a10 1 9 91 6 63
a11 1 2 5110 46
a12 5 5 06 7 76
# Column names requested for the subset; "AGFH" is not a column of `data`.
columns_to_filter <- c("SA1", "SA3", "SA4", "SA7", "AGFH")
# Requested names that are actually present in `data`.
valid_columns <- intersect(columns_to_filter, names(data))
valid_columns
<style> .list-inline {list-style: none; margin:0; padding: 0} .list-inline>li {display: inline-block} .list-inline>li:not(:last-child)::after {content: "\00b7"; padding: 0 .5ex} </style>
  1. 'SA1'
  2. 'SA3'
  3. 'SA4'
  4. 'SA7'
# Keep ID plus the valid columns. drop = FALSE guarantees the result stays a
# data frame even when `valid_columns` is empty and only "ID" is selected.
filtered_data <- data[, c("ID", valid_columns), drop = FALSE]
filtered_data
A data.frame: 12 × 5
IDSA1SA3SA4SA7
<chr><dbl><dbl><dbl><dbl>
a1 9 952
a2 2 295
a3 101002
a4 4 294
a5 0 321
a6 10 314
a7 1 596
a8 8 135
a9 5 597
a10 1 913
a11 1 516
a12 5 066
# Columns requested for the subset; names that `data` does not have (here
# "AGFH") are discarded by the intersection below.
columns_to_filter <- c("SA1", "SA3", "SA4", "SA7", "AGFH")

# Requested columns that actually exist in `data`.
valid_columns <- intersect(columns_to_filter, colnames(data))

# Keep ID plus the valid columns. drop = FALSE guarantees a data frame even
# when none of the requested columns exist and only "ID" is selected.
filtered_data <- data[, c("ID", valid_columns), drop = FALSE]

# Print the filtered dataframe
print(filtered_data)