`tidySCE`: speed up `aggregate_cells`
Closed this issue · 2 comments
stemangiola commented
This has taken inspiration and motivation from
stemangiola commented
stemangiola commented
Data preparation
data(pbmc_small)
df <- pbmc_small
ids = df |> unite("id", factor, string) |> pull(id)
scuttle
microbenchmark(aggregateAcrossCells(df, ids), times = 10L, unit = "seconds")
Unit: seconds
expr min lq mean median uq max neval
aggregateAcrossCells(df, ids) 0.1972021 0.2095963 0.214034 0.2129438 0.2222606 0.2374125 10
OLD tidySingleCellExperiment
microbenchmark(aggregate_cells(df, c(factor, string)), times = 10L)
Unit: seconds
expr min lq mean median uq max neval
aggregate_cells(df, c(factor, string)) 2.046285 2.106658 2.25975 2.129606 2.296099 2.88916 10
FIRST iteration 2x improvement b60f538
microbenchmark(aggregate_cells(df, c(factor, string)), times = 10L, unit = "second")
Unit: seconds
expr min lq mean median uq max neval
aggregate_cells(df, c(factor, string)) 0.9484644 0.9818873 0.9893115 0.9847042 0.9996955 1.063225 10
SECOND iteration ~1.5x further improvement (runtime reduced by about 1/3) d302642
microbenchmark(aggregate_cells(df, c(factor, string)), times = 10L, unit = "second")
Unit: seconds
expr min lq mean median uq max neval
aggregate_cells(df, c(factor, string)) 0.6526753 0.6631348 0.7401931 0.6699366 0.6837512 1.064324 10
THIRD iteration 2x further improvement 4195aa8
microbenchmark(aggregate_cells(df, c(factor, string)), times = 10L, unit = "second")
Unit: seconds
expr min lq mean median uq max neval
aggregate_cells(df, c(factor, string)) 0.3838859 0.3932368 0.4351129 0.4167958 0.4408787 0.633195 10
FOURTH iteration 1.5x further improvement 5fb88fa
microbenchmark(aggregate_cells(df, c(factor, string)), times = 10L, unit = "second")
Unit: seconds
expr min lq mean median uq max neval
aggregate_cells(df, c(factor, string)) 0.2763835 0.2814492 0.2886746 0.2838345 0.2929347 0.3170838 10