--- title: "High Performance Benchmarks" author: "Joseph Wood" date: "11/30/2023" output: rmarkdown::html_vignette: toc: true number_sections: false vignette: > %\VignetteIndexEntry{High Performance Benchmarks} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- This document serves as an overview for measuring the performance of `RcppAlgos` against other tools for generating combinations, permutations, and partitions. This stackoverflow post: [How to generate permutations or combinations of object in R?]() has some benchmarks. You will note that the examples in that post are relatively small. The benchmarks below will focus on larger examples where performance really matters and for this reason we only consider the packages [arrangements](), [partitions](), and [RcppAlgos](). ## Setup Information For the benchmarks below, we used a `2022 Macbook Air Apple M2 24 GB` machine. ``` r library(RcppAlgos) library(partitions) library(arrangements) #> #> Attaching package: 'arrangements' #> The following object is masked from 'package:partitions': #> #> compositions library(microbenchmark) options(digits = 4) options(width = 90) pertinent_output <- capture.output(sessionInfo()) cat(paste(pertinent_output[1:3], collapse = "\n")) #> R version 4.3.1 (2023-06-16) #> Platform: aarch64-apple-darwin20 (64-bit) #> Running under: macOS Ventura 13.4.1 pkgs <- c("RcppAlgos", "arrangements", "partitions", "microbenchmark") sapply(pkgs, packageVersion, simplify = FALSE) #> $RcppAlgos #> [1] '2.8.3' #> #> $arrangements #> [1] '1.1.9' #> #> $partitions #> [1] '1.10.7' #> #> $microbenchmark #> [1] '1.4.10' numThreads <- min(as.integer(RcppAlgos::stdThreadMax() / 2), 6) numThreads #> [1] 4 ``` ## Combinations ### Combinations - Distinct ``` r set.seed(13) v1 <- sort(sample(100, 30)) m <- 21 t1 <- comboGeneral(v1, m, Parallel = T) t2 <- combinations(v1, m) stopifnot(identical(t1, t2)) dim(t1) #> [1] 14307150 21 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v1, m), cbArrangements = combinations(v1, m), times = 15, unit = "relative") #> Warning in microbenchmark(cbRcppAlgosPar = comboGeneral(v1, m, nThreads = numThreads), : #> less accurate nanosecond times to avoid potential integer overflows #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a #> cbRcppAlgosSer 3.495 3.040 3.040 2.996 2.997 2.905 15 b #> cbArrangements 3.520 3.068 3.065 3.028 3.019 2.934 15 b ``` ### Combinations - Repetition ``` r v2 <- v1[1:10] m <- 20 t1 <- comboGeneral(v2, m, repetition = TRUE, nThreads = numThreads) t2 <- combinations(v2, m, replace = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 10015005 20 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v2, m, TRUE, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v2, m, TRUE), cbArrangements = combinations(v2, m, replace = TRUE), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a #> cbRcppAlgosSer 2.840 2.944 2.983 2.934 2.929 3.727 15 b #> cbArrangements 2.952 2.927 2.905 2.919 2.908 2.761 15 b ``` ### Combinations - Multisets ``` r myFreqs <- c(2, 4, 4, 5, 3, 2, 2, 2, 3, 4, 1, 4, 2, 5) v3 <- as.integer(c(1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610)) t1 <- comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads) t2 <- combinations(freq = myFreqs, k = 20, x = v3) stopifnot(identical(t1, t2)) dim(t1) #> [1] 14594082 20 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = comboGeneral(v3, 20, freqs = myFreqs, nThreads = numThreads), cbRcppAlgosSer = comboGeneral(v3, 20, freqs = myFreqs), cbArrangements = combinations(freq = myFreqs, k = 20, x = v3), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.086 3.078 3.036 3.051 2.999 2.958 10 b #> cbArrangements 5.661 5.782 5.683 5.735 5.618 5.522 10 c ``` ## Permutations ### Permutations - Distinct ``` r v4 <- as.integer(c(2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59)) t1 <- permuteGeneral(v4, 6, nThreads = numThreads) t2 <- permutations(v4, 6) stopifnot(identical(t1, t2)) dim(t1) #> [1] 8910720 6 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v4, 6, nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v4, 6), cbArrangements = permutations(v4, 6), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 15 a #> cbRcppAlgosSer 1.489 1.498 1.392 1.463 1.179 1.322 15 b #> cbArrangements 2.521 2.521 2.311 2.469 2.127 1.984 15 c ## Indexing permutation example with the partitions package t1 <- permuteGeneral(11, nThreads = 4) t2 <- permutations(11) t3 <- perms(11) dim(t1) #> [1] 39916800 11 stopifnot(identical(t1, t2), identical(t1, t(as.matrix(t3)))) rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(11, nThreads = 4), cbRcppAlgosSer = permuteGeneral(11), cbArrangements = permutations(11), cbPartitions = perms(11), times = 5, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 5 a #> cbRcppAlgosSer 2.456 2.466 2.175 2.421 1.793 1.975 5 b #> cbArrangements 4.003 4.008 3.651 4.088 3.169 3.316 5 c #> cbPartitions 7.857 7.936 7.062 8.019 5.915 6.338 5 d ``` ### Permutations - Repetition ``` r v5 <- v3[1:5] t1 <- permuteGeneral(v5, 10, repetition = TRUE, nThreads = numThreads) t2 <- permutations(v5, 10, replace = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 9765625 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v5, 10, TRUE, nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v5, 10, TRUE), cbArrangements = permutations(x = v5, k = 10, replace = TRUE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 2.958 2.842 2.946 2.825 2.781 3.835 10 b #> cbArrangements 3.591 3.478 3.590 3.447 3.391 4.590 10 c ``` ### Permutations - Multisets ``` r v6 <- sort(runif(12)) t1 <- permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads) t2 <- permutations(freq = rep(1:3, 4), k = 7, x = v6) stopifnot(identical(t1, t2)) dim(t1) #> [1] 19520760 7 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = permuteGeneral(v6, 7, freqs = rep(1:3, 4), nThreads = numThreads), cbRcppAlgosSer = permuteGeneral(v6, 7, freqs = rep(1:3, 4)), cbArrangements = permutations(freq = rep(1:3, 4), k = 7, x = v6), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.214 3.312 3.065 3.285 3.251 1.954 10 b #> cbArrangements 3.635 3.636 3.374 3.609 3.572 2.131 10 c ``` ## Partitions ### Partitions - Distinct #### All Distinct Partitions ``` r t1 <- comboGeneral(0:140, freqs=c(140, rep(1, 140)), constraintFun = "sum", comparisonFun = "==", limitConstraints = 140) t2 <- partitions(140, distinct = TRUE) t3 <- diffparts(140) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 140 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 140), all(rowSums(t2) == 140), all(colSums(t3) == 140)) dim(t1) #> [1] 9617150 16 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:140, freqs=c(140, rep(1, 140)), nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(0:140, freqs=c(140, rep(1, 140))), cbArrangements = partitions(140, distinct = TRUE), cbPartitions = diffparts(140), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.320 3.252 3.036 3.510 2.670 2.539 10 b #> cbArrangements 2.638 2.592 2.361 2.566 2.152 1.974 10 c #> cbPartitions 17.403 17.723 16.021 18.527 14.143 13.119 10 d ``` #### Restricted Distinct Partitions ``` r t1 <- comboGeneral(160, 10, constraintFun = "sum", comparisonFun = "==", limitConstraints = 160) t2 <- partitions(160, 10, distinct = TRUE) stopifnot(identical(t1, t2)) dim(t1) #> [1] 8942920 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(160, 10, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(160, 10), cbArrangements = partitions(160, 10, distinct = TRUE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 10 a #> cbRcppAlgosSer 3.439 3.433 3.007 3.332 2.235 2.564 10 b #> cbArrangements 4.509 4.519 3.834 4.379 2.919 2.928 10 c ``` ### Partitions - Repetition #### All Partitions ``` r t1 <- comboGeneral(0:65, repetition = TRUE, constraintFun = "sum", comparisonFun = "==", limitConstraints = 65) t2 <- partitions(65) t3 <- parts(65) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 65 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 65), all(rowSums(t2) == 65), all(colSums(t3) == 65)) dim(t1) #> [1] 2012558 65 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(0:65, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(0:65, repetition = TRUE), cbArrangements = partitions(65), cbPartitions = parts(65), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.0000 20 a #> cbRcppAlgosSer 2.902 2.644 2.376 2.624 2.571 0.8792 20 b #> cbArrangements 2.310 2.030 1.916 1.999 1.974 1.2587 20 b #> cbPartitions 8.951 8.916 9.342 11.188 10.938 3.7147 20 c ``` #### Restricted Partitions ``` r t1 <- comboGeneral(100, 15, TRUE, constraintFun = "sum", comparisonFun = "==", limitConstraints = 100) t2 <- partitions(100, 15) stopifnot(identical(t1, t2)) dim(t1) #> [1] 9921212 15 rm(t1, t2) # This takes a really long time... not because of restrictedparts, # but because apply is not that fast. This transformation is # needed for proper comparisons. As a result, we will compare # a smaller example # t3 <- t(apply(as.matrix(restrictedparts(100, 15, include.zero = F)), 2, sort)) t3 <- t(apply(as.matrix(restrictedparts(50, 15, include.zero = F)), 2, sort)) stopifnot(identical(partitions(50, 15), t3)) rm(t3) invisible(gc()) microbenchmark(cbRcppAlgosPar = partitionsGeneral(100, 15, TRUE, nThreads = numThreads), cbRcppAlgosSer = partitionsGeneral(100, 15, TRUE), cbArrangements = partitions(100, 15), cbPartitions = restrictedparts(100, 15, include.zero = FALSE), times = 10, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.00 1.000 1.000 10 a #> cbRcppAlgosSer 3.439 3.445 2.586 2.74 2.802 1.290 10 b #> cbArrangements 4.210 4.210 3.126 3.35 3.266 1.534 10 c #> cbPartitions 15.238 15.460 11.480 12.26 11.754 6.212 10 d ``` ### Partitions - Multisets Currenlty, `RcppAlgos` is the only package capable of efficiently generating partitions of multisets. Therefore, we will only time `RcppAlgos` and use this as a reference for future improvements. ``` r t1 <- comboGeneral(120, 10, freqs=rep(1:8, 15), constraintFun = "sum", comparisonFun = "==", limitConstraints = 120) dim(t1) #> [1] 7340225 10 stopifnot(all(rowSums(t1) == 120)) microbenchmark(cbRcppAlgos = partitionsGeneral(120, 10, freqs=rep(1:8, 15)), times = 10) #> Unit: milliseconds #> expr min lq mean median uq max neval #> cbRcppAlgos 273.9 275.5 279.1 276.4 283.6 286.8 10 ``` ## Compositions ### Compositions - Repetition #### All Compositions (Small case) ``` r t1 <- compositionsGeneral(0:15, repetition = TRUE) t2 <- arrangements::compositions(15) t3 <- partitions::compositions(15) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 15 stopifnot(identical(dim(t1), dim(t2)), identical(dim(t1), dim(t(t3))), all(rowSums(t1) == 15), all(rowSums(t2) == 15), all(colSums(t3) == 15)) dim(t1) #> [1] 16384 15 rm(t1, t2, t3) invisible(gc()) microbenchmark(cbRcppAlgosSer = compositionsGeneral(0:15, repetition = TRUE), cbArrangements = arrangements::compositions(15), cbPartitions = partitions::compositions(15), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosSer 1.000 1.000 1.000 1.000 1.00 1.000 20 a #> cbArrangements 1.201 1.178 1.189 1.175 1.19 1.195 20 a #> cbPartitions 129.521 148.587 169.709 173.088 190.08 207.932 20 b ``` For the next two examples, we will exclude the `partitions` package for efficiency reasons. #### All Compositions (Larger case) ``` r t1 <- compositionsGeneral(0:23, repetition = TRUE) t2 <- arrangements::compositions(23) # Each package has different output formats... we only examine dimensions # and that each result is a partition of 23 stopifnot(identical(dim(t1), dim(t2)), all(rowSums(t1) == 23), all(rowSums(t2) == 23)) dim(t1) #> [1] 4194304 23 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = compositionsGeneral(0:23, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = compositionsGeneral(0:23, repetition = TRUE), cbArrangements = arrangements::compositions(23), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 a #> cbRcppAlgosSer 3.388 3.397 3.419 3.445 3.404 3.439 20 b #> cbArrangements 3.741 3.786 3.822 3.835 3.829 3.869 20 c ``` #### Restricted Compositions ``` r t1 <- compositionsGeneral(30, 10, repetition = TRUE) t2 <- arrangements::compositions(30, 10) stopifnot(identical(t1, t2), all(rowSums(t1) == 30)) dim(t1) #> [1] 10015005 10 rm(t1, t2) invisible(gc()) microbenchmark(cbRcppAlgosPar = compositionsGeneral(30, 10, repetition = TRUE, nThreads = numThreads), cbRcppAlgosSer = compositionsGeneral(30, 10, repetition = TRUE), cbArrangements = arrangements::compositions(30, 10), times = 20, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgosPar 1.000 1.000 1.000 1.000 1.000 1.000 20 a #> cbRcppAlgosSer 3.057 3.088 3.167 3.083 3.032 4.699 20 b #> cbArrangements 3.165 3.131 3.154 3.143 3.196 3.085 20 b ``` ## Iterators We will show one example from each category to demonstrate the efficiency of the iterators in `RcppAlgos`. The results are similar for the rest of the cases not shown. ### Combinations ``` r pkg_arrangements <- function(n, total) { a <- icombinations(n, as.integer(n / 2)) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- comboIter(n, as.integer(n / 2)) for (i in 1:total) a@nextIter() } total <- comboCount(18, 9) total #> [1] 48620 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(18, total), cbArrangements = pkg_arrangements(18, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.0 1.00 15 a #> cbArrangements 19.29 19.19 18.54 19.08 18.8 14.04 15 b ``` ### Permutations ``` r pkg_arrangements <- function(n, total) { a <- ipermutations(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- permuteIter(n) for (i in 1:total) a@nextIter() } total <- permuteCount(8) total #> [1] 40320 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(8, total), cbArrangements = pkg_arrangements(8, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a #> cbArrangements 19.14 18.88 18.24 17.93 18.21 17.09 15 b ``` ### Partitions ``` r pkg_partitions <- function(n, total) { a <- firstpart(n) for (i in 1:(total - 1)) a <- nextpart(a) } pkg_arrangements <- function(n, total) { a <- ipartitions(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- partitionsIter(0:n, repetition = TRUE) for (i in 1:total) a@nextIter() } total <- partitionsCount(0:40, repetition = TRUE) total #> [1] 37338 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(40, total), cbArrangements = pkg_arrangements(40, total), cbPartitions = pkg_partitions(40, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a #> cbArrangements 15.50 15.53 15.28 15.53 15.68 13.35 15 b #> cbPartitions 25.28 25.30 25.00 25.50 24.79 23.28 15 c ``` ### Compositions ``` r pkg_partitions <- function(n, total) { a <- firstcomposition(n) for (i in 1:(total - 1)) a <- nextcomposition(a, FALSE) } pkg_arrangements <- function(n, total) { a <- icompositions(n) for (i in 1:total) a$getnext() } pkg_RcppAlgos <- function(n, total) { a <- compositionsIter(0:n, repetition = TRUE) for (i in 1:total) a@nextIter() } total <- compositionsCount(0:15, repetition = TRUE) total #> [1] 16384 microbenchmark(cbRcppAlgos = pkg_RcppAlgos(15, total), cbArrangements = pkg_arrangements(15, total), cbPartitions = pkg_partitions(15, total), times = 15, unit = "relative") #> Unit: relative #> expr min lq mean median uq max neval cld #> cbRcppAlgos 1.00 1.00 1.00 1.00 1.00 1.00 15 a #> cbArrangements 13.87 13.69 13.47 13.53 13.23 13.00 15 b #> cbPartitions 45.04 44.26 43.93 43.14 43.83 47.16 15 c ```