TileDBArray 1.14.1
TileDB implements a framework for local and remote storage of dense and sparse arrays.
We can use this as a DelayedArray backend to provide an array-level abstraction,
thus allowing the data to be used in many places where an ordinary array or matrix might be used.
The TileDBArray package implements the necessary wrappers around TileDB-R
to support read/write operations on TileDB arrays within the DelayedArray framework.
TileDBArray
Creating a TileDBArray is as easy as:
X <- matrix(rnorm(1000), ncol=10)
library(TileDBArray)
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 1.4379257 0.5899878 -1.1246726 . -0.3183372 0.1677965
## [2,] 0.4428908 1.0175601 1.0051714 . 0.9909609 -0.1800655
## [3,] -0.3709840 1.9603032 1.2033325 . -0.8272382 -1.2589653
## [4,] -0.4764077 -0.4334604 -1.2056809 . 0.7747664 -1.1213591
## [5,] 0.5811226 -2.1323277 0.3906020 . 0.2103091 1.4890560
## ... . . . . . .
## [96,] -1.11981955 0.28900896 -1.24461851 . 0.002046795 -0.794698883
## [97,] 0.88109759 1.01576196 0.67382806 . 0.829595949 1.664077921
## [98,] 0.21591081 -1.04552756 0.73747737 . 1.124804113 -0.598457764
## [99,] -0.08006674 0.99394200 -0.35173969 . 0.486036394 1.554590995
## [100,] -1.63067099 -0.55495904 -0.51107924 . -1.530984518 -1.124967588
Alternatively, we can use coercion methods:
as(X, "TileDBArray")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] 1.4379257 0.5899878 -1.1246726 . -0.3183372 0.1677965
## [2,] 0.4428908 1.0175601 1.0051714 . 0.9909609 -0.1800655
## [3,] -0.3709840 1.9603032 1.2033325 . -0.8272382 -1.2589653
## [4,] -0.4764077 -0.4334604 -1.2056809 . 0.7747664 -1.1213591
## [5,] 0.5811226 -2.1323277 0.3906020 . 0.2103091 1.4890560
## ... . . . . . .
## [96,] -1.11981955 0.28900896 -1.24461851 . 0.002046795 -0.794698883
## [97,] 0.88109759 1.01576196 0.67382806 . 0.829595949 1.664077921
## [98,] 0.21591081 -1.04552756 0.73747737 . 1.124804113 -0.598457764
## [99,] -0.08006674 0.99394200 -0.35173969 . 0.486036394 1.554590995
## [100,] -1.63067099 -0.55495904 -0.51107924 . -1.530984518 -1.124967588
This process also works for sparse matrices:
Y <- Matrix::rsparsematrix(1000, 1000, density=0.01)
writeTileDBArray(Y)
## <1000 x 1000> sparse TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] 0 0 0 . 0 0
## [2,] 0 0 0 . 0 0
## [3,] 0 0 0 . 0 0
## [4,] 0 0 0 . 0 0
## [5,] 0 0 0 . 0 0
## ... . . . . . .
## [996,] 0 0 0 . 0 0
## [997,] 0 0 0 . 0 0
## [998,] 0 0 0 . 0 0
## [999,] 0 0 0 . 0 0
## [1000,] 0 0 0 . 0 0
Logical and integer matrices are supported:
writeTileDBArray(Y > 0)
## <1000 x 1000> sparse TileDBMatrix object of type "logical":
## [,1] [,2] [,3] ... [,999] [,1000]
## [1,] FALSE FALSE FALSE . FALSE FALSE
## [2,] FALSE FALSE FALSE . FALSE FALSE
## [3,] FALSE FALSE FALSE . FALSE FALSE
## [4,] FALSE FALSE FALSE . FALSE FALSE
## [5,] FALSE FALSE FALSE . FALSE FALSE
## ... . . . . . .
## [996,] FALSE FALSE FALSE . FALSE FALSE
## [997,] FALSE FALSE FALSE . FALSE FALSE
## [998,] FALSE FALSE FALSE . FALSE FALSE
## [999,] FALSE FALSE FALSE . FALSE FALSE
## [1000,] FALSE FALSE FALSE . FALSE FALSE
As are matrices with dimension names:
rownames(X) <- sprintf("GENE_%i", seq_len(nrow(X)))
colnames(X) <- sprintf("SAMP_%i", seq_len(ncol(X)))
writeTileDBArray(X)
## <100 x 10> TileDBMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 1.4379257 0.5899878 -1.1246726 . -0.3183372 0.1677965
## GENE_2 0.4428908 1.0175601 1.0051714 . 0.9909609 -0.1800655
## GENE_3 -0.3709840 1.9603032 1.2033325 . -0.8272382 -1.2589653
## GENE_4 -0.4764077 -0.4334604 -1.2056809 . 0.7747664 -1.1213591
## GENE_5 0.5811226 -2.1323277 0.3906020 . 0.2103091 1.4890560
## ... . . . . . .
## GENE_96 -1.11981955 0.28900896 -1.24461851 . 0.002046795 -0.794698883
## GENE_97 0.88109759 1.01576196 0.67382806 . 0.829595949 1.664077921
## GENE_98 0.21591081 -1.04552756 0.73747737 . 1.124804113 -0.598457764
## GENE_99 -0.08006674 0.99394200 -0.35173969 . 0.486036394 1.554590995
## GENE_100 -1.63067099 -0.55495904 -0.51107924 . -1.530984518 -1.124967588
TileDBArrays
TileDBArrays are simply DelayedArray objects and can be manipulated as such.
The usual conventions for extracting data from matrix-like objects work as expected:
out <- as(X, "TileDBArray")
dim(out)
## [1] 100 10
head(rownames(out))
## [1] "GENE_1" "GENE_2" "GENE_3" "GENE_4" "GENE_5" "GENE_6"
head(out[,1])
## GENE_1 GENE_2 GENE_3 GENE_4 GENE_5 GENE_6
## 1.4379257 0.4428908 -0.3709840 -0.4764077 0.5811226 -0.6816615
We can also perform manipulations like subsetting and arithmetic.
Note that these operations do not affect the data in the TileDB backend;
rather, they are delayed until the values are explicitly required,
hence the creation of the DelayedMatrix object.
out[1:5,1:5]
## <5 x 5> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5
## GENE_1 1.4379257 0.5899878 -1.1246726 1.3441225 1.6367143
## GENE_2 0.4428908 1.0175601 1.0051714 0.3526786 -0.8096759
## GENE_3 -0.3709840 1.9603032 1.2033325 -0.8240308 -0.9466671
## GENE_4 -0.4764077 -0.4334604 -1.2056809 0.5477803 0.5723469
## GENE_5 0.5811226 -2.1323277 0.3906020 -1.5658427 -0.7405174
out * 2
## <100 x 10> DelayedMatrix object of type "double":
## SAMP_1 SAMP_2 SAMP_3 ... SAMP_9 SAMP_10
## GENE_1 2.8758514 1.1799756 -2.2493452 . -0.6366744 0.3355930
## GENE_2 0.8857817 2.0351203 2.0103429 . 1.9819218 -0.3601309
## GENE_3 -0.7419680 3.9206065 2.4066651 . -1.6544764 -2.5179305
## GENE_4 -0.9528153 -0.8669208 -2.4113619 . 1.5495327 -2.2427182
## GENE_5 1.1622453 -4.2646555 0.7812040 . 0.4206183 2.9781119
## ... . . . . . .
## GENE_96 -2.2396391 0.5780179 -2.4892370 . 0.004093591 -1.589397767
## GENE_97 1.7621952 2.0315239 1.3476561 . 1.659191899 3.328155843
## GENE_98 0.4318216 -2.0910551 1.4749547 . 2.249608226 -1.196915527
## GENE_99 -0.1601335 1.9878840 -0.7034794 . 0.972072788 3.109181989
## GENE_100 -3.2613420 -1.1099181 -1.0221585 . -3.061969037 -2.249935177
We can also do more complex matrix operations that are supported by DelayedArray:
colSums(out)
## SAMP_1 SAMP_2 SAMP_3 SAMP_4 SAMP_5 SAMP_6 SAMP_7
## -3.565063 4.085723 -10.509080 6.055996 17.734411 -3.128914 -5.227093
## SAMP_8 SAMP_9 SAMP_10
## 8.844767 10.563064 5.866686
out %*% runif(ncol(out))
## [,1]
## GENE_1 2.38060521
## GENE_2 0.67947376
## GENE_3 -2.15155462
## GENE_4 -0.83716425
## GENE_5 -2.59322806
## GENE_6 0.60285917
## GENE_7 -1.67171717
## GENE_8 -0.06157165
## GENE_9 1.35117385
## GENE_10 3.31046458
## GENE_11 1.35069127
## GENE_12 2.14687007
## GENE_13 1.29985533
## GENE_14 1.91540361
## GENE_15 -0.31365026
## GENE_16 1.45798754
## GENE_17 -0.27368306
## GENE_18 -1.96159259
## GENE_19 0.04190560
## GENE_20 1.71562776
## GENE_21 3.94221428
## GENE_22 -1.75225608
## GENE_23 -0.11691609
## GENE_24 3.26416461
## GENE_25 0.09647148
## GENE_26 0.58001289
## GENE_27 1.59481381
## GENE_28 0.78452856
## GENE_29 1.59289001
## GENE_30 -1.42371365
## GENE_31 -0.59794529
## GENE_32 1.09320130
## GENE_33 -1.46442699
## GENE_34 -0.73957261
## GENE_35 2.68114467
## GENE_36 1.63430069
## GENE_37 -0.50657510
## GENE_38 -0.67845662
## GENE_39 -0.90650592
## GENE_40 0.19754142
## GENE_41 1.48461033
## GENE_42 1.88360795
## GENE_43 5.22734718
## GENE_44 -2.32200684
## GENE_45 0.17976063
## GENE_46 0.50882319
## GENE_47 -4.62151675
## GENE_48 0.48668471
## GENE_49 -0.05541538
## GENE_50 -1.30013606
## GENE_51 -0.98996344
## GENE_52 -0.50917221
## GENE_53 2.04569732
## GENE_54 -3.20328773
## GENE_55 -0.54222950
## GENE_56 1.51299959
## GENE_57 3.20543429
## GENE_58 0.10364728
## GENE_59 0.38781584
## GENE_60 -2.00526152
## GENE_61 -0.45855129
## GENE_62 0.74654386
## GENE_63 -0.74842282
## GENE_64 1.66096395
## GENE_65 -0.60596019
## GENE_66 -1.54686623
## GENE_67 -2.26064161
## GENE_68 2.26811745
## GENE_69 -0.95233875
## GENE_70 0.64444763
## GENE_71 0.42706057
## GENE_72 -0.39414003
## GENE_73 -1.74431198
## GENE_74 -1.19556901
## GENE_75 0.23849010
## GENE_76 -2.55309337
## GENE_77 -0.46803353
## GENE_78 2.59724934
## GENE_79 1.15043273
## GENE_80 -0.85443158
## GENE_81 3.74296600
## GENE_82 3.34033139
## GENE_83 -0.51659823
## GENE_84 -1.93880197
## GENE_85 -1.60660341
## GENE_86 1.28238168
## GENE_87 -0.18345074
## GENE_88 2.98410571
## GENE_89 0.07650198
## GENE_90 -1.34366569
## GENE_91 -0.46437621
## GENE_92 0.21417123
## GENE_93 -1.26766498
## GENE_94 -3.59305554
## GENE_95 0.77793800
## GENE_96 -3.38132035
## GENE_97 3.69320508
## GENE_98 -0.49760769
## GENE_99 2.36831015
## GENE_100 -2.49628734
We can adjust some parameters for creating the backend with appropriate arguments to writeTileDBArray().
For example, the code below lets us control the path to the backend
as well as the name of the attribute containing the data.
X <- matrix(rnorm(1000), ncol=10)
path <- tempfile()
writeTileDBArray(X, path=path, attr="WHEE")
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.16996644 -1.08481947 2.17687339 . 0.9351372 0.3307608
## [2,] -1.14607168 0.84550008 0.18012226 . -0.2444608 -0.3679324
## [3,] -0.67734245 1.97197026 1.58569338 . 0.9396334 1.2771135
## [4,] -1.25367492 0.53601263 1.80318371 . 2.0777811 0.1601868
## [5,] 0.36388907 0.06134160 -0.05072959 . -0.8491397 -0.2873912
## ... . . . . . .
## [96,] -1.0241235 0.2149028 0.9689573 . -0.57992836 -0.68240617
## [97,] 0.7927903 0.3005051 -0.2009687 . 1.62406314 -0.03901031
## [98,] 1.5848010 -0.5853967 0.6151573 . 1.42366839 -0.17475720
## [99,] 1.5122185 1.3849414 -1.4930721 . 1.18984623 2.71224430
## [100,] -0.6902239 0.3262110 -2.2721384 . 0.56696724 0.50118043
As these arguments cannot be passed during coercion, we instead provide global variables that can be set or unset to affect the outcome.
path2 <- tempfile()
setTileDBPath(path2)
as(X, "TileDBArray") # uses path2 to store the backend.
## <100 x 10> TileDBMatrix object of type "double":
## [,1] [,2] [,3] ... [,9] [,10]
## [1,] -0.16996644 -1.08481947 2.17687339 . 0.9351372 0.3307608
## [2,] -1.14607168 0.84550008 0.18012226 . -0.2444608 -0.3679324
## [3,] -0.67734245 1.97197026 1.58569338 . 0.9396334 1.2771135
## [4,] -1.25367492 0.53601263 1.80318371 . 2.0777811 0.1601868
## [5,] 0.36388907 0.06134160 -0.05072959 . -0.8491397 -0.2873912
## ... . . . . . .
## [96,] -1.0241235 0.2149028 0.9689573 . -0.57992836 -0.68240617
## [97,] 0.7927903 0.3005051 -0.2009687 . 1.62406314 -0.03901031
## [98,] 1.5848010 -0.5853967 0.6151573 . 1.42366839 -0.17475720
## [99,] 1.5122185 1.3849414 -1.4930721 . 1.18984623 2.71224430
## [100,] -0.6902239 0.3262110 -2.2721384 . 0.56696724 0.50118043
sessionInfo()
## R version 4.4.1 (2024-06-14)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 22.04.4 LTS
##
## Matrix products: default
## BLAS: /home/biocbuild/bbs-3.19-bioc/R/lib/libRblas.so
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.10.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_GB LC_COLLATE=C
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: America/New_York
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] RcppSpdlog_0.0.18 TileDBArray_1.14.1 DelayedArray_0.30.1
## [4] SparseArray_1.4.8 S4Arrays_1.4.1 abind_1.4-8
## [7] IRanges_2.38.1 S4Vectors_0.42.1 MatrixGenerics_1.16.0
## [10] matrixStats_1.4.1 BiocGenerics_0.50.0 Matrix_1.7-0
## [13] BiocStyle_2.32.1
##
## loaded via a namespace (and not attached):
## [1] bit_4.0.5 jsonlite_1.8.8 compiler_4.4.1
## [4] BiocManager_1.30.25 crayon_1.5.3 Rcpp_1.0.13
## [7] nanoarrow_0.5.0.1 jquerylib_0.1.4 yaml_2.3.10
## [10] fastmap_1.2.0 lattice_0.22-6 R6_2.5.1
## [13] RcppCCTZ_0.2.12 XVector_0.44.0 tiledb_0.30.0
## [16] knitr_1.48 bookdown_0.40 bslib_0.8.0
## [19] rlang_1.1.4 cachem_1.1.0 xfun_0.47
## [22] sass_0.4.9 bit64_4.0.5 cli_3.6.3
## [25] zlibbioc_1.50.0 spdl_0.0.5 digest_0.6.37
## [28] grid_4.4.1 lifecycle_1.0.4 data.table_1.16.0
## [31] evaluate_0.24.0 nanotime_0.3.9 zoo_1.8-12
## [34] rmarkdown_2.28 tools_4.4.1 htmltools_0.5.8.1