This vignette covers common zarr array operations: persistent storage, compression, resizing, filters, and advanced indexing.
Persistent arrays
Create an array on disk, close the session, and reopen it later.
path <- file.path(tempdir(), "example.zarr")
# Create a persistent array backed by a DirectoryStore
z <- zarr_open_array(
store = path, mode = "w",
shape = c(5, 10), chunks = c(5, 5), dtype = "<f4"
)
# Write data
z$set_item("...", array(1:50, dim = c(5, 10)))
#> NULL
z$get_shape()
#> [1] 5 10
Reopen the same path in read mode:
z2 <- zarr_open_array(store = path, mode = "r")
z2$get_shape()
#> [1] 5 10
z2$get_item("...")$data
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 1 6 11 16 21 26 31 36 41 46
#> [2,] 2 7 12 17 22 27 32 37 42 47
#> [3,] 3 8 13 18 23 28 33 38 43 48
#> [4,] 4 9 14 19 24 29 34 39 44 49
#> [5,] 5 10 15 20 25 30 35 40 45 50
For quick save/load of an existing array:
save_path <- file.path(tempdir(), "saved.zarr")
# Save an R array directly
zarr_save_array(save_path, zarr_create_array(
data = volcano, shape = dim(volcano), dtype = "<f8"
))
#> <ZarrArray> /
#> Shape : (87, 61)
#> Chunks : (87, 61)
#> Data type : <f8
#> Fill value : 0
#> Order : F
#> Read-only : FALSE
#> Compressor : ZstdCodec
#> Store type : DirectoryStore
#> Zarr format : 2
# Reopen
z3 <- zarr_open_array(save_path, mode = "r")
all.equal(z3$as.array(), volcano)
#> [1] TRUE
Compression
By default, pizzarr uses Zstandard compression. You can choose a different compressor when creating an array.
Zstandard (default)
z_zstd <- zarr_create(
shape = c(100, 100), dtype = "<f4",
compressor = ZstdCodec$new(level = 3)
)
z_zstd$get_compressor()$get_config()
#> $id
#> [1] "zstd"
#>
#> $level
#> [1] 3
Gzip
Gzip compression is interoperable with zarr-python and other
implementations, but is slower than Zstandard because R lacks an
in-memory gzip API. For best write performance, prefer
ZstdCodec.
z_gzip <- zarr_create(
shape = c(100, 100), dtype = "<f4",
compressor = GzipCodec$new(level = 5)
)
z_gzip$get_compressor()$get_config()
#> $id
#> [1] "gzip"
#>
#> $level
#> [1] 5
Blosc (with algorithm selection)
z_blosc <- zarr_create(
shape = c(100, 100), dtype = "<f4",
compressor = BloscCodec$new(cname = "lz4", clevel = 5, shuffle = TRUE)
)
z_blosc$get_compressor()$get_config()
#> $id
#> [1] "blosc"
#>
#> $cname
#> [1] "lz4"
#>
#> $clevel
#> [1] 5
#>
#> $shuffle
#> [1] 1
#>
#> $blocksize
#> [1] 0
No compression
z_none <- zarr_create(
shape = c(100, 100), dtype = "<f4",
compressor = NA
)
is.na(z_none$get_compressor())
#> Warning in is.na(z_none$get_compressor()): is.na() applied to non-(list or
#> vector) of type 'environment'
#> [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Resizing arrays
Arrays can be resized after creation. Data in the overlapping region is preserved; new regions are filled with the fill value.
z <- zarr_create(
shape = c(5, 10), chunks = c(5, 5),
dtype = "<i4", fill_value = 0L,
compressor = "default"
)
z$set_item("...", array(1:50, dim = c(5, 10)))
#> NULL
z$get_shape()
#> [1] 5 10
# Grow the array
z$resize(10, 20)
z$get_shape()
#> [1] 10 20
# Original data is preserved in the top-left corner
z[1:5, 1:10]$data
#> [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
#> [1,] 1 6 11 16 21 26 31 36 41 46
#> [2,] 2 7 12 17 22 27 32 37 42 47
#> [3,] 3 8 13 18 23 28 33 38 43 48
#> [4,] 4 9 14 19 24 29 34 39 44 49
#> [5,] 5 10 15 20 25 30 35 40 45 50
# New region is filled with fill_value
z[6:10, 1:5]$data
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0 0 0 0 0
#> [2,] 0 0 0 0 0
#> [3,] 0 0 0 0 0
#> [4,] 0 0 0 0 0
#> [5,] 0 0 0 0 0
Shrinking removes chunks that fall outside the new shape:
z$resize(3, 4)
z$get_shape()
#> [1] 3 4
z$get_item("...")$data
#> [,1] [,2] [,3] [,4]
#> [1,] 1 6 11 16
#> [2,] 2 7 12 17
#> [3,] 3 8 13 18
Appending data
Use append() to grow an array along an axis, adding new
data at the end. This is equivalent to zarr-python’s
z.append(data, axis=0), but uses R’s 1-based axis indexing
(axis 1 = first dimension).
z <- zarr_create(
shape = c(3, 4), chunks = c(3, 4),
dtype = "<i4", fill_value = 0L
)
z$set_item("...", array(1:12, dim = c(3, 4)))
#> NULL
z$as.array()
#> [,1] [,2] [,3] [,4]
#> [1,] 1 4 7 10
#> [2,] 2 5 8 11
#> [3,] 3 6 9 12
Append new rows (axis 1, the default):
new_rows <- array(13:20, dim = c(2, 4))
z$append(new_rows)
#> NULL
z$get_shape()
#> [1] 5 4
z$as.array()
#> [,1] [,2] [,3] [,4]
#> [1,] 1 4 7 10
#> [2,] 2 5 8 11
#> [3,] 3 6 9 12
#> [4,] 13 15 17 19
#> [5,] 14 16 18 20
Append new columns (axis 2):
Filters
Filters transform chunk data before compression. They are codec
instances passed as a list to the filters parameter. A
common use case is variable-length UTF-8 string arrays, which require
VLenUtf8Codec as a filter.
words <- c("alpha", "bravo", "charlie", "delta")
z_str <- zarr_create_array(
data = array(words, dim = length(words)),
shape = length(words), dtype = "|O",
object_codec = VLenUtf8Codec$new()
)
z_str$get_item("...")$data
#> [1] "alpha" "bravo" "charlie" "delta"
z_str$get_filters()
#> [[1]]
#> <VLenUtf8Codec>
#> Inherits from: <Codec>
#> Public:
#> clone: function (deep = FALSE)
#> decode: function (buf, zarr_arr)
#> encode: function (buf, zarr_arr)
#> get_config: function ()
Advanced indexing
Beyond basic slicing with slice() or [,
pizzarr supports orthogonal indexing for independent selection along
each dimension.
Setup
z <- zarr_create_array(
data = matrix(1:30, nrow = 5, ncol = 6),
shape = c(5, 6), dtype = "<i4"
)
z$as.array()
#> [,1] [,2] [,3] [,4] [,5] [,6]
#> [1,] 1 6 11 16 21 26
#> [2,] 2 7 12 17 22 27
#> [3,] 3 8 13 18 23 28
#> [4,] 4 9 14 19 24 29
#> [5,] 5 10 15 20 25 30
Basic slicing with [
The bracket operator uses orthogonal indexing internally:
# Select rows 1-3, columns 2-4
z[1:3, 2:4]$data
#> [,1] [,2] [,3]
#> [1,] 6 11 16
#> [2,] 7 12 17
#> [3,] 8 13 18
Orthogonal selection with integer arrays
Select specific rows and columns independently. Note that
get_orthogonal_selection uses zero-based indices (like
zarr-python), while the [ operator uses R’s one-based
indexing:
Slicing with step
Select every other row, every third column using seq()
in bracket notation:
