byte_store() is a
key-to-bytes substrate. It deliberately stops below array semantics:
metadata parsing, chunk layout, compression, and type conversion live in
ordinary R code or in a package layered above Ropendal.
This vignette builds a tiny uncompressed, Zarr-like integer matrix reader to show where those boundaries sit.
library(Ropendal)
# Back the example with a fresh temporary directory, opened through the "fs"
# service so everything written below lands under `root`.
root <- tempfile("ropendal-zarr-")
dir.create(root)
fs <- opendal("fs", root = root)
store <- byte_store(fs, "array.zarr")

The store only knows keys and bytes. The toy format below stores one
text metadata object and 2 x 2 little-endian integer chunks
under c/<row>/<col>.
# Serialize `x` (coerced with as.integer()) as consecutive 4-byte
# little-endian integers and return the resulting raw vector.
encode_i32 <- function(x) {
  ints <- as.integer(x)
  writeBin(ints, raw(), size = 4L, endian = "little")
}
# Decode `n` 4-byte little-endian integers from the raw vector `x`.
#
# readBin() stops quietly at the end of its input, so a truncated chunk would
# come back shorter than requested and then be recycled silently by a caller
# that reshapes it into a matrix. Signal an error instead of returning a short
# vector.
decode_i32 <- function(x, n) {
  values <- readBin(x, integer(), n = n, size = 4L, endian = "little")
  if (length(values) != n) {
    stop(
      "expected ", n, " int32 values but decoded ", length(values),
      call. = FALSE
    )
  }
  values
}
# Build the store key for the chunk at 1-based chunk position (i, j); the key
# itself uses 0-based indices, e.g. (1, 1) -> "c/0/0".
chunk_key <- function(i, j) {
  row0 <- i - 1L
  col0 <- j - 1L
  sprintf("c/%d/%d", row0, col0)
}
# Write the matrix `x` into `store` using the toy chunked layout.
#
# One text metadata object is written to "zarr.json". Each chunk of at most
# chunk_dim[1] x chunk_dim[2] cells is flattened column-major, encoded with
# encode_i32(), and stored under chunk_key(). Edge chunks are stored at their
# actual size rather than padded. Returns `store` invisibly.
write_array <- function(store, x, chunk_dim = c(2L, 2L)) {
  # Validate before the first write so a bad call cannot leave a stray
  # metadata object behind in the store.
  stopifnot(
    is.matrix(x),
    length(chunk_dim) == 2L,
    !anyNA(chunk_dim),
    all(chunk_dim >= 1L)
  )
  meta <- paste0(
    "dim=", nrow(x), ",", ncol(x), "\n",
    "chunk_dim=", chunk_dim[[1L]], ",", chunk_dim[[2L]], "\n",
    "type=int32\n"
  )
  store_write(store, "zarr.json", charToRaw(meta))
  # Walk chunk indices with seq_len(), which is empty for a zero-extent axis;
  # seq(1L, 0L, by = k) would raise an error instead.
  n_chunks <- (dim(x) + chunk_dim - 1L) %/% chunk_dim
  for (ci in seq_len(n_chunks[[1L]])) {
    row_start <- (ci - 1L) * chunk_dim[[1L]] + 1L
    rows <- row_start:min(row_start + chunk_dim[[1L]] - 1L, nrow(x))
    for (cj in seq_len(n_chunks[[2L]])) {
      col_start <- (cj - 1L) * chunk_dim[[2L]] + 1L
      cols <- col_start:min(col_start + chunk_dim[[2L]] - 1L, ncol(x))
      store_write(store, chunk_key(ci, cj), encode_i32(as.vector(x[rows, cols])))
    }
  }
  invisible(store)
}

Reading is symmetric: fetch bytes, explicitly materialize bytes where needed, and parse above the store layer.
# Parse the toy "key=value" metadata object into a list with integer `dim`,
# integer `chunk_dim`, and character `type`.
#
# `bytes` is the object read from the store; as.raw() materializes it into an
# ordinary raw vector before it is decoded as text. Malformed lines and
# missing keys raise an error.
parse_meta <- function(bytes) {
  lines <- strsplit(rawToChar(as.raw(bytes)), "\n", fixed = TRUE)[[1L]]
  lines <- lines[nzchar(lines)]
  # Split on the first "=" only, so a value may itself contain "=".
  eq <- as.integer(regexpr("=", lines, fixed = TRUE))
  if (any(eq < 1L)) {
    stop("malformed metadata line: ", lines[eq < 1L][[1L]], call. = FALSE)
  }
  values <- setNames(
    as.list(substring(lines, eq + 1L)),
    substring(lines, 1L, eq - 1L)
  )
  # Look keys up by exact name. `$` on a list matches partially, so `$dim`
  # would silently pick up a lone "dimension_names" entry.
  missing_keys <- setdiff(c("dim", "chunk_dim", "type"), names(values))
  if (length(missing_keys) > 0L) {
    stop(
      "metadata is missing: ", paste(missing_keys, collapse = ", "),
      call. = FALSE
    )
  }
  as_dims <- function(key) {
    as.integer(strsplit(values[[key]], ",", fixed = TRUE)[[1L]])
  }
  list(
    dim = as_dims("dim"),
    chunk_dim = as_dims("chunk_dim"),
    type = values[["type"]]
  )
}
# Read the toy chunked array back from `store` as an integer matrix.
#
# Metadata comes from "zarr.json"; each chunk is fetched by key, decoded with
# decode_i32(), and placed column-major into its row/column window of the
# result. Only the "int32" type is understood.
read_array <- function(store) {
  meta <- parse_meta(store_read(store, "zarr.json"))
  if (!identical(meta[["type"]], "int32")) {
    stop("unsupported array type: ", meta[["type"]], call. = FALSE)
  }
  dims <- meta[["dim"]]
  chunk_dim <- meta[["chunk_dim"]]
  out <- matrix(NA_integer_, nrow = dims[[1L]], ncol = dims[[2L]])
  # Walk chunk indices with seq_len(), which is empty for a zero-extent axis;
  # seq(1L, 0L, by = k) would raise an error instead.
  n_chunks <- (dims + chunk_dim - 1L) %/% chunk_dim
  for (ci in seq_len(n_chunks[[1L]])) {
    row_start <- (ci - 1L) * chunk_dim[[1L]] + 1L
    rows <- row_start:min(row_start + chunk_dim[[1L]] - 1L, dims[[1L]])
    for (cj in seq_len(n_chunks[[2L]])) {
      col_start <- (cj - 1L) * chunk_dim[[2L]] + 1L
      cols <- col_start:min(col_start + chunk_dim[[2L]] - 1L, dims[[2L]])
      key <- chunk_key(ci, cj)
      chunk_bytes <- store_read(store, key, mode = "raw")
      n_cells <- length(rows) * length(cols)
      values <- decode_i32(chunk_bytes, n_cells)
      # A short chunk would otherwise be recycled silently by the assignment.
      if (length(values) != n_cells) {
        stop(
          "chunk ", key, " holds ", length(values), " values, expected ",
          n_cells,
          call. = FALSE
        )
      }
      out[rows, cols] <- matrix(values, nrow = length(rows))
    }
  }
  out
}
x <- matrix(seq_len(16L), nrow = 4L)
write_array(store, x)
store_list(store, recursive = TRUE)
#> [[1]]
#> [[1]]$path
#> [1] "c/0/0"
#>
#> [[1]]$type
#> [1] "file"
#>
#> [[1]]$size
#> [1] 16
#>
#> [[1]]$etag
#> NULL
#>
#> [[1]]$last_modified
#> [1] "2026-06-12T11:05:13.056713112Z"
#>
#> [[1]]$version
#> NULL
#>
#> [[1]]$content_type
#> NULL
#>
#> [[1]]$content_encoding
#> NULL
#>
#>
#> [[2]]
#> [[2]]$path
#> [1] "c/0/1"
#>
#> [[2]]$type
#> [1] "file"
#>
#> [[2]]$size
#> [1] 16
#>
#> [[2]]$etag
#> NULL
#>
#> [[2]]$last_modified
#> [1] "2026-06-12T11:05:13.059713192Z"
#>
#> [[2]]$version
#> NULL
#>
#> [[2]]$content_type
#> NULL
#>
#> [[2]]$content_encoding
#> NULL
#>
#>
#> [[3]]
#> [[3]]$path
#> [1] "c/0/"
#>
#> [[3]]$type
#> [1] "dir"
#>
#> [[3]]$size
#> [1] 4096
#>
#> [[3]]$etag
#> NULL
#>
#> [[3]]$last_modified
#> [1] "2026-06-12T11:05:13.059713192Z"
#>
#> [[3]]$version
#> NULL
#>
#> [[3]]$content_type
#> NULL
#>
#> [[3]]$content_encoding
#> NULL
#>
#>
#> [[4]]
#> [[4]]$path
#> [1] "c/1/0"
#>
#> [[4]]$type
#> [1] "file"
#>
#> [[4]]$size
#> [1] 16
#>
#> [[4]]$etag
#> NULL
#>
#> [[4]]$last_modified
#> [1] "2026-06-12T11:05:13.060713219Z"
#>
#> [[4]]$version
#> NULL
#>
#> [[4]]$content_type
#> NULL
#>
#> [[4]]$content_encoding
#> NULL
#>
#>
#> [[5]]
#> [[5]]$path
#> [1] "c/1/1"
#>
#> [[5]]$type
#> [1] "file"
#>
#> [[5]]$size
#> [1] 16
#>
#> [[5]]$etag
#> NULL
#>
#> [[5]]$last_modified
#> [1] "2026-06-12T11:05:13.062343071Z"
#>
#> [[5]]$version
#> NULL
#>
#> [[5]]$content_type
#> NULL
#>
#> [[5]]$content_encoding
#> NULL
#>
#>
#> [[6]]
#> [[6]]$path
#> [1] "c/1/"
#>
#> [[6]]$type
#> [1] "dir"
#>
#> [[6]]$size
#> [1] 4096
#>
#> [[6]]$etag
#> NULL
#>
#> [[6]]$last_modified
#> [1] "2026-06-12T11:05:13.062343071Z"
#>
#> [[6]]$version
#> NULL
#>
#> [[6]]$content_type
#> NULL
#>
#> [[6]]$content_encoding
#> NULL
#>
#>
#> [[7]]
#> [[7]]$path
#> [1] "c/"
#>
#> [[7]]$type
#> [1] "dir"
#>
#> [[7]]$size
#> [1] 4096
#>
#> [[7]]$etag
#> NULL
#>
#> [[7]]$last_modified
#> [1] "2026-06-12T11:05:13.060713219Z"
#>
#> [[7]]$version
#> NULL
#>
#> [[7]]$content_type
#> NULL
#>
#> [[7]]$content_encoding
#> NULL
#>
#>
#> [[8]]
#> [[8]]$path
#> [1] "zarr.json"
#>
#> [[8]]$type
#> [1] "file"
#>
#> [[8]]$size
#> [1] 33
#>
#> [[8]]$etag
#> NULL
#>
#> [[8]]$last_modified
#> [1] "2026-06-12T11:05:13.055713086Z"
#>
#> [[8]]$version
#> NULL
#>
#> [[8]]$content_type
#> NULL
#>
#> [[8]]$content_encoding
#> NULL
read_array(store)
#> [,1] [,2] [,3] [,4]
#> [1,] 1 5 9 13
#> [2,] 2 6 10 14
#> [3,] 3 7 11 15
#> [4,] 4 8 12 16

A cache is another store wrapper. Complete chunk-key reads can be cached; partial range reads still go to the parent store.
cached <- store_cache(store, tempfile("ropendal-zarr-cache-"), validate = "last_modified_size")
read_array(cached)
#> [,1] [,2] [,3] [,4]
#> [1,] 1 5 9 13
#> [2,] 2 6 10 14
#> [3,] 3 7 11 15
#> [4,] 4 8 12 16
# Mutating the parent changes validation metadata, so the cached read refreshes.
store_replace(store, "c/0/0", encode_i32(rep(99L, 4L)))
#> [1] TRUE
read_array(cached)
#> [,1] [,2] [,3] [,4]
#> [1,] 99 99 9 13
#> [2,] 99 99 10 14
#> [3,] 3 7 11 15
#> [4,] 4 8 12 16

The example is intentionally small. Real Zarr support would add metadata schema handling, chunk codecs, fill values, dimension names, and array slicing above this byte-store layer rather than inside it.