Getting started with tccquickr

tccquickr compiles a small, declared subset of R to C. You annotate a function’s argument types with declare(type(...)), and tccq_compile() returns a compiled closure you call like any R function. This vignette is a tour; for the exact accepted subset and the boundary/optimization model see vignette("the-r-subset").

A reduction kernel

A kernel that reduces a vector expression to one scalar:

sum_kernel <- function(x, y) {
  declare(type(x = double(NA), y = double(NA)))
  sum((sin(x) + y) * y)
}

compiled_sum <- tccq_compile(sum_kernel)
x <- as.double(seq(-2, 2, length.out = 10))
y <- as.double(seq(1, 3, length.out = 10))
compiled_sum(x, y)
#> [1] 48.90504

You can stop before the backend and inspect the intermediate representation or the emitted C:

tccq_compile(sum_kernel, mode = "ir")
#> <tccq_module>
#>   entry: tccq_entry 
#>   formals:
#>    - x : double[NA] 
#>    - y : double[NA] 
#>   ir: program 
#>   kernel: fold 
#>   result type: double
cat(tccq_compile(sum_kernel, mode = "code"))
#> #include <R.h>
#> #include <Rinternals.h>
#> #include <Rmath.h>
#> #include <math.h>
#> #include <limits.h>
#> 
#> #ifndef REAL_RO
#> #define REAL_RO(x) REAL(x)
#> #endif
#> #ifndef INTEGER_RO
#> #define INTEGER_RO(x) INTEGER(x)
#> #endif
#> #ifndef LOGICAL_RO
#> #define LOGICAL_RO(x) LOGICAL(x)
#> #endif
#> #ifndef TCCQ_UNUSED
#> # if defined(__GNUC__)
#> #  define TCCQ_UNUSED __attribute__((unused))
#> # else
#> #  define TCCQ_UNUSED
#> # endif
#> #endif
#> 
#> static TCCQ_UNUSED R_xlen_t tccq_checked_index1(R_xlen_t idx, R_xlen_t len, const char *name) {
#>   if (idx < 1 || idx > len) {
#>     Rf_error("index out of bounds for %s", name);
#>   }
#>   return idx - 1;
#> }
#> 
#> static TCCQ_UNUSED int tccq_lgl_not(int x) {
#>   return x == NA_LOGICAL ? NA_LOGICAL : (!x);
#> }
#> 
#> static TCCQ_UNUSED int tccq_int_idiv(int a, int b) {
#>   if (a == NA_INTEGER || b == NA_INTEGER || b == 0) return NA_INTEGER;
#>   int q = a / b;
#>   int r = a % b;
#>   if (r != 0 && ((a < 0) != (b < 0))) --q;
#>   return q;
#> }
#> 
#> static TCCQ_UNUSED int tccq_int_checked(long long x) {
#>   if (x > INT_MAX || x <= INT_MIN) return NA_INTEGER;
#>   return (int)x;
#> }
#> 
#> static TCCQ_UNUSED int tccq_int_add(int a, int b) {
#>   if (a == NA_INTEGER || b == NA_INTEGER) return NA_INTEGER;
#>   return tccq_int_checked((long long)a + (long long)b);
#> }
#> 
#> static TCCQ_UNUSED int tccq_int_sub(int a, int b) {
#>   if (a == NA_INTEGER || b == NA_INTEGER) return NA_INTEGER;
#>   return tccq_int_checked((long long)a - (long long)b);
#> }
#> 
#> static TCCQ_UNUSED int tccq_int_mul(int a, int b) {
#>   if (a == NA_INTEGER || b == NA_INTEGER) return NA_INTEGER;
#>   return tccq_int_checked((long long)a * (long long)b);
#> }
#> 
#> static TCCQ_UNUSED int tccq_int_neg(int a) {
#>   if (a == NA_INTEGER) return NA_INTEGER;
#>   return tccq_int_checked(-((long long)a));
#> }
#> 
#> static TCCQ_UNUSED int tccq_lgl_and(int a, int b) {
#>   if (a == 0 || b == 0) return 0;
#>   if (a == NA_LOGICAL || b == NA_LOGICAL) return NA_LOGICAL;
#>   return 1;
#> }
#> 
#> static TCCQ_UNUSED int tccq_lgl_or(int a, int b) {
#>   if (a == 1 || b == 1) return 1;
#>   if (a == NA_LOGICAL || b == NA_LOGICAL) return NA_LOGICAL;
#>   return 0;
#> }
#> 
#> static TCCQ_UNUSED int tccq_cond_check(int c) {
#>   if (c == NA_LOGICAL) Rf_error("missing value where TRUE/FALSE needed");
#>   return c;
#> }
#> 
#> static TCCQ_UNUSED int tccq_switch_check(int k, int n) {
#>   if (k == NA_INTEGER || k < 1 || k > n) Rf_error("switch: index out of range");
#>   return k;
#> }
#> 
#> SEXP tccq_entry(SEXP arg_x, SEXP arg_y) {
#>   if (TYPEOF(arg_x) != REALSXP) {
#>     Rf_error("argument %s has wrong R type", "x");
#>   }
#>   R_xlen_t n_x = XLENGTH(arg_x);
#>   const double *tccq_arg_ptr_cache_x = NULL;
#>   #define p_x (tccq_arg_ptr_cache_x == NULL ? (tccq_arg_ptr_cache_x = REAL_RO(arg_x)) : tccq_arg_ptr_cache_x)
#>   if (TYPEOF(arg_y) != REALSXP) {
#>     Rf_error("argument %s has wrong R type", "y");
#>   }
#>   R_xlen_t n_y = XLENGTH(arg_y);
#>   const double *tccq_arg_ptr_cache_y = NULL;
#>   #define p_y (tccq_arg_ptr_cache_y == NULL ? (tccq_arg_ptr_cache_y = REAL_RO(arg_y)) : tccq_arg_ptr_cache_y)
#>   int tccq_nprotect = 0;
#>   R_xlen_t n_out = n_y;
#>   if (n_x != n_out) {
#>     Rf_error("vector length mismatch in shared shape domain");
#>   }
#>   double acc = 0.0;
#>   for (R_xlen_t i = 0; i < n_out; ++i) {
#>     double v = (double)(((double)(((double)(sin((double)(p_x[i]))) + (double)(p_y[i]))) * (double)(p_y[i])));
#>     if (R_IsNA(v)) { acc = NA_REAL; break; }
#>     if (R_IsNaN(v)) { acc = R_NaN; break; }
#>     acc += v;
#>   }
#>   SEXP out = PROTECT(Rf_allocVector(REALSXP, 1));
#>   ++tccq_nprotect;
#>   REAL(out)[0] = (double) (acc);
#>   UNPROTECT(tccq_nprotect);
#>   return out;
#> }

This path lowers to an explicit fold over a producer — the reduction and its element expression are represented in the IR, not discovered while printing C.

A vector-return kernel

A kernel that produces a whole vector lowers to materialize(producer):

vec_kernel <- function(x, y) {
  declare(type(x = double(NA), y = double(NA)))
  sin(x) + y * y
}

compiled_vec <- tccq_compile(vec_kernel)
unname(compiled_vec(x, y)[1:4])
#> [1] 0.09070257 0.49394330 1.19022755 2.15940797

Choosing a backend

tccq_compile() targets different C backends explicitly:

tccq_compile(f, backend = tccq_backend_source())  # return emitted C, do not compile
tccq_compile(f, backend = tccq_backend_tinycc())  # compile in memory via Rtinycc (default)
tccq_compile(f, backend = tccq_backend_shlib())   # compile via R CMD SHLIB, load via .Call()

The shared-library route is in the same space as callme. Before compiling, tccq_compile() validates backend capabilities against the target, the compile context, and any explicit boundary APIs the module uses.

A larger example: Viterbi

This follows the shape of the classic quickr Viterbi example while keeping the declared-subset contract. It exercises braced declarations, matrix row/column views, matrix writes, local vector fills, nested for loops, max(), and which.max().

viterbi <- function(observations, states, initial_probs,
                    transition_probs, emission_probs) {
  declare({
    type(observations = integer(num_steps))
    type(states = integer(num_states))
    type(initial_probs = double(num_states))
    type(transition_probs = double(num_states, num_states))
    type(emission_probs = double(num_states, num_obs))
  })

  num_states <- length(states)
  num_steps <- length(observations)

  trellis <- matrix(0, nrow = length(states), ncol = length(observations))
  backpointer <- matrix(0L, nrow = length(states), ncol = length(observations))

  trellis[, 1] <- initial_probs * emission_probs[, observations[1]]

  for (step in 2:num_steps) {
    for (current_state in 1:num_states) {
      probabilities <- trellis[, step - 1] * transition_probs[, current_state]
      trellis[current_state, step] <- max(probabilities) *
        emission_probs[current_state, observations[step]]
      backpointer[current_state, step] <- which.max(probabilities)
    }
  }

  path <- integer(length(observations))
  path[num_steps] <- which.max(trellis[, num_steps])
  for (step in seq((num_steps - 1), 1)) {
    path[step] <- backpointer[path[step + 1], step + 1]
  }

  states[path]
}

Compile once, then call the compiled closure like a normal R function and check it against the interpreter:

set.seed(42)
num_steps <- 50L; num_states <- 6L; num_obs <- 20L
observations <- sample.int(num_obs, num_steps, replace = TRUE)
states <- seq_len(num_states)
initial_probs <- runif(num_states); initial_probs <- initial_probs / sum(initial_probs)
transition_probs <- matrix(runif(num_states^2), nrow = num_states)
transition_probs <- transition_probs / rowSums(transition_probs)
emission_probs <- matrix(runif(num_states * num_obs), nrow = num_states)
emission_probs <- emission_probs / rowSums(emission_probs)

compiled_viterbi <- tccq_compile(viterbi)
args <- list(observations, states, initial_probs, transition_probs, emission_probs)
identical(do.call(compiled_viterbi, args), do.call(viterbi, args))
#> [1] TRUE

Excluding compile time, the compiled closure is markedly faster than the interpreter:

bench::mark(
  R = do.call(viterbi, args),
  tccquickr = do.call(compiled_viterbi, args),
  iterations = 100, check = FALSE
)[, c("expression", "median", "itr/sec", "mem_alloc")]
#> # A tibble: 2 × 4
#>   expression   median `itr/sec` mem_alloc
#>   <bch:expr> <bch:tm>     <dbl> <bch:byt>
#> 1 R           452.2µs     2182.    4.09KB
#> 2 tccquickr    66.6µs    14114.    4.09KB

Where to go next

  • vignette("the-r-subset") — the precise accepted subset, the boundary model, and the optimization passes.
  • tccq_compile(f, mode = "ir") — inspect the typed IR for any kernel.
  • The architecture decisions live in docs/decisions/ on GitHub.