Contents
1. Install
WRSVMr is a thin reticulate wrapper around the Python wrsvm package. You need a working Python interpreter with wrsvm installed; then install the R side.
# 1) Python backend (one-time)
install.packages("reticulate")
reticulate::install_python(version = "3.11.9")
reticulate::py_install("wrsvm", pip = TRUE)
# 2) R wrapper
install.packages("remotes")
remotes::install_github("annicenajafi/WRSVM", subdir = "WRSVMr")
# Or point reticulate at an existing Python interpreter
# (recommended on Windows to avoid the pyenv-win / WindowsApps stub):
# NOTE(review): replace the placeholder path with your real interpreter before
# running; presumably this must be set before library(WRSVMr) initializes
# Python via reticulate — confirm against the reticulate docs.
Sys.setenv(RETICULATE_PYTHON = "C:/path/to/python.exe")
library(WRSVMr)
2. Load a dataset
The Iris dataset ships with base R. Scale features and split into train and test.
library(WRSVMr)
set.seed(0)
data(iris)

# Features as a numeric matrix; labels as integer codes (1, 2, 3).
X <- as.matrix(iris[, 1:4])
y <- as.integer(iris$Species)

# 70/30 train/test split.
train_idx <- sample(seq_len(nrow(X)), size = round(0.7 * nrow(X)))
X_tr <- X[train_idx, ]; y_tr <- y[train_idx]
X_te <- X[-train_idx, ]; y_te <- y[-train_idx]

# Standardize with TRAIN statistics only, so no test-set information leaks
# into the scaling. Named `sigma`, not `sd`, to avoid shadowing stats::sd().
mu <- colMeans(X_tr)
sigma <- apply(X_tr, 2, sd)
X_tr <- sweep(sweep(X_tr, 2, mu), 2, sigma, "/")
X_te <- sweep(sweep(X_te, 2, mu), 2, sigma, "/")

cat(sprintf("train N=%d features=%d classes=%s\n",
            nrow(X_tr), ncol(X_tr), paste(sort(unique(y_tr)), collapse = ",")))
3. First fit and prediction
Fit the default Crammer–Singer strategy with the RBF kernel, then score on held-out data.
# Train a Crammer–Singer WRSVM with an RBF kernel, then score it on the
# held-out split.
model <- wrsvm_fit(
  X_tr, y_tr,
  strategy = "cs", kernel = "rbf",
  C = 100, gamma = 0.1, upsilon = 0.2
)
y_hat <- wrsvm_predict(model, X_te)
cat(sprintf("test accuracy: %.4f\n", mean(y_hat == y_te)))
cat("first 5 predictions:", head(y_hat, 5), "\n")
4. Compare kernels
Pass the kernel argument to pick one of the five supported kernels. Polynomial and sigmoid use degree and coef0 in addition to gamma.
# Refit under each supported kernel and report held-out accuracy.
# degree/coef0 are only consulted by poly and sigmoid; passing them to the
# other kernels is harmless.
kernels <- c("rbf", "linear", "poly", "sigmoid", "laplacian")
for (kern in kernels) {
  model <- wrsvm_fit(X_tr, y_tr, kernel = kern,
                     C = 100, gamma = 0.1, upsilon = 0.2,
                     degree = 3L, coef0 = 1.0)
  acc <- mean(wrsvm_predict(model, X_te) == y_te)
  cat(sprintf(" %-10s test_acc = %.3f\n", kern, acc))
}
5. Compare decomposition strategies
Pass strategy to pick cs, simmsvm, ovo, or ovr. simmsvm is typically the fastest because it solves a single QP in N dual variables, rather than the N × K variables of the Crammer–Singer (cs) formulation.
# Time each decomposition strategy and report its held-out accuracy.
strategies <- c("cs", "simmsvm", "ovo", "ovr")
for (strat in strategies) {
  started <- Sys.time()
  model <- wrsvm_fit(X_tr, y_tr, strategy = strat,
                     C = 100, gamma = 0.1, upsilon = 0.2)
  # Wall-clock fit time in milliseconds.
  elapsed_ms <- 1000 * as.numeric(difftime(Sys.time(), started, units = "secs"))
  acc <- mean(wrsvm_predict(model, X_te) == y_te)
  cat(sprintf(" %-8s fit_time = %6.1f ms test_acc = %.3f\n",
              strat, elapsed_ms, acc))
}
6. Cross-validation loop
A minimal 5-fold manual grid search over C and gamma. Swap in the caret package if you need full model-selection machinery.
# Manual 5-fold cross-validation over a small (C, gamma) grid; upsilon is
# held fixed at 0.2. Fold assignment is a random permutation of 1..5 labels.
grid <- expand.grid(C = c(10, 100, 1000),
                    gamma = c(0.01, 0.1, 1.0))
n_folds <- 5
folds <- sample(rep(seq_len(n_folds), length.out = nrow(X_tr)))
scores <- numeric(nrow(grid))
for (g in seq_len(nrow(grid))) {
  # vapply (not sapply) guarantees a numeric vector of length n_folds,
  # regardless of input edge cases.
  acc_folds <- vapply(seq_len(n_folds), function(k) {
    tr <- folds != k; va <- folds == k
    fit <- wrsvm_fit(X_tr[tr, ], y_tr[tr],
                     C = grid$C[g], gamma = grid$gamma[g], upsilon = 0.2)
    mean(wrsvm_predict(fit, X_tr[va, ]) == y_tr[va])
  }, numeric(1))
  scores[g] <- mean(acc_folds)
}
best <- which.max(scores)
cat(sprintf("best: C=%g gamma=%g mean_cv_acc=%.3f\n",
            grid$C[best], grid$gamma[best], scores[best]))
7. Imbalanced data and label noise
Use inject_outliers_minority() to flip a fraction of minority-class labels and see how upsilon protects accuracy against label corruption.
# Flip 20% of minority-class training labels, then sweep upsilon and measure
# accuracy on the (clean) test set to see how the robustness parameter
# tolerates the corruption.
y_noisy <- inject_outliers_minority(X_tr, y_tr,
                                    outlier_rate = 0.2, seed = 0L)
upsilon_grid <- c(0.0, 0.1, 0.5, 2.0)
for (ups in upsilon_grid) {
  model <- wrsvm_fit(X_tr, y_noisy, strategy = "simmsvm",
                     C = 100, gamma = 0.1, upsilon = ups)
  acc <- mean(wrsvm_predict(model, X_te) == y_te)
  cat(sprintf(" upsilon=%.1f clean_test_acc = %.3f\n", ups, acc))
}