wilson-labs/cola

Worksheet GP example fails with ValueError

activatedgeek opened this issue · 2 comments

Complete code:

!wget -O bike.mat "https://www.andpotap.com/static/bike.mat"

from jax import numpy as jnp
import os
import numpy as np
from math import floor
from scipy.io import loadmat
import cola


def load_uci_data(data_dir, dataset, train_p=0.75, test_p=0.15):
    """Load ``<dataset>.mat`` from ``data_dir``, normalise it and split it.

    The ``'data'`` entry of the .mat file is indexed as a 2-D array: the last
    column is the target, all other columns are features. Each feature column
    is min-max rescaled to [-1, 1]; the target is shifted to zero mean and
    divided by its standard deviation.

    Rows are split in file order (no shuffling): the first
    ``floor(train_p * n)`` rows are the training split, the next
    ``floor((1 - train_p - test_p) * n)`` rows the validation split, and the
    remaining rows the test split.

    Returns:
        ``(train_x, train_y, test_x, test_y, valid_x, valid_y)`` -- the test
        split comes *before* the validation split.

    Note:
        A feature column that is constant becomes all zeros after the shift,
        so the rescaling divides by zero for that column.
    """
    mat_path = os.path.join(data_dir, dataset + '.mat')
    data = np.array(loadmat(mat_path)['data'])
    X, y = data[:, :-1], data[:, -1]

    # Min-max rescale every feature column to [-1, 1].
    X = X - X.min(axis=0, keepdims=True)
    X = 2.0 * (X / X.max(axis=0, keepdims=True)) - 1.0

    # Standardise the target in place (y is a view into the local `data`).
    y -= y.mean()
    y /= y.std()

    n_rows = X.shape[0]
    train_n = int(floor(train_p * n_rows))
    valid_n = int(floor((1. - train_p - test_p) * n_rows))

    # split_dataset yields train, valid, test; the return below reorders them.
    (train_x, train_y,
     valid_x, valid_y,
     test_x, test_y) = split_dataset(X, y, train_n, valid_n)

    return train_x, train_y, test_x, test_y, valid_x, valid_y


def split_dataset(x, y, train_n, valid_n):
    """Cut ``x`` and ``y`` into contiguous train / validation / test parts.

    The first ``train_n`` rows form the training part, the following
    ``valid_n`` rows the validation part, and everything after that the test
    part. ``x`` is sliced along its first axis (it must be at least 2-D),
    ``y`` along its only indexed axis.

    Returns:
        ``(train_x, train_y, valid_x, valid_y, test_x, test_y)``
    """
    valid_end = train_n + valid_n
    row_ranges = (
        slice(None, train_n),       # train
        slice(train_n, valid_end),  # validation
        slice(valid_end, None),     # test
    )

    parts = []
    for rows in row_ranges:
        parts.append(x[rows, :])
        parts.append(y[rows])
    return tuple(parts)


# load_uci_data returns (train_x, train_y, test_x, test_y, valid_x, valid_y):
# the test split sits in positions 2-3, so unpack it from there and discard the
# trailing validation split. (Taking the last two items instead would silently
# use the validation split as the test set.)
train_x, train_y, test_x, test_y, *_ = load_uci_data(data_dir="./", dataset="bike")

# Convert every split that is used below to float32 JAX arrays.
dtype = jnp.float32
train_x, train_y = jnp.array(train_x, dtype=dtype), jnp.array(train_y, dtype=dtype)
test_x, test_y = jnp.array(test_x, dtype=dtype), jnp.array(test_y, dtype=dtype)

# Keep only the first 1000 training points.
train_x, train_y = train_x[:1000], train_y[:1000]

def compute_rbf_cov(xi, xj):
    """Return the RBF (squared-exponential) kernel matrix between two point sets.

    Entry ``[a, b]`` is ``exp(-||xi[a] - xj[b]||^2)``. There is no lengthscale
    or 1/2 factor here; callers scale the inputs before calling.

    Args:
        xi: array whose last axis holds the features, e.g. shape ``(n, d)``.
        xj: array whose last axis holds the features, e.g. shape ``(m, d)``.

    Returns:
        Array of pairwise kernel values, shape ``(n, m)`` for 2-D inputs.
    """
    # Insert singleton axes so the subtraction broadcasts to all pairs:
    # (n, 1, d) - (1, m, d) -> (n, m, d).
    xi, xj = jnp.expand_dims(xi, -2), jnp.expand_dims(xj, -3)
    sq_dist = jnp.sum((xi - xj)**2, axis=-1)
    # The exponent must be negated: the kernel decays with distance and is 1
    # on the diagonal. exp(+sq_dist) grows with distance and is not an RBF.
    res = jnp.exp(-sq_dist)
    return res

# GP hyperparameters as scalar arrays of the working dtype: `ls` divides the
# inputs before the kernel is evaluated, `oscale` multiplies the kernel values,
# and `noise` multiplies the identity added to the training kernel.
ls = jnp.array(100., dtype=dtype)
noise = jnp.array(1., dtype=dtype)
oscale = jnp.array(1., dtype=dtype)
# Kernel blocks between the (scaled) train/test inputs, wrapped as dense CoLA
# operators.
K_train_train = cola.ops.Dense(oscale * compute_rbf_cov(train_x / ls, train_x / ls))
K_test_train = cola.ops.Dense(oscale * compute_rbf_cov(test_x / ls, train_x / ls))
K_test_test = cola.ops.Dense(oscale * compute_rbf_cov(test_x / ls, test_x / ls))
# Training kernel plus noise * identity, wrapped with cola.ops.PSD.
K = cola.ops.PSD(K_train_train + noise * cola.ops.I_like(K_train_train))
# NOTE(review): `inverse` is not imported anywhere in this snippet -- presumably
# CoLA's inverse brought into scope in an earlier notebook cell; confirm.
# GP posterior mean at the test inputs.
mu = K_test_train @ inverse(K) @ train_y
# GP posterior covariance at the test inputs. NOTE(review): this line subtracts,
# while the traceback in the report shows `K_test_test + A` -- confirm which
# form was actually run.
Sigma = K_test_test - K_test_train @ inverse(K) @ K_test_train.T

The matrix difference operation (the final `Sigma = ...` line) fails.

Error logs

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[71], line 10
      8 mu = K_test_train @ inverse(K) @ train_y
      9 A = K_test_train @ inverse(K) @ K_test_train.T
---> 10 Sigma = K_test_test + A

File [~/cola/cola/ops/operator_base.py:119](/cola/docs/notebooks/~/cola/cola/ops/operator_base.py:119), in LinearOperator.__add__(self, other)
    118 def __add__(self, other):
--> 119     if other == 0:
    120         return self
    121     return cola.fns.add(self, other)

File [/.conda/envs/cola/lib/python3.10/site-packages/jax/_src/array.py:257](/.conda/envs/cola/lib/python3.10/site-packages/jax/_src/array.py:257), in ArrayImpl.__bool__(self)
    256 def __bool__(self):
--> 257   return bool(self._value)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

Note that it works when the slice of 1000 samples is changed to a slice of 2000 samples.

mfinzi commented

Should be fixed now with this commit

Can confirm now. This is fixed.