diff --git a/examples/plot_benchmark.py b/examples/plot_benchmark.py new file mode 100644 index 0000000..de74d3d --- /dev/null +++ b/examples/plot_benchmark.py @@ -0,0 +1,136 @@ +""" +Benchmarking the TensorLy PARAFAC methods +================================================= + +This example demonstrates how to benchmark Tensorly methods, using dataset available in Tensorly. +The user may also compare built-in methods with any external method. + +.. note:: + Our goal is to provide more generalized benchmark functions to Tensorly users in the future versions of Tensorly. +""" + +############################################################################## +# Introduction +# ----------------------- +# As of Autumn 2022, Tensorly includes a few dataset: +# +# 1. IL2data +# * Mutein treatment responses +# 2. Covid19 +# * Three-mode tensor of samples, antigens, and receptors +# 3. Indian pines +# * Hyperspectral image +# 4. Kinetic +# * 4-way quantified measurement data +# +# Each dataset includes meta-information such as a scholar reference, the name of the modes, +# a suggested data mining task and a description. This information is used as labels when visualizing the results. + +import tensorly as tl +from tensorly.metrics import RMSE +from tensorly.datasets import load_IL2data +import numpy as np + +########################################################################## +# First let us load the IL2data dataset. The syntax is inspired from scikit-learn. +# For simplicity here we replaced missing values in the data with zeroes. +dataset = load_IL2data() +tensor = dataset.tensor +tensor[np.isnan(tensor)] = 0 +dims = dataset.dims +print(dims) + + +############################################################################## +# How to compare several methods +# -------------------------------------------- +# To compare several methods fairly, we suggest to use same initial factors and errors +# for all of them. Here we use `initialize_cp` as an example. You can also define +# your initial CPTensor. + +from tensorly.decomposition._nn_cp import initialize_cp +rank = 5 + +initial_tensor = initialize_cp(tensor, rank=rank, init="random", non_negative=True) +first_error = tl.norm(tensor - tl.cp_to_tensor(initial_tensor), 2) / tl.norm(tensor, 2) + +########################################################################## +# Let us import some decomposition methods from Tensorly. We are going to compare three built-in algorithms: +# - non_negative_parafac (using multiplicative updates) +# - non_negative_parafac_hals (using block-coordinate updates) +# - parafac (an implementation of alternating least squares, without nonnegativity) +# Then, we put them in a list below. You can also append your method to this list to compare your own method +# with them. + +from tensorly.decomposition import non_negative_parafac_hals, non_negative_parafac, parafac +method = [non_negative_parafac, non_negative_parafac_hals, parafac] + +########################################################################## +# If you have a custom algorithm `my_alg` to compare with, simply add the function name as follows +# method = [non_negative_parafac, non_negative_parafac_hals, parafac, my_alg] +# Note that it should have a signature including the following inputs +# my_alg(tensor, rank, n_iter_max=, init=, tol=, return_errors=) + + +########################################################################### +# Here we define some variables to compare selected methods. + +import time + +# A few storage arrays +cp_tensors = [] +errors = [] +rmse = [] +proc_time = [] +# Algorithms hyperparameters +n_iter_max = 100 +tol = 1-16 + +########################################################################## +# We can run each method in a loop and save the results. Then, we print +# root-mean-square error and processing time for each method. + +for i in range(len(method)): + tic = time.time() + cp_tensor_res, error = method[i](tensor, rank, n_iter_max=n_iter_max, init=initial_tensor.cp_copy(), tol=tol, + return_errors=True) + proc_time.append(time.time() - tic) + error.insert(0, first_error) # assuming the methods do not compute initial error, as in tensorly. + cp_tensors.append(cp_tensor_res.cp_copy()) + errors.append(error) + rmse.append(RMSE(tensor, tl.cp_to_tensor(cp_tensor_res))) + print("RMSE with" + ' ' + str(method[i].__name__) + ":" + str("{:.2f}".format(rmse[i]))) + +for i in range(len(proc_time)): + print("Processing time of" + ' ' + str(method[i].__name__) + ":", str("{:.2f}".format(proc_time[i]))) + +########################################################################## +# We can plot error per iteration now. Since we inserted same first error for +# each method, error will start to decrease from the same point. + +import matplotlib.pyplot as plt + +plt.figure() +plt.title("Error for each iteration") +for i in range(len(errors)): + plt.plot(errors[i]) +plt.legend([str(method[i].__name__) for i in range(len(errors))]) +plt.show() + +########################################################################## +# We saved factors from all the methods in cp_tensors. Now, we will plot +# them by using a suitable function from tlviz. First, in order to benefit +# from label information, we will convert tensorly dataset dictionary to xarray. + +import xarray as xr +from tlviz.visualisation import component_comparison_plot +from tlviz.postprocessing import postprocess + +dataset = xr.DataArray(dataset.tensor, coords=dataset.ticks, + dims=dataset.dims) +fig, axes = component_comparison_plot({method[i].__name__: postprocess(cp_tensors[i], dataset) for i in range(len(method))}) +plt.show() + +########################################################################### +# It is also possible to use your own ndarray as a tensor. However, figures +# won't have labels.