-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathplot_benchmark.py
136 lines (110 loc) · 5.51 KB
/
plot_benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""
Benchmarking the TensorLy PARAFAC methods
=================================================
This example demonstrates how to benchmark TensorLy methods, using datasets available in TensorLy.
The user may also compare built-in methods with any external method.
.. note::
Our goal is to provide more generalized benchmark functions to TensorLy users in future versions of TensorLy.
"""
##############################################################################
# Introduction
# -----------------------
# As of Autumn 2022, TensorLy includes a few datasets:
#
# 1. IL2data
# * Mutein treatment responses
# 2. Covid19
# * Three-mode tensor of samples, antigens, and receptors
# 3. Indian pines
# * Hyperspectral image
# 4. Kinetic
# * 4-way quantified measurement data
#
# Each dataset includes meta-information such as a scholar reference, the name of the modes,
# a suggested data mining task and a description. This information is used as labels when visualizing the results.
import tensorly as tl
from tensorly.metrics import RMSE
from tensorly.datasets import load_IL2data
import numpy as np
##########################################################################
# First let us load the IL2data dataset. The syntax is inspired from scikit-learn.
# For simplicity here we replaced missing values in the data with zeroes.
dataset = load_IL2data()
tensor = dataset.tensor
# Zero out missing entries in place: NaN marks a missing measurement in this
# dataset, and the decomposition methods used below expect a fully numeric array.
tensor[np.isnan(tensor)] = 0
# Mode names from the dataset's meta-information, used as labels when plotting.
dims = dataset.dims
print(dims)
##############################################################################
# How to compare several methods
# --------------------------------------------
# To compare several methods fairly, we suggest using the same initial factors
# and errors for all of them. Here we use `initialize_cp` as an example. You can
# also define your own initial CPTensor.
#
# NOTE(review): `initialize_cp` is imported from the private module
# `tensorly.decomposition._nn_cp`; this path may change between TensorLy versions.
from tensorly.decomposition._nn_cp import initialize_cp
rank = 5
# Random nonnegative initial CP factors, shared by every method benchmarked below.
initial_tensor = initialize_cp(tensor, rank=rank, init="random", non_negative=True)
# Relative reconstruction error of the shared initialization; it is inserted at
# the front of every method's error curve so all curves start from the same value.
first_error = tl.norm(tensor - tl.cp_to_tensor(initial_tensor), 2) / tl.norm(tensor, 2)
##########################################################################
# Let us import some decomposition methods from TensorLy. We are going to
# compare three built-in algorithms:
#
# - non_negative_parafac (using multiplicative updates)
# - non_negative_parafac_hals (using block-coordinate updates)
# - parafac (an implementation of alternating least squares, without nonnegativity)
#
# Then, we put them in a list below. You can also append your method to this
# list to compare your own method with them.
from tensorly.decomposition import non_negative_parafac_hals, non_negative_parafac, parafac
# The benchmark loop below calls each entry as
# algorithm(tensor, rank, n_iter_max=..., init=..., tol=..., return_errors=True),
# so every listed function must accept that signature.
method = [non_negative_parafac, non_negative_parafac_hals, parafac]
##########################################################################
# If you have a custom algorithm `my_alg` to compare with, simply add the
# function name as follows:
#
#     method = [non_negative_parafac, non_negative_parafac_hals, parafac, my_alg]
#
# Note that it should have a signature accepting the following inputs:
#
#     my_alg(tensor, rank, n_iter_max=, init=, tol=, return_errors=)
###########################################################################
# Here we define some variables to compare selected methods.
import time
# Per-method result storage, filled in by the benchmark loop below.
cp_tensors = []  # fitted CP tensors, one per method
errors = []      # error per iteration, one list per method
rmse = []        # final root-mean-square error per method
proc_time = []   # wall-clock fitting time per method, in seconds
# Algorithm hyperparameters, shared by all methods for a fair comparison.
n_iter_max = 100
# Convergence tolerance. BUG FIX: the original read ``tol = 1-16`` — integer
# subtraction evaluating to -15 — a typo for the scientific-notation literal
# 1e-16. A negative tolerance effectively disables the convergence check, so
# every method silently ran all n_iter_max iterations.
tol = 1e-16
##########################################################################
# We can run each method in a loop and save the results. Then, we print the
# root-mean-square error and processing time for each method.
for algorithm in method:
    # time.perf_counter() is monotonic and high-resolution, which makes it the
    # right clock for elapsed-time measurement; time.time() is a wall clock
    # that can jump (e.g. on NTP adjustments) and skew benchmark timings.
    tic = time.perf_counter()
    # Each method starts from a copy of the same initial CPTensor so the
    # comparison is fair; return_errors=True gives the per-iteration errors.
    cp_tensor_res, error = algorithm(tensor, rank, n_iter_max=n_iter_max,
                                     init=initial_tensor.cp_copy(), tol=tol,
                                     return_errors=True)
    proc_time.append(time.perf_counter() - tic)
    # The methods do not report the error of the initialization itself, so
    # prepend the shared initial error to align all curves at iteration 0.
    error.insert(0, first_error)
    cp_tensors.append(cp_tensor_res.cp_copy())
    errors.append(error)
    rmse.append(RMSE(tensor, tl.cp_to_tensor(cp_tensor_res)))
    print(f"RMSE with {algorithm.__name__}:{rmse[-1]:.2f}")
for algorithm, duration in zip(method, proc_time):
    print(f"Processing time of {algorithm.__name__}: {duration:.2f}")
##########################################################################
# Now plot the per-iteration error curves. Because every method was seeded
# with the same initial error, all curves decrease from the same starting
# point.
import matplotlib.pyplot as plt

plt.figure()
plt.title("Error for each iteration")
# One curve per benchmarked method, in the same order as `method`.
for curve in errors:
    plt.plot(curve)
plt.legend([algorithm.__name__ for algorithm in method[:len(errors)]])
plt.show()
##########################################################################
# We saved the factors from all the methods in cp_tensors. Now, we will plot
# them using a suitable function from tlviz. First, in order to benefit from
# the label information, we convert the TensorLy dataset to an xarray.
import xarray as xr
from tlviz.visualisation import component_comparison_plot
from tlviz.postprocessing import postprocess
# NOTE: `dataset` is rebound here from the TensorLy dataset object to a labelled
# xarray.DataArray; `dataset.tensor` / `dataset.ticks` are no longer accessible
# after this line. `ticks` presumably holds the per-mode coordinate labels —
# confirm against the TensorLy dataset definition.
dataset = xr.DataArray(dataset.tensor, coords=dataset.ticks,
                       dims=dataset.dims)
# Map each method's name to its postprocessed factors and plot them side by side.
fig, axes = component_comparison_plot({method[i].__name__: postprocess(cp_tensors[i], dataset) for i in range(len(method))})
plt.show()
###########################################################################
# It is also possible to use your own ndarray as a tensor. However, figures
# won't have labels.