Implement ktensor.score and associated tests. #47

Merged · 2 commits · Oct 12, 2022
140 changes: 137 additions & 3 deletions pyttb/ktensor.py
@@ -1360,9 +1360,143 @@ def shape(self):
"""
return tuple([f.shape[0] for f in self.factor_matrices])

# TODO implement
def score(self, other, **kwargs):
assert False, "Not yet implemented" # pragma: no cover
def score(self, other, weight_penalty=True, threshold=0.99, greedy=True):
"""
Checks if two ktensor instances match except for permutation.

We define matching as follows. If A (self) and B (other) are single component
ktensors that have been normalized so that their weights are weights_a and
weights_b, then the score is defined as

score = penalty * (a1.T*b1) * (a2.T*b2) * ... * (aR.T*bR),

where the penalty is defined by the weights such that

penalty = 1 - abs(weights_a - weights_b) / max(weights_a, weights_b).

The score of multi-component ktensors is a normalized sum of the
scores across the best permutation of the components of A. A can have
more components than B --- any extra components are ignored in terms of
the matching score.

Parameters
----------
other: :class:`pyttb.ktensor`
`ktensor` to match against
weight_penalty: bool
Flag indicating whether or not to consider the weights in the calculations.
Default: true
threshold: float
Threshold on the score for declaring a match. Default: 0.99
greedy: bool
Flag indicating whether to do a greedy matching (default) or to consider all
possible matchings (exponentially expensive). Default: true

Returns
-------
float
Score (between 0 and 1)
:class:`pyttb.ktensor`
Copy of `self`, which has been normalized and permuted to best match `other`
bool
Flag indicating a match according to a user-specified threshold
:class:`numpy.ndarray`
Permutation (i.e., array of indices of the components of `self`) that was used
to best match `other`

Example
-------
Create two `ktensor` instances:

>>> A = ttb.ktensor.from_data(np.array([2, 1, 3]), np.ones((3,3)), np.ones((4,3)), np.ones((5,3)))
>>> B = ttb.ktensor.from_data(np.array([2, 4]), np.ones((3,2)), np.ones((4,2)), np.ones((5,2)))

Compute `score` using `ktensor.weights`:

>>> score,Aperm,flag,perm = A.score(B)
>>> print(score)
0.875
>>> print(perm)
[0 2 1]

Compute `score` without using `ktensor.weights`:

>>> score,Aperm,flag,perm = A.score(B,weight_penalty=False)
>>> print(score)
1.0
>>> print(perm)
[0 1 2]
"""

if not greedy:
assert False, "Not yet implemented. Only greedy method is implemented currently."

if not isinstance(other, ktensor):
assert False, "The first input should be a ktensor"

if not (self.shape == other.shape):
assert False, "Size mismatch"

# Set-up
N = self.ndims
RA = self.ncomponents
RB = other.ncomponents

# We're matching components in A to B
if (RA < RB):
assert False, "Tensor A must have at least as many components as tensor B"

# Make sure columns of factor matrices are normalized
A = ttb.ktensor.from_tensor_type(self).normalize()
B = ttb.ktensor.from_tensor_type(other).normalize()

# Compute all possible vector-vector congruences.

# Compute every pair for each mode
Cbig = ttb.tensor.from_function(np.zeros, (RA,RB,N))
for n in range(N):
Cbig[:,:,n] = np.abs(A.factor_matrices[n].T @ B.factor_matrices[n])

# Collapse across all modes using the product
C = Cbig.collapse(np.array([2]), np.prod).double()

# Calculate penalty based on differences in the Lambda's
# Note that we are assuming that the lambda values are positive because the
# ktensors were previously normalized.
if weight_penalty:
P = np.zeros((RA, RB))
for ra in range(RA):
la = A.weights[ra]
for rb in range(RB):
lb = B.weights[rb]
if (la == 0) and (lb == 0):
# if both lambda values are zero (0), they match
P[ra, rb] = 1
else:
P[ra, rb] = 1 - (np.abs(la-lb) / np.max([np.abs(la),np.abs(lb)]))
C = P * C

# Option to do greedy matching
if greedy:
best_perm = -1 * np.ones((RA), dtype=int)
best_score = 0
for r in range(RB):
idx = np.argmax(C.reshape(np.prod(C.shape),order='F'))
ij = tt_ind2sub((RA, RB), idx)
best_score = best_score + C[ij[0], ij[1]]
C[ij[0], :] = -10
C[:, ij[1]] = -10
best_perm[ij[1]] = ij[0]
best_score = best_score / RB
flag = 1

# Rearrange the components of A according to the best matching
foo = np.arange(RA)
tf = np.in1d(foo, best_perm)
best_perm[RB:RA+1] = foo[~tf]
A.arrange(permutation=best_perm)
return best_score, A, flag, best_perm

def symmetrize(self):
"""
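
To make the scoring concrete, the following standalone NumPy sketch reproduces the 0.875 score from the docstring example above. It is an illustration of the weight penalty and greedy matching only, not part of this changeset: because every factor matrix in the example is all ones, each normalized factor column is identical, so every congruence is 1 and only the penalty matters; normalization also rescales both sets of weights by the same constant (2*sqrt(15)), which cancels in the penalty, so the raw weights suffice here.

import numpy as np

weights_a = np.array([2., 1., 3.])   # weights of A (3 components)
weights_b = np.array([2., 4.])       # weights of B (2 components)
C = np.ones((3, 2))                  # factor-matrix congruences are all 1 here

# Weight penalty: 1 - |la - lb| / max(la, lb), applied entrywise
P = 1 - np.abs(weights_a[:, None] - weights_b[None, :]) / np.maximum(
    weights_a[:, None], weights_b[None, :])
C = P * C

# Greedy matching, mirroring the loop in score()
best_perm = -np.ones(3, dtype=int)
best_score = 0.0
for _ in range(2):
    ra, rb = np.unravel_index(np.argmax(C), C.shape)   # best remaining pair
    best_score += C[ra, rb]
    C[ra, :] = -10                                     # exclude this row and column
    C[:, rb] = -10
    best_perm[rb] = ra

print(best_score / 2)   # 0.875, matching the docstring example
print(best_perm)        # [0 2 -1]; score() fills the unmatched slot to give [0 2 1]
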
6 changes: 3 additions & 3 deletions pyttb/pyttb_utils.py
@@ -514,9 +514,9 @@ def tt_ind2sub(shape, idx):
:class:`numpy.ndarray`
"""
if idx.size == 0:
return np.array([])
return np.empty(shape=(0,len(shape)), dtype=int)

return np.array(np.unravel_index(idx, shape)).transpose()
return np.array(np.unravel_index(idx, shape, order='F')).transpose()


def tt_subsubsref(obj, s):
@@ -575,7 +575,7 @@ def tt_sub2ind(shape, subs):
"""
if subs.size == 0:
return np.array([])
idx = np.ravel_multi_index(tuple(subs.transpose()), shape)
idx = np.ravel_multi_index(tuple(subs.transpose()), shape, order='F')
return idx


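The order='F' arguments above switch tt_ind2sub and tt_sub2ind to column-major (MATLAB-style) linear indexing. A minimal NumPy-only round-trip check, using the same values as the updated test_tt_ind2sub_valid further below (illustration only, independent of pyttb):

import numpy as np

shape = (4, 4, 4)
idx = np.array([0, 21, 63])
# Column-major subscripts, as tt_ind2sub now computes them
subs = np.array(np.unravel_index(idx, shape, order='F')).transpose()
print(subs)    # rows: [0 0 0], [1 1 1], [3 3 3]
# Round trip back to linear indices, as tt_sub2ind now computes them
back = np.ravel_multi_index(tuple(subs.transpose()), shape, order='F')
assert (back == idx).all()
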
33 changes: 18 additions & 15 deletions pyttb/sptensor.py
@@ -426,7 +426,7 @@ def extract(self, searchsubs):
assert False, 'Invalid subscripts'

# Set the default answer to zero
a = np.zeros(shape=(p, 1))
a = np.zeros(shape=(p, 1), dtype=self.vals.dtype)

# Find which indices already exist and their locations
loc = ttb.tt_ismember_rows(searchsubs, self.subs)
@@ -1112,18 +1112,20 @@ def __getitem__(self, item):

Examples
--------
>>> X = sptensor(np.array([[4,4,4],[2,2,1],[2,3,2]]),np.array([[3],[5],[1]]),(4,4,4))
>>> X[1,2,1] #<-- returns zero
>>> X[4,4,4] #<-- returns 3
>>> X[3:4,:,:] #<-- returns 1 x 4 x 4 sptensor
>>> X = sptensor(np.array([[3,3,3],[1,1,0],[1,2,1]]),np.array([3,5,1]),(4,4,4))
>>> X[0,1,0] #<-- returns zero
>>> X[3,3,3] #<-- returns 3
>>> X[2:3,:,:] #<-- returns 1 x 4 x 4 sptensor
X = sptensor([6;16;26],[1;1;1],30);
X([1:6]') <-- extracts a subtensor
X([1:6]','extract') %<-- extracts a vector of 6 elements
"""
# This does not work like MATLAB TTB; you must call sptensor.extract to get this functionality
# X([1:6]','extract') %<-- extracts a vector of 6 elements

#TODO IndexError for value outside of indices
# TODO Key error if item not in container
# *** CASE 1: Rectangular Subtensor ***
if isinstance(item, tuple) and len(item) == self.ndims and item[len(item)-1] != 'extract':
if isinstance(item, tuple) and len(item) == self.ndims:
# Extract the subdimensions to be extracted from self
region = item

@@ -1160,7 +1162,7 @@ def __getitem__(self, item):
# Return a single double value for a zero-order sub-tensor
if newsiz.size == 0:
if vals.size == 0:
a = 0
a = np.array([[0]])
else:
a = vals
return a
@@ -1177,21 +1179,22 @@ def __getitem__(self, item):
# Case 2: EXTRACT

# *** CASE 2a: Subscript indexing ***
if len(item) > 1 and isinstance(item[-1], str) and item[-1] == 'extract':
# extract array of subscripts
srchsubs = np.array(item[0])
item = item[0]
if isinstance(item, np.ndarray) and len(item.shape) == 2 and item.shape[1] == self.ndims:
srchsubs = np.array(item)

# *** CASE 2b: Linear indexing ***
else:
# Error checking
if not isinstance(item, list) and not isinstance(item, np.ndarray):
if isinstance(item, list):
idx = np.array(item)
elif isinstance(item, np.ndarray):
idx = item
else:
assert False, 'Invalid indexing'

idx = item
if len(idx.shape) != 1:
assert False, 'Expecting a row index'
#idx=np.expand_dims(idx, axis=1)

# extract linear indices and convert to subscripts
srchsubs = tt_ind2sub(self.shape, idx)

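The reworked branches in __getitem__ distinguish three cases: a tuple with one entry per mode (rectangular subtensor), a 2-D array with one column per mode (subscript indexing), and a 1-D list or array (linear indexing). The following standalone sketch captures just that dispatch logic; it is an illustration and does not use pyttb:

import numpy as np

def classify_index(item, ndims):
    # Mirrors the branch conditions above; returns which case would apply.
    if isinstance(item, tuple) and len(item) == ndims:
        return 'rectangular subtensor'   # CASE 1
    if isinstance(item, np.ndarray) and len(item.shape) == 2 and item.shape[1] == ndims:
        return 'subscript array'         # CASE 2a
    if isinstance(item, (list, np.ndarray)):
        return 'linear indices'          # CASE 2b
    raise AssertionError('Invalid indexing')

assert classify_index((0, 1, 0), 3) == 'rectangular subtensor'
assert classify_index(np.array([[1, 1, 0], [1, 2, 1]]), 3) == 'subscript array'
assert classify_index([0, 5, 10], 3) == 'linear indices'
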
10 changes: 5 additions & 5 deletions pyttb/tensor.py
@@ -298,10 +298,10 @@ def find(self):

:return:
"""
idx = np.where(self.data > 0)
subs = np.array(idx).transpose()
vals = self.data[idx]
return subs, vals[:, None]
idx = np.nonzero(np.ravel(self.data,order='F'))[0]
subs = ttb.tt_ind2sub(self.shape,idx)
vals = self.data[tuple(subs.T)][:,None]
return subs, vals

def full(self):
"""
@@ -1623,7 +1623,7 @@ def __repr__(self):
s += str(self.data)
s += '\n'
return s
for i, j in enumerate(range(0, np.prod(self.shape), self.shape[-1]*self.shape[-2])):
for i in np.arange(np.prod(self.shape[:-2])):
s += 'data'
if self.ndims == 2:
s += '[:, :]'
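
The new find walks the data in column-major order and, unlike the previous data > 0 test, also reports negative nonzeros. A small NumPy-only sketch of the same computation on a plain array (illustration only, not the ttb.tensor method):

import numpy as np

data = np.array([[0., -1.],
                 [2.,  0.]])
# Column-major positions of the nonzero entries
idx = np.nonzero(np.ravel(data, order='F'))[0]
# Convert to subscripts the same way tt_ind2sub now does
subs = np.array(np.unravel_index(idx, data.shape, order='F')).transpose()
vals = data[tuple(subs.T)][:, None]
print(subs)   # rows: [1 0] and [0 1]
print(vals)   # [[ 2.], [-1.]] -- the negative entry is included
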
40 changes: 38 additions & 2 deletions tests/test_ktensor.py
@@ -375,7 +375,7 @@ def test_ktensor_issymetric(sample_ktensor_2way, sample_ktensor_symmetric):
def test_ktensor_mask(sample_ktensor_2way):
(data, K) = sample_ktensor_2way
W = ttb.tensor.from_data(np.array([[0, 1], [1, 0]]))
assert (K.mask(W) == np.array([[39], [63]])).all()
assert (K.mask(W) == np.array([[63], [39]])).all()

# Mask too large
with pytest.raises(AssertionError) as excinfo:
@@ -614,7 +614,43 @@ def test_ktensor_redistribute(sample_ktensor_2way):
assert (np.array([[5, 6], [7, 8]]) == K[1]).all()
assert (np.array([1, 1]) == K.weights).all()

@pytest.mark.indevelopment
def test_ktensor_score():
A = ttb.ktensor.from_data(np.array([2, 1, 3]), np.ones((3,3)), np.ones((4,3)), np.ones((5,3)))
B = ttb.ktensor.from_data(np.array([2, 4]), np.ones((3,2)), np.ones((4,2)), np.ones((5,2)))

# defaults
score, Aperm, flag, best_perm = A.score(B)
assert score == 0.875
assert np.allclose(Aperm.weights, np.array([15.49193338,23.23790008,7.74596669]))
assert flag == 1
assert (best_perm == np.array([0,2,1])).all()

# compare just factor matrices (i.e., do not use weights)
score, Aperm, flag, best_perm = A.score(B, weight_penalty=False)
assert score == 1.0
assert np.allclose(Aperm.weights, np.array([15.49193338,7.74596669,23.23790008]))
assert flag == 1
assert (best_perm == np.array([0,1,2])).all()

# compute score using exhaustive search
with pytest.raises(AssertionError) as excinfo:
score, Aperm, flag, best_perm = A.score(B, greedy=False)
assert "Not yet implemented. Only greedy method is implemented currently." in str(excinfo)

# try to compute score with tensor type other than ktensor
with pytest.raises(AssertionError) as excinfo:
score, Aperm, flag, best_perm = A.score(ttb.tensor.from_tensor_type(B))
assert "The first input should be a ktensor" in str(excinfo)

# try to compute score when ktensor dimensions do not match
with pytest.raises(AssertionError) as excinfo:
# A is 3x4x5; B is 3x4x4
B = ttb.ktensor.from_data(np.array([2, 4]), np.ones((3,2)), np.ones((4,2)), np.ones((4,2)))
score, Aperm, flag, best_perm = A.score(B)
assert "Size mismatch" in str(excinfo)

@pytest.mark.indevelopment
def test_ktensor_shape(sample_ktensor_2way, sample_ktensor_3way):
(data, K0) = sample_ktensor_2way
assert K0.shape == (2, 2)
10 changes: 6 additions & 4 deletions tests/test_pyttb_utils.py
@@ -44,8 +44,8 @@ def test_sptensor_to_sparse_matrix():
subs = np.array([[1, 1, 1], [1, 1, 3], [2, 2, 2], [3, 3, 3]])
vals = np.array([[0.5], [1.5], [2.5], [3.5]])
shape = (4, 4, 4)
mode0 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 7, 10, 15], [1, 1, 2, 3])))
mode1 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 7, 10, 15], [1, 1, 2, 3])))
mode0 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 13, 10, 15], [1, 1, 2, 3])))
mode1 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 13, 10, 15], [1, 1, 2, 3])))
mode2 = sparse.coo_matrix(([0.5, 1.5, 2.5, 3.5], ([5, 5, 10, 15], [1, 3, 2, 3])))
Ynt = [mode0, mode1, mode2]
sptensorInstance = ttb.sptensor().from_data(subs, vals, shape)
@@ -330,15 +330,17 @@ def test_tt_ind2sub_valid():
subs = np.array([[0, 0, 0], [1, 1, 1], [3, 3, 3]])
idx = np.array([0, 21, 63])
shape = (4, 4, 4)
print(f'\nttb.tt_ind2sub(shape, idx): {ttb.tt_ind2sub(shape, idx)}')
assert (ttb.tt_ind2sub(shape, idx) == subs).all()

subs = np.array([[0, 1], [1, 0]])
subs = np.array([[1, 0], [0, 1]])
idx = np.array([1, 2])
shape = (2, 2)
print(f'\nttb.tt_ind2sub(shape, idx): {ttb.tt_ind2sub(shape, idx)}')
assert (ttb.tt_ind2sub(shape, idx) == subs).all()

empty = np.array([])
assert (ttb.tt_ind2sub(shape, empty) == empty).all()
assert (ttb.tt_ind2sub(shape, empty) == np.empty(shape=(0,len(shape)), dtype=int)).all()

@pytest.mark.indevelopment
def test_tt_subsubsref_valid():