/measure

Various distance and similarity measures in python

Primary Language: Python

measure

Various distance and similarity measures in Python. An updated version will include implementations of the metrics described in "Comprehensive Survey on Distance/Similarity Measures between Probability Density Functions" by Sung-Hyuk Cha.

import numpy as np

class Distance(object):
    """Collection of vector distance / dissimilarity measures built on numpy.

    Every method takes two vectors ``a`` and ``b`` (``u`` and ``v`` for
    ``jaccard``) and returns a scalar.  The arguments must already support
    numpy element-wise arithmetic; plain Python lists are not converted.
    The accompanying demo calls the methods with 1-D arrays of equal length.

    The "boolean" measures (dice, kulsinski, rogerstanimoto, russellrao,
    sokalmichener, sokalsneath, yule) are computed from arithmetic sums, so
    they accept any numeric input without validation, but the textbook
    interpretation only holds for 0/1 vectors.
    """

    @staticmethod
    def _bool_counts(a, b):
        """Return the contingency sums ``(nff, nft, ntf, ntt)`` for a and b.

        For 0/1 vectors these are the numbers of positions where (a, b) is
        (0, 0), (0, 1), (1, 0) and (1, 1) respectively.
        """
        not_a = 1 - a
        not_b = 1 - b
        nff = (not_a * not_b).sum()
        nft = (not_a * b).sum()
        ntf = (a * not_b).sum()
        ntt = (a * b).sum()
        return nff, nft, ntf, ntt

    def braycurtis(self, a, b):
        """Return sum(|a - b|) / sum(|a + b|)."""
        return np.sum(np.fabs(a - b)) / np.sum(np.fabs(a + b))

    def canberra(self, a, b):
        """Return sum(|a_i - b_i| / (|a_i| + |b_i|)).

        Positions where both entries are zero produce 0/0; they contribute
        0 to the sum instead of turning the whole result into NaN.
        """
        # The denominator is zero only when the numerator is zero too, so
        # the only floating-point error to silence is the "invalid" 0/0.
        with np.errstate(invalid='ignore'):
            terms = np.fabs(a - b) / (np.fabs(a) + np.fabs(b))
        return np.nansum(terms)

    def chebyshev(self, a, b):
        """Return the largest absolute component-wise difference."""
        # The absolute value is required: without it the result depends on
        # argument order and can be negative.
        return np.amax(np.abs(a - b))

    def cityblock(self, a, b):
        """Alias of :meth:`manhattan`."""
        return self.manhattan(a, b)

    def correlation(self, a, b):
        """Return 1 minus the correlation coefficient of a and b."""
        a = a - np.mean(a)
        b = b - np.mean(b)
        covariance = np.mean(a * b)
        scale = np.sqrt(np.mean(np.square(a)) * np.mean(np.square(b)))
        return 1.0 - covariance / scale

    def cosine(self, a, b):
        """Return 1 minus the cosine of the angle between a and b."""
        return 1 - np.dot(a, b) / (np.sqrt(np.dot(a, a)) * np.sqrt(np.dot(b, b)))

    def dice(self, a, b):
        """Return (ntf + nft) / (2 * ntt + ntf + nft)."""
        _, nft, ntf, ntt = self._bool_counts(a, b)
        # Wrapping the denominator in np.array keeps numpy division
        # semantics (inf/nan plus a warning) rather than ZeroDivisionError.
        return float((ntf + nft) / np.array(2.0 * ntt + ntf + nft))

    def euclidean(self, a, b):
        """Return the square root of the dot product of (a - b) with itself."""
        diff = a - b
        return np.sqrt(np.sum(np.dot(diff, diff)))

    def hamming(self, a, b, w = None):
        """Return the weighted fraction of positions where a and b differ.

        ``w`` holds one weight per position and defaults to uniform weights
        of length ``a.shape[0]``.
        """
        if w is None:
            w = np.ones(a.shape[0])
        return np.average(a != b, weights = w)

    def jaccard(self, u, v):
        """Return the share of differing positions among those where at
        least one of u, v is non-zero."""
        nonzero = np.bitwise_or(u != 0, v != 0)
        differing = np.bitwise_and(u != v, nonzero)
        return np.double(differing.sum()) / np.double(nonzero.sum())

    def kulsinski(self, a, b):
        """Return (ntf + nft - ntt + n) / (ntf + nft + n) with n = len(a)."""
        _, nft, ntf, ntt = self._bool_counts(a, b)
        n = len(a)
        # np.true_divide forces real division even for integer operands on
        # interpreters where "/" truncates.
        return np.true_divide(ntf + nft - ntt + n, ntf + nft + n)

    def mahalanobis(self, a, b, vi):
        """Return sqrt((a - b) . vi . (a - b)^T).

        ``vi`` is the matrix of the quadratic form (the inverse covariance
        matrix in the usual definition of this distance).
        """
        diff = a - b
        return np.sqrt(np.dot(np.dot(diff, vi), diff.T))

    def manhattan(self, a, b):
        """Return the sum of absolute component-wise differences."""
        return np.sum(np.fabs(a - b))

    def matching(self, a, b):
        """Alias of :meth:`hamming` with uniform weights."""
        return self.hamming(a, b)

    def minkowski(self, a, b, p):
        """Return (sum(|a - b| ** p)) ** (1 / p)."""
        # 1.0 / p: an integer p must not trigger integer division.
        return np.power(np.sum(np.power(np.fabs(a - b), p)), 1.0 / p)

    def rogerstanimoto(self, a, b):
        """Return 2 * (ntf + nft) / (ntt + nff + 2 * (ntf + nft))."""
        nff, nft, ntf, ntt = self._bool_counts(a, b)
        mismatches = 2.0 * (ntf + nft)
        return float(mismatches) / float(ntt + nff + mismatches)

    def russellrao(self, a, b):
        """Return (n - ntt) / n with n = len(a)."""
        return float(len(a) - (a * b).sum()) / len(a)

    def seuclidean(self, a, b, V):
        """Return sqrt(sum((a - b) ** 2 / V)).

        ``V`` supplies the per-component divisor (the component variances
        in the usual definition of the standardized Euclidean distance).
        """
        return np.sqrt(np.sum((a - b) ** 2 / V))

    def sokalmichener(self, a, b):
        """Return 2 * (ntf + nft) / (ntt + nff + 2 * (ntf + nft)).

        This is the same expression as :meth:`rogerstanimoto`, so the call
        is simply forwarded.
        """
        return self.rogerstanimoto(a, b)

    def sokalsneath(self, a, b):
        """Return 2 * (ntf + nft) / (ntt + 2 * (ntf + nft))."""
        _, nft, ntf, ntt = self._bool_counts(a, b)
        mismatches = 2.0 * (ntf + nft)
        return float(mismatches) / np.array(ntt + mismatches)

    def sqeuclidean(self, a, b):
        """Return the dot product of (a - b) with itself."""
        diff = a - b
        return np.sum(np.dot(diff, diff))

    def wminkowski(self, a, b, p, w):
        """Return (sum(|w * (a - b)| ** p)) ** (1 / p)."""
        # 1.0 / p: an integer p must not trigger integer division.
        return np.power(np.sum(np.power(np.fabs(w * (a - b)), p)), 1.0 / p)

    def yule(self, a, b):
        """Return 2 * ntf * nft / (ntt * nff + ntf * nft).

        When ``ntf * nft`` is zero the result is 0.0, which also covers the
        0/0 case (e.g. identical all-ones vectors) that would otherwise
        yield NaN.
        """
        nff, nft, ntf, ntt = self._bool_counts(a, b)
        half_r = ntf * nft
        if half_r == 0:
            return 0.0
        return float(2.0 * half_r / np.array(ntt * nff + half_r))

def main():
    """Print each Distance measure next to its SciPy counterpart.

    For every measure the result of the local implementation and of the
    matching function in ``scipy.spatial.distance`` is printed for the same
    pair of sample vectors, separated by a dashed line.

    Some of the SciPy functions used here (e.g. ``kulsinski`` and
    ``wminkowski``) have been removed from recent SciPy releases; when a
    function is missing, a note is printed in place of its value instead of
    aborting the whole comparison with an AttributeError.
    """
    # SciPy is imported lazily: only this demo needs it, the Distance class
    # itself depends on numpy alone.
    from scipy.spatial import distance

    a = np.array([1, 2, 43])
    b = np.array([3, 2, 1])
    iv = np.array([[1, 0.5, 0.5], [0.5, 1, 0.5], [0.5, 0.5, 1]])
    separator = '-----------------------------------------------------------------'

    d = Distance()

    # Each row: (printed label, Distance method, SciPy function,
    #            extra args for the Distance method, extra args for SciPy).
    cases = [
        ('braycurtis', 'braycurtis', 'braycurtis', (), ()),
        ('canberra', 'canberra', 'canberra', (), ()),
        ('chebyshev', 'chebyshev', 'chebyshev', (), ()),
        ('cityblock', 'cityblock', 'cityblock', (), ()),
        ('correlation', 'correlation', 'correlation', (), ()),
        ('euclidean', 'euclidean', 'euclidean', (), ()),
        ('hamming', 'hamming', 'hamming', (), ()),
        ('jaccard', 'jaccard', 'jaccard', (), ()),
        # SciPy has no "manhattan"; cityblock is the same measure.
        ('manhattan', 'manhattan', 'cityblock', (), ()),
        ('cosine', 'cosine', 'cosine', (), ()),
        ('dice', 'dice', 'dice', (), ()),
        ('kulsinski', 'kulsinski', 'kulsinski', (), ()),
        ('mahalanobis', 'mahalanobis', 'mahalanobis', (iv,), (iv,)),
        ('seuclidean', 'seuclidean', 'seuclidean',
         (np.array([0.1, 0.1, 0.1]),), ([0.1, 0.1, 0.1],)),
        ('sokalmichener', 'sokalmichener', 'sokalmichener', (), ()),
        ('sokal_sneath', 'sokalsneath', 'sokalsneath', (), ()),
        ('sqeuclidean', 'sqeuclidean', 'sqeuclidean', (), ()),
        ('minkowski', 'minkowski', 'minkowski', (2,), (2,)),
        ('rogerstanimoto', 'rogerstanimoto', 'rogerstanimoto', (), ()),
        ('russellrao', 'russellrao', 'russellrao', (), ()),
        ('wminkowski', 'wminkowski', 'wminkowski',
         (2, np.ones(3)), (2, np.ones(3))),
        ('yule', 'yule', 'yule', (), ()),
    ]

    print(separator)
    for label, own_name, scipy_name, own_extra, scipy_extra in cases:
        own_value = getattr(d, own_name)(a, b, *own_extra)
        print('My       {}: {}'.format(label, own_value))

        scipy_fn = getattr(distance, scipy_name, None)
        if scipy_fn is None:
            scipy_value = 'not available in this SciPy version'
        else:
            scipy_value = scipy_fn(a, b, *scipy_extra)
        print('SciPy    {}: {}'.format(label, scipy_value))
        print(separator)

# Script entry point: run the side-by-side comparison only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
-----------------------------------------------------------------
My       braycurtis: 0.8461538461538461
SciPy    braycurtis: 0.8461538461538461
-----------------------------------------------------------------
My       canberra: 1.4545454545454546
SciPy    canberra: 1.4545454545454546
-----------------------------------------------------------------
My       chebyshev: 42
SciPy    chebyshev: 42
-----------------------------------------------------------------
My       cityblock: 44.0
SciPy    cityblock: 44
-----------------------------------------------------------------
My       correlation: 1.8762686682028846
SciPy    correlation: 1.8762686682028846
-----------------------------------------------------------------
My       euclidean: 42.04759208325728
SciPy    euclidean: 42.04759208325728
-----------------------------------------------------------------
My       hamming: 0.6666666666666666
SciPy    hamming: 0.6666666666666666
-----------------------------------------------------------------
My       jaccard: 0.6666666666666666
SciPy    jaccard: 0.6666666666666666
-----------------------------------------------------------------
My       manhattan: 44.0
SciPy    manhattan: 44
-----------------------------------------------------------------
My       cosine: 0.6896504488650589
SciPy    cosine: 0.6896504488650589
-----------------------------------------------------------------
My       dice: -0.9230769230769231
SciPy    dice: -0.9230769230769231
-----------------------------------------------------------------
My       kulsinski: 2.111111111111111
SciPy    kulsinski: 2.111111111111111
-----------------------------------------------------------------
My       mahalanobis: 41.036569057366385
SciPy    mahalanobis: 41.036569057366385
-----------------------------------------------------------------
My       seuclidean: 132.9661611087573
SciPy    seuclidean: 132.9661611087573
-----------------------------------------------------------------
My       sokalmichener: 2.1333333333333333
SciPy    sokalmichener: 2.1333333333333333
-----------------------------------------------------------------
My       sokal_sneath: 2.0869565217391304
SciPy    sokal_sneath: 2.0869565217391304
-----------------------------------------------------------------
My       sqeuclidean: 1768
SciPy    sqeuclidean: 1768.0
-----------------------------------------------------------------
My       minkowski: 42.04759208325728
SciPy    minkowski: 42.04759208325728
-----------------------------------------------------------------
My       rogerstanimoto: 2.1333333333333333
SciPy    rogerstanimoto: 2.1333333333333333
-----------------------------------------------------------------
My       russellrao: -15.666666666666666
SciPy    russellrao: -15.666666666666666
-----------------------------------------------------------------
My       wminkowski: 42.04759208325728
SciPy    wminkowski: 42.04759208325728
-----------------------------------------------------------------
My       yule: 1.5575221238938053
SciPy    yule: 1.5575221238938053
-----------------------------------------------------------------