incrementalstats.covariance_correlation

  1from __future__ import annotations
  2import numpy as np
  3
  4from .mean_var import IncrementalMeanVariance
  5
  6class IncrementalCovarianceCorrelation:
  7    """Incrementally computes vectorized covariance and correlation.
  8
  9    When feeding every row of an MxN matrix A and every row of an MxO matrix B, this code
 10    updates a co-variance NxO matrix C where every cell C[r,c] is the covariance between
 11    A[:,r] and B[:,c].
 12
 13    This code is useful when the matrixes A and B cannot be not present
 14    in memory at one given time.
 15
 16    ```
 17    nX = 100
 18    nY1 = 30
 19    nY2 = 50
 20
 21    m1 = np.random.randn(nX, nY1)
 22    m2 = np.random.randn(nX, nY2)
 23
 24    ic = IncrementalCovarianceCorrelation(nY1, nY2)
 25
 26    for row in range(nX):
 27        ic.update(m1[row,:], m2[row,:])
 28
 29    reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.getMean()(m1, axis=0)).T, (m2 - np.getMean()(m2, axis=0)))
 30    reference_m1_stddev = np.std(m1, axis=0, ddof=1)
 31    reference_m2_stddev = np.std(m2, axis=0, ddof=1)
 32    reference_1_over_m1_stddev = (1 / numpy_m1_stddev).reshape(numpy_m1_stddev.size, 1)
 33    reference_1_over_m2_stddev = (1 / numpy_m2_stddev).reshape(1,numpy_m2_stddev.size)
 34    reference_correlation = numpy_covariance * numpy_1_over_m1_stddev * numpy_1_over_m2_stddev
 35
 36    assert (np.allclose(ic.getCovariance(), reference_covariance))
 37    assert (np.allclose(ic.getCorrelation(), reference_correlation))
 38
 39    ```
 40    """
 41
 42    def __init__(self, nX, nY):
 43        """Initialize with the #columns for matrices A and B"""
 44        self._nX = nX
 45        self._nXY = nY
 46        self._imX = IncrementalMeanVariance(nX)
 47        self._imY = IncrementalMeanVariance(nY)
 48        self._cov = np.zeros((nX, nY), dtype=np.float64)
 49        self._n = 0
 50
 51    def update(self, x, y):
 52        """Updates the covariance matrix with a single row of matrix A and a single row of matrix B"""
 53        if len(x) != self._nX:
 54            raise Exception("wrong x length")
 55        if len(y) != self._nXY:
 56            raise Exception("wrong y length")
 57
 58        self._n += 1
 59        f = (self._n - 1) / self._n
 60
 61        mfX = (x - self._imX.getMean()) * f
 62        mfY = y - self._imY.getMean()
 63
 64        self._cov += np.tensordot(mfX, mfY, axes=0)
 65
 66        self._imX.update(x)
 67        self._imY.update(y)
 68
 69    def add(self, x: IncrementalCovarianceCorrelation):
 70        """Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in
 71        parallelized computations, where different nodes compute co-variances over different
 72        ranges of rows"""
 73        n = self._n + x._n
 74        f = (self._n * x._n ** 2 + x._n * self._n ** 2) / (n ** 2)
 75
 76        deltaX = self._imX.getMean() - x._imX.getMean()
 77        deltaX = deltaX.reshape(deltaX.size, 1) * f
 78
 79        deltaY = self._imY.getMean() - x._imY.getMean()
 80        deltaY = deltaY.reshape(1, deltaY.size)
 81
 82        self._cov += x._cov + deltaX * deltaY
 83        self._n = n
 84        self._imX.add(x._imX)
 85        self._imY.add(x._imY)
 86
 87    def getCovariance(self):
 88        """Returns the scaled co-variance matrix with 1 degree of freedom"""
 89        return 1 / (self._n - 1) * self._cov
 90
 91    def getCorrelation(self):
 92        """Returns Pearson's correlation matrix"""
 93        sX = 1 / np.sqrt(self._imX.getVariance())
 94        sX = sX.reshape(sX.size, 1)
 95        sY = 1 / np.sqrt(self._imY.getVariance())
 96        sY = sY.reshape(1, sY.size)
 97        return 1 / (self._n - 1) * self._cov * sX * sY
 98
 99    def getN(self):
100        """Number of observations"""
101        return self._n
class IncrementalCovarianceCorrelation:
  7class IncrementalCovarianceCorrelation:
  8    """Incrementally computes vectorized covariance and correlation.
  9
 10    When feeding every row of an MxN matrix A and every row of an MxO matrix B, this code
 11    updates a co-variance NxO matrix C where every cell C[r,c] is the covariance between
 12    A[:,r] and B[:,c].
 13
 14    This code is useful when the matrixes A and B cannot be not present
 15    in memory at one given time.
 16
 17    ```
 18    nX = 100
 19    nY1 = 30
 20    nY2 = 50
 21
 22    m1 = np.random.randn(nX, nY1)
 23    m2 = np.random.randn(nX, nY2)
 24
 25    ic = IncrementalCovarianceCorrelation(nY1, nY2)
 26
 27    for row in range(nX):
 28        ic.update(m1[row,:], m2[row,:])
 29
 30    reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.getMean()(m1, axis=0)).T, (m2 - np.getMean()(m2, axis=0)))
 31    reference_m1_stddev = np.std(m1, axis=0, ddof=1)
 32    reference_m2_stddev = np.std(m2, axis=0, ddof=1)
 33    reference_1_over_m1_stddev = (1 / numpy_m1_stddev).reshape(numpy_m1_stddev.size, 1)
 34    reference_1_over_m2_stddev = (1 / numpy_m2_stddev).reshape(1,numpy_m2_stddev.size)
 35    reference_correlation = numpy_covariance * numpy_1_over_m1_stddev * numpy_1_over_m2_stddev
 36
 37    assert (np.allclose(ic.getCovariance(), reference_covariance))
 38    assert (np.allclose(ic.getCorrelation(), reference_correlation))
 39
 40    ```
 41    """
 42
 43    def __init__(self, nX, nY):
 44        """Initialize with the #columns for matrices A and B"""
 45        self._nX = nX
 46        self._nXY = nY
 47        self._imX = IncrementalMeanVariance(nX)
 48        self._imY = IncrementalMeanVariance(nY)
 49        self._cov = np.zeros((nX, nY), dtype=np.float64)
 50        self._n = 0
 51
 52    def update(self, x, y):
 53        """Updates the covariance matrix with a single row of matrix A and a single row of matrix B"""
 54        if len(x) != self._nX:
 55            raise Exception("wrong x length")
 56        if len(y) != self._nXY:
 57            raise Exception("wrong y length")
 58
 59        self._n += 1
 60        f = (self._n - 1) / self._n
 61
 62        mfX = (x - self._imX.getMean()) * f
 63        mfY = y - self._imY.getMean()
 64
 65        self._cov += np.tensordot(mfX, mfY, axes=0)
 66
 67        self._imX.update(x)
 68        self._imY.update(y)
 69
 70    def add(self, x: IncrementalCovarianceCorrelation):
 71        """Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in
 72        parallelized computations, where different nodes compute co-variances over different
 73        ranges of rows"""
 74        n = self._n + x._n
 75        f = (self._n * x._n ** 2 + x._n * self._n ** 2) / (n ** 2)
 76
 77        deltaX = self._imX.getMean() - x._imX.getMean()
 78        deltaX = deltaX.reshape(deltaX.size, 1) * f
 79
 80        deltaY = self._imY.getMean() - x._imY.getMean()
 81        deltaY = deltaY.reshape(1, deltaY.size)
 82
 83        self._cov += x._cov + deltaX * deltaY
 84        self._n = n
 85        self._imX.add(x._imX)
 86        self._imY.add(x._imY)
 87
 88    def getCovariance(self):
 89        """Returns the scaled co-variance matrix with 1 degree of freedom"""
 90        return 1 / (self._n - 1) * self._cov
 91
 92    def getCorrelation(self):
 93        """Returns Pearson's correlation matrix"""
 94        sX = 1 / np.sqrt(self._imX.getVariance())
 95        sX = sX.reshape(sX.size, 1)
 96        sY = 1 / np.sqrt(self._imY.getVariance())
 97        sY = sY.reshape(1, sY.size)
 98        return 1 / (self._n - 1) * self._cov * sX * sY
 99
100    def getN(self):
101        """Number of observations"""
102        return self._n

Incrementally computes vectorized covariance and correlation.

When feeding every row of an MxN matrix A and every row of an MxO matrix B, this code updates a co-variance NxO matrix C where every cell C[r,c] is the covariance between A[:,r] and B[:,c].

This code is useful when the matrixes A and B cannot be not present in memory at one given time.

nX = 100
nY1 = 30
nY2 = 50

m1 = np.random.randn(nX, nY1)
m2 = np.random.randn(nX, nY2)

ic = IncrementalCovarianceCorrelation(nY1, nY2)

for row in range(nX):
    ic.update(m1[row,:], m2[row,:])

reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.getMean()(m1, axis=0)).T, (m2 - np.getMean()(m2, axis=0)))
reference_m1_stddev = np.std(m1, axis=0, ddof=1)
reference_m2_stddev = np.std(m2, axis=0, ddof=1)
reference_1_over_m1_stddev = (1 / numpy_m1_stddev).reshape(numpy_m1_stddev.size, 1)
reference_1_over_m2_stddev = (1 / numpy_m2_stddev).reshape(1,numpy_m2_stddev.size)
reference_correlation = numpy_covariance * numpy_1_over_m1_stddev * numpy_1_over_m2_stddev

assert (np.allclose(ic.getCovariance(), reference_covariance))
assert (np.allclose(ic.getCorrelation(), reference_correlation))

IncrementalCovarianceCorrelation(nX, nY)
43    def __init__(self, nX, nY):
44        """Initialize with the #columns for matrices A and B"""
45        self._nX = nX
46        self._nXY = nY
47        self._imX = IncrementalMeanVariance(nX)
48        self._imY = IncrementalMeanVariance(nY)
49        self._cov = np.zeros((nX, nY), dtype=np.float64)
50        self._n = 0

Initialize with the #columns for matrices A and B

def update(self, x, y):
52    def update(self, x, y):
53        """Updates the covariance matrix with a single row of matrix A and a single row of matrix B"""
54        if len(x) != self._nX:
55            raise Exception("wrong x length")
56        if len(y) != self._nXY:
57            raise Exception("wrong y length")
58
59        self._n += 1
60        f = (self._n - 1) / self._n
61
62        mfX = (x - self._imX.getMean()) * f
63        mfY = y - self._imY.getMean()
64
65        self._cov += np.tensordot(mfX, mfY, axes=0)
66
67        self._imX.update(x)
68        self._imY.update(y)

Updates the covariance matrix with a single row of matrix A and a single row of matrix B

def add( self, x: IncrementalCovarianceCorrelation):
70    def add(self, x: IncrementalCovarianceCorrelation):
71        """Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in
72        parallelized computations, where different nodes compute co-variances over different
73        ranges of rows"""
74        n = self._n + x._n
75        f = (self._n * x._n ** 2 + x._n * self._n ** 2) / (n ** 2)
76
77        deltaX = self._imX.getMean() - x._imX.getMean()
78        deltaX = deltaX.reshape(deltaX.size, 1) * f
79
80        deltaY = self._imY.getMean() - x._imY.getMean()
81        deltaY = deltaY.reshape(1, deltaY.size)
82
83        self._cov += x._cov + deltaX * deltaY
84        self._n = n
85        self._imX.add(x._imX)
86        self._imY.add(x._imY)

Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in parallelized computations, where different nodes compute co-variances over different ranges of rows

def getCovariance(self):
88    def getCovariance(self):
89        """Returns the scaled co-variance matrix with 1 degree of freedom"""
90        return 1 / (self._n - 1) * self._cov

Returns the scaled co-variance matrix with 1 degree of freedom

def getCorrelation(self):
92    def getCorrelation(self):
93        """Returns Pearson's correlation matrix"""
94        sX = 1 / np.sqrt(self._imX.getVariance())
95        sX = sX.reshape(sX.size, 1)
96        sY = 1 / np.sqrt(self._imY.getVariance())
97        sY = sY.reshape(1, sY.size)
98        return 1 / (self._n - 1) * self._cov * sX * sY

Returns Pearson's correlation matrix

def getN(self):
100    def getN(self):
101        """Number of observations"""
102        return self._n

Number of observations