incrementalstats.covariance_correlation
1from __future__ import annotations 2import numpy as np 3 4from .mean_var import IncrementalMeanVariance 5 6class IncrementalCovarianceCorrelation: 7 """Incrementally computes vectorized covariance and correlation. 8 9 When feeding every row of an MxN matrix A and every row of an MxO matrix B, this code 10 updates a co-variance NxO matrix C where every cell C[r,c] is the covariance between 11 A[:,r] and B[:,c]. 12 13 This code is useful when the matrixes A and B cannot be not present 14 in memory at one given time. 15 16 ``` 17 nX = 100 18 nY1 = 30 19 nY2 = 50 20 21 m1 = np.random.randn(nX, nY1) 22 m2 = np.random.randn(nX, nY2) 23 24 ic = IncrementalCovarianceCorrelation(nY1, nY2) 25 26 for row in range(nX): 27 ic.update(m1[row,:], m2[row,:]) 28 29 reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.getMean()(m1, axis=0)).T, (m2 - np.getMean()(m2, axis=0))) 30 reference_m1_stddev = np.std(m1, axis=0, ddof=1) 31 reference_m2_stddev = np.std(m2, axis=0, ddof=1) 32 reference_1_over_m1_stddev = (1 / numpy_m1_stddev).reshape(numpy_m1_stddev.size, 1) 33 reference_1_over_m2_stddev = (1 / numpy_m2_stddev).reshape(1,numpy_m2_stddev.size) 34 reference_correlation = numpy_covariance * numpy_1_over_m1_stddev * numpy_1_over_m2_stddev 35 36 assert (np.allclose(ic.getCovariance(), reference_covariance)) 37 assert (np.allclose(ic.getCorrelation(), reference_correlation)) 38 39 ``` 40 """ 41 42 def __init__(self, nX, nY): 43 """Initialize with the #columns for matrices A and B""" 44 self._nX = nX 45 self._nXY = nY 46 self._imX = IncrementalMeanVariance(nX) 47 self._imY = IncrementalMeanVariance(nY) 48 self._cov = np.zeros((nX, nY), dtype=np.float64) 49 self._n = 0 50 51 def update(self, x, y): 52 """Updates the covariance matrix with a single row of matrix A and a single row of matrix B""" 53 if len(x) != self._nX: 54 raise Exception("wrong x length") 55 if len(y) != self._nXY: 56 raise Exception("wrong y length") 57 58 self._n += 1 59 f = (self._n - 1) / self._n 60 61 mfX = (x - self._imX.getMean()) * f 62 mfY = y - self._imY.getMean() 63 64 self._cov += np.tensordot(mfX, mfY, axes=0) 65 66 self._imX.update(x) 67 self._imY.update(y) 68 69 def add(self, x: IncrementalCovarianceCorrelation): 70 """Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in 71 parallelized computations, where different nodes compute co-variances over different 72 ranges of rows""" 73 n = self._n + x._n 74 f = (self._n * x._n ** 2 + x._n * self._n ** 2) / (n ** 2) 75 76 deltaX = self._imX.getMean() - x._imX.getMean() 77 deltaX = deltaX.reshape(deltaX.size, 1) * f 78 79 deltaY = self._imY.getMean() - x._imY.getMean() 80 deltaY = deltaY.reshape(1, deltaY.size) 81 82 self._cov += x._cov + deltaX * deltaY 83 self._n = n 84 self._imX.add(x._imX) 85 self._imY.add(x._imY) 86 87 def getCovariance(self): 88 """Returns the scaled co-variance matrix with 1 degree of freedom""" 89 return 1 / (self._n - 1) * self._cov 90 91 def getCorrelation(self): 92 """Returns Pearson's correlation matrix""" 93 sX = 1 / np.sqrt(self._imX.getVariance()) 94 sX = sX.reshape(sX.size, 1) 95 sY = 1 / np.sqrt(self._imY.getVariance()) 96 sY = sY.reshape(1, sY.size) 97 return 1 / (self._n - 1) * self._cov * sX * sY 98 99 def getN(self): 100 """Number of observations""" 101 return self._n
7class IncrementalCovarianceCorrelation: 8 """Incrementally computes vectorized covariance and correlation. 9 10 When feeding every row of an MxN matrix A and every row of an MxO matrix B, this code 11 updates a co-variance NxO matrix C where every cell C[r,c] is the covariance between 12 A[:,r] and B[:,c]. 13 14 This code is useful when the matrixes A and B cannot be not present 15 in memory at one given time. 16 17 ``` 18 nX = 100 19 nY1 = 30 20 nY2 = 50 21 22 m1 = np.random.randn(nX, nY1) 23 m2 = np.random.randn(nX, nY2) 24 25 ic = IncrementalCovarianceCorrelation(nY1, nY2) 26 27 for row in range(nX): 28 ic.update(m1[row,:], m2[row,:]) 29 30 reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.getMean()(m1, axis=0)).T, (m2 - np.getMean()(m2, axis=0))) 31 reference_m1_stddev = np.std(m1, axis=0, ddof=1) 32 reference_m2_stddev = np.std(m2, axis=0, ddof=1) 33 reference_1_over_m1_stddev = (1 / numpy_m1_stddev).reshape(numpy_m1_stddev.size, 1) 34 reference_1_over_m2_stddev = (1 / numpy_m2_stddev).reshape(1,numpy_m2_stddev.size) 35 reference_correlation = numpy_covariance * numpy_1_over_m1_stddev * numpy_1_over_m2_stddev 36 37 assert (np.allclose(ic.getCovariance(), reference_covariance)) 38 assert (np.allclose(ic.getCorrelation(), reference_correlation)) 39 40 ``` 41 """ 42 43 def __init__(self, nX, nY): 44 """Initialize with the #columns for matrices A and B""" 45 self._nX = nX 46 self._nXY = nY 47 self._imX = IncrementalMeanVariance(nX) 48 self._imY = IncrementalMeanVariance(nY) 49 self._cov = np.zeros((nX, nY), dtype=np.float64) 50 self._n = 0 51 52 def update(self, x, y): 53 """Updates the covariance matrix with a single row of matrix A and a single row of matrix B""" 54 if len(x) != self._nX: 55 raise Exception("wrong x length") 56 if len(y) != self._nXY: 57 raise Exception("wrong y length") 58 59 self._n += 1 60 f = (self._n - 1) / self._n 61 62 mfX = (x - self._imX.getMean()) * f 63 mfY = y - self._imY.getMean() 64 65 self._cov += np.tensordot(mfX, mfY, axes=0) 66 67 self._imX.update(x) 68 self._imY.update(y) 69 70 def add(self, x: IncrementalCovarianceCorrelation): 71 """Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in 72 parallelized computations, where different nodes compute co-variances over different 73 ranges of rows""" 74 n = self._n + x._n 75 f = (self._n * x._n ** 2 + x._n * self._n ** 2) / (n ** 2) 76 77 deltaX = self._imX.getMean() - x._imX.getMean() 78 deltaX = deltaX.reshape(deltaX.size, 1) * f 79 80 deltaY = self._imY.getMean() - x._imY.getMean() 81 deltaY = deltaY.reshape(1, deltaY.size) 82 83 self._cov += x._cov + deltaX * deltaY 84 self._n = n 85 self._imX.add(x._imX) 86 self._imY.add(x._imY) 87 88 def getCovariance(self): 89 """Returns the scaled co-variance matrix with 1 degree of freedom""" 90 return 1 / (self._n - 1) * self._cov 91 92 def getCorrelation(self): 93 """Returns Pearson's correlation matrix""" 94 sX = 1 / np.sqrt(self._imX.getVariance()) 95 sX = sX.reshape(sX.size, 1) 96 sY = 1 / np.sqrt(self._imY.getVariance()) 97 sY = sY.reshape(1, sY.size) 98 return 1 / (self._n - 1) * self._cov * sX * sY 99 100 def getN(self): 101 """Number of observations""" 102 return self._n
Incrementally computes vectorized covariance and correlation.
When feeding every row of an MxN matrix A and every row of an MxO matrix B, this code updates a co-variance NxO matrix C where every cell C[r,c] is the covariance between A[:,r] and B[:,c].
This code is useful when the matrixes A and B cannot be not present in memory at one given time.
nX = 100
nY1 = 30
nY2 = 50
m1 = np.random.randn(nX, nY1)
m2 = np.random.randn(nX, nY2)
ic = IncrementalCovarianceCorrelation(nY1, nY2)
for row in range(nX):
ic.update(m1[row,:], m2[row,:])
reference_covariance = (1 / (nX - 1)) * np.matmul((m1 - np.getMean()(m1, axis=0)).T, (m2 - np.getMean()(m2, axis=0)))
reference_m1_stddev = np.std(m1, axis=0, ddof=1)
reference_m2_stddev = np.std(m2, axis=0, ddof=1)
reference_1_over_m1_stddev = (1 / numpy_m1_stddev).reshape(numpy_m1_stddev.size, 1)
reference_1_over_m2_stddev = (1 / numpy_m2_stddev).reshape(1,numpy_m2_stddev.size)
reference_correlation = numpy_covariance * numpy_1_over_m1_stddev * numpy_1_over_m2_stddev
assert (np.allclose(ic.getCovariance(), reference_covariance))
assert (np.allclose(ic.getCorrelation(), reference_correlation))
43 def __init__(self, nX, nY): 44 """Initialize with the #columns for matrices A and B""" 45 self._nX = nX 46 self._nXY = nY 47 self._imX = IncrementalMeanVariance(nX) 48 self._imY = IncrementalMeanVariance(nY) 49 self._cov = np.zeros((nX, nY), dtype=np.float64) 50 self._n = 0
Initialize with the #columns for matrices A and B
52 def update(self, x, y): 53 """Updates the covariance matrix with a single row of matrix A and a single row of matrix B""" 54 if len(x) != self._nX: 55 raise Exception("wrong x length") 56 if len(y) != self._nXY: 57 raise Exception("wrong y length") 58 59 self._n += 1 60 f = (self._n - 1) / self._n 61 62 mfX = (x - self._imX.getMean()) * f 63 mfY = y - self._imY.getMean() 64 65 self._cov += np.tensordot(mfX, mfY, axes=0) 66 67 self._imX.update(x) 68 self._imY.update(y)
Updates the covariance matrix with a single row of matrix A and a single row of matrix B
70 def add(self, x: IncrementalCovarianceCorrelation): 71 """Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in 72 parallelized computations, where different nodes compute co-variances over different 73 ranges of rows""" 74 n = self._n + x._n 75 f = (self._n * x._n ** 2 + x._n * self._n ** 2) / (n ** 2) 76 77 deltaX = self._imX.getMean() - x._imX.getMean() 78 deltaX = deltaX.reshape(deltaX.size, 1) * f 79 80 deltaY = self._imY.getMean() - x._imY.getMean() 81 deltaY = deltaY.reshape(1, deltaY.size) 82 83 self._cov += x._cov + deltaX * deltaY 84 self._n = n 85 self._imX.add(x._imX) 86 self._imY.add(x._imY)
Merges another object of IncrementalCovarianceCorrelation into this co-variance matrix. This is useful in parallelized computations, where different nodes compute co-variances over different ranges of rows
88 def getCovariance(self): 89 """Returns the scaled co-variance matrix with 1 degree of freedom""" 90 return 1 / (self._n - 1) * self._cov
Returns the scaled co-variance matrix with 1 degree of freedom
92 def getCorrelation(self): 93 """Returns Pearson's correlation matrix""" 94 sX = 1 / np.sqrt(self._imX.getVariance()) 95 sX = sX.reshape(sX.size, 1) 96 sY = 1 / np.sqrt(self._imY.getVariance()) 97 sY = sY.reshape(1, sY.size) 98 return 1 / (self._n - 1) * self._cov * sX * sY
Returns Pearson's correlation matrix