incrementalstats.mean_var
from __future__ import annotations

import numpy as np


class IncrementalMeanVariance:
    """Incrementally computes vectorized mean and variance.

    When feeding every row of an MxN matrix, this code
    computes the mean/variance over axis 0 of this matrix, as
    demonstrated in the example below.

    This code is useful when the matrix cannot be held
    in memory all at once.

    ```
    import numpy as np
    from risca.statistics import IncrementalMeanVariance

    nX = 100
    nY = 10
    m = np.random.randn(nX, nY)

    im = IncrementalMeanVariance(nY)
    for row in range(nX):
        im.update(m[row,:])

    reference_mean = m.mean(axis=0)
    reference_variance = m.var(axis=0, ddof=1)
    assert np.allclose(im.getMean(), reference_mean)
    assert np.allclose(im.getVariance(), reference_variance)
    ```

    """

    def __init__(self, ncolumns):
        """Initialize with the #columns of the hypothetical matrix M over
        which we will compute the mean / variance"""
        self._nX = ncolumns
        self._mean = np.zeros(ncolumns, dtype=np.float64)
        # NOTE: despite its name, ``_var`` accumulates the sum of squared
        # deviations from the mean; getVariance() divides it by (n - 1).
        self._var = np.zeros(ncolumns, dtype=np.float64)
        self._n = 0

    def add(self, other: "IncrementalMeanVariance"):
        """Merges another object of IncrementalMeanVariance into this mean/variance.

        This is useful in parallelized computations, where different nodes
        compute mean/variance over different ranges of rows. ``other`` is
        left unmodified.

        Raises:
            ValueError: if ``other`` tracks a different number of columns.
        """
        if other._nX != self._nX:
            raise ValueError("wrong length")
        if other._n == 0:
            # Nothing to merge; also avoids 0/0 when both sides are empty.
            return
        total = self._n + other._n
        delta = other._mean - self._mean
        self._mean += other._n * (delta / total)
        # Pairwise combination of the two sums of squared deviations.
        self._var += other._var + self._n * other._n * delta ** 2 / total
        self._n = total

    def update(self, row):
        """Updates the mean/variance with a single row.

        Raises:
            ValueError: if ``row`` does not hold exactly one value per column.
        """
        if len(row) != self._nX:
            raise ValueError("wrong length")
        # Convert before touching any state so a bad row cannot leave the
        # accumulator half-updated.
        x = np.asarray(row, dtype=np.float64)
        if x.ndim != 1:
            raise ValueError("row must be one-dimensional")

        self._n += 1
        y1 = x - self._mean  # deviation from the previous mean
        self._mean += y1 / self._n
        y2 = x - self._mean  # deviation from the updated mean
        self._var += y1 * y2

    def getMean(self):
        """Returns a copy of the current mean"""
        return self._mean.copy()

    def getVariance(self):
        """Returns the current sample variance (normalized by n - 1).

        Raises:
            ValueError: if fewer than two observations have been seen.
        """
        if self._n < 2:
            raise ValueError("not enough data")
        return self._var / (self._n - 1)

    def getN(self):
        """Number of observations"""
        return self._n
class
IncrementalMeanVariance:
class IncrementalMeanVariance:
    """Incrementally computes vectorized mean and variance.

    When feeding every row of an MxN matrix, this code
    computes the mean/variance over axis 0 of this matrix, as
    demonstrated in the example below.

    This code is useful when the matrix cannot be held
    in memory all at once.

    ```
    import numpy as np
    from risca.statistics import IncrementalMeanVariance

    nX = 100
    nY = 10
    m = np.random.randn(nX, nY)

    im = IncrementalMeanVariance(nY)
    for row in range(nX):
        im.update(m[row,:])

    reference_mean = m.mean(axis=0)
    reference_variance = m.var(axis=0, ddof=1)
    assert np.allclose(im.getMean(), reference_mean)
    assert np.allclose(im.getVariance(), reference_variance)
    ```

    """

    def __init__(self, ncolumns):
        """Initialize with the #columns of the hypothetical matrix M over
        which we will compute the mean / variance"""
        self._nX = ncolumns
        self._mean = np.zeros(ncolumns, dtype=np.float64)
        # NOTE: despite its name, ``_var`` accumulates the sum of squared
        # deviations from the mean; getVariance() divides it by (n - 1).
        self._var = np.zeros(ncolumns, dtype=np.float64)
        self._n = 0

    def add(self, other: "IncrementalMeanVariance"):
        """Merges another object of IncrementalMeanVariance into this mean/variance.

        This is useful in parallelized computations, where different nodes
        compute mean/variance over different ranges of rows. ``other`` is
        left unmodified.

        Raises:
            ValueError: if ``other`` tracks a different number of columns.
        """
        if other._nX != self._nX:
            raise ValueError("wrong length")
        if other._n == 0:
            # Nothing to merge; also avoids 0/0 when both sides are empty.
            return
        total = self._n + other._n
        delta = other._mean - self._mean
        self._mean += other._n * (delta / total)
        # Pairwise combination of the two sums of squared deviations.
        self._var += other._var + self._n * other._n * delta ** 2 / total
        self._n = total

    def update(self, row):
        """Updates the mean/variance with a single row.

        Raises:
            ValueError: if ``row`` does not hold exactly one value per column.
        """
        if len(row) != self._nX:
            raise ValueError("wrong length")
        # Convert before touching any state so a bad row cannot leave the
        # accumulator half-updated.
        x = np.asarray(row, dtype=np.float64)
        if x.ndim != 1:
            raise ValueError("row must be one-dimensional")

        self._n += 1
        y1 = x - self._mean  # deviation from the previous mean
        self._mean += y1 / self._n
        y2 = x - self._mean  # deviation from the updated mean
        self._var += y1 * y2

    def getMean(self):
        """Returns a copy of the current mean"""
        return self._mean.copy()

    def getVariance(self):
        """Returns the current sample variance (normalized by n - 1).

        Raises:
            ValueError: if fewer than two observations have been seen.
        """
        if self._n < 2:
            raise ValueError("not enough data")
        return self._var / (self._n - 1)

    def getN(self):
        """Number of observations"""
        return self._n
Incrementally computes vectorized mean and variance.
When feeding every row of an MxN matrix, this code computes the mean/variance over axis 0 of this matrix, as demonstrated in the example below.
This code is useful when the matrix cannot be held in memory all at once.
import numpy as np
from risca.statistics import IncrementalMeanVariance
nX = 100
nY = 10
m = np.random.randn(nX, nY)
im = IncrementalMeanVariance(nY)
for row in range(nX):
    im.update(m[row,:])
reference_mean = m.mean(axis=0)
reference_variance = m.var(axis=0, ddof=1)
assert np.allclose(im.getMean(), reference_mean)
assert np.allclose(im.getVariance(), reference_variance)
IncrementalMeanVariance(ncolumns)
def __init__(self, ncolumns):
    """Create an empty accumulator for a matrix with ``ncolumns`` columns.

    ``ncolumns`` is the number of columns of the hypothetical matrix M
    over which the mean / variance will be computed.
    """
    # No observations have been folded in yet.
    self._n = 0
    self._nX = ncolumns
    # Two independent float64 buffers, one slot per column.
    self._var = np.zeros(ncolumns, dtype=np.float64)
    self._mean = np.zeros(ncolumns, dtype=np.float64)
Initialize with the #columns of the hypothetical matrix M over which we will compute the mean / variance
def add(self, other: "IncrementalMeanVariance"):
    """Merges another object of IncrementalMeanVariance into this mean/variance.

    This is useful in parallelized computations, where different nodes
    compute mean/variance over different ranges of rows. ``other`` is
    left unmodified.

    Raises:
        ValueError: if ``other`` tracks a different number of columns.
    """
    if other._nX != self._nX:
        raise ValueError("wrong length")
    if other._n == 0:
        # Nothing to merge; also avoids 0/0 when both sides are empty.
        return
    total = self._n + other._n
    delta = other._mean - self._mean
    self._mean += other._n * (delta / total)
    # Pairwise combination of the two accumulated ``_var`` terms.
    self._var += other._var + self._n * other._n * delta ** 2 / total
    self._n = total
Merges another object of IncrementalMeanVariance into this mean/variance. This is useful in parallelized computations, where different nodes compute mean/variance over different ranges of rows