incrementalstats.mean_var

 1from __future__ import annotations
 2import numpy as np
 3
 4class IncrementalMeanVariance:
 5    """Incrementally computes vectorized mean and variance.
 6
 7    When feeding every row of an MxN matrix, this code
 8    computes the mean/variance over axis 0 of this matrix, as
 9    demonstrated in the example below.
10
11    This code is useful when the matrix cannot be not present
12    in memory at one given time.
13
14    ```
15    import numpy as np
16    from risca.statistics import IncrementalMeanVariance
17
18    nX = 100
19    nY = 10
20    m = np.random.randn(nX, nY)
21
22    im = IncrementalMeanVariance(nY)
23    for row in range(nX):
24        im.update(m[row,:])
25
26    reference_mean = m._mean(axis=0)
27    reference_variance = m._var(axis=0, ddof=1)
28    assert np.allclose(im.getMean(), reference_mean)
29    assert np.allclose(im.getVariance(), reference_variance)
30    ```
31
32    """
33
34    def __init__(self, ncolumns):
35        """Initialize with the #columns of the hypothetical matrix M over
36        which we will compute the mean / variance"""
37        nX = ncolumns
38        self._nX = nX
39        self._mean = np.zeros(nX, dtype=np.float64)
40        self._var = np.zeros(nX, dtype=np.float64)
41        self._n = 0
42
43    def add(self, other: IncrementalMeanVariance):
44        """Merges another object of IncrementalMeanVariance into this mean/variance. This is useful in
45        parallelized computations, where different nodes compute mean/variance over different
46        ranges of rows"""
47        x = other
48        n = self._n + x._n
49        delta = x._mean - self._mean
50        self._mean += x._n * (delta / n)
51        self._var += x._var + self._n * x._n * delta ** 2 / n
52        self._n = n
53
54    def update(self, row):
55        x = row
56        """Updates the mean/variance with a single row. """
57        if len(x) != self._nX:
58            raise Exception("wrong length")
59
60        self._n += 1
61        y1 = x - self._mean
62        self._mean += y1 / self._n
63        y2 = x - self._mean
64        self._var += y1 * y2
65
66    def getMean(self):
67        """Returns the current mean"""
68        return self._mean.copy()
69
70    def getVariance(self):
71        """Returns the current variance"""
72        if self._n < 2:
73            raise Exception("not enough data")
74        return 1/(self._n - 1) * self._var
75
76    def getN(self):
77        """Number of observations"""
78        return self._n
class IncrementalMeanVariance:
 5class IncrementalMeanVariance:
 6    """Incrementally computes vectorized mean and variance.
 7
 8    When feeding every row of an MxN matrix, this code
 9    computes the mean/variance over axis 0 of this matrix, as
10    demonstrated in the example below.
11
12    This code is useful when the matrix cannot be not present
13    in memory at one given time.
14
15    ```
16    import numpy as np
17    from risca.statistics import IncrementalMeanVariance
18
19    nX = 100
20    nY = 10
21    m = np.random.randn(nX, nY)
22
23    im = IncrementalMeanVariance(nY)
24    for row in range(nX):
25        im.update(m[row,:])
26
27    reference_mean = m._mean(axis=0)
28    reference_variance = m._var(axis=0, ddof=1)
29    assert np.allclose(im.getMean(), reference_mean)
30    assert np.allclose(im.getVariance(), reference_variance)
31    ```
32
33    """
34
35    def __init__(self, ncolumns):
36        """Initialize with the #columns of the hypothetical matrix M over
37        which we will compute the mean / variance"""
38        nX = ncolumns
39        self._nX = nX
40        self._mean = np.zeros(nX, dtype=np.float64)
41        self._var = np.zeros(nX, dtype=np.float64)
42        self._n = 0
43
44    def add(self, other: IncrementalMeanVariance):
45        """Merges another object of IncrementalMeanVariance into this mean/variance. This is useful in
46        parallelized computations, where different nodes compute mean/variance over different
47        ranges of rows"""
48        x = other
49        n = self._n + x._n
50        delta = x._mean - self._mean
51        self._mean += x._n * (delta / n)
52        self._var += x._var + self._n * x._n * delta ** 2 / n
53        self._n = n
54
55    def update(self, row):
56        x = row
57        """Updates the mean/variance with a single row. """
58        if len(x) != self._nX:
59            raise Exception("wrong length")
60
61        self._n += 1
62        y1 = x - self._mean
63        self._mean += y1 / self._n
64        y2 = x - self._mean
65        self._var += y1 * y2
66
67    def getMean(self):
68        """Returns the current mean"""
69        return self._mean.copy()
70
71    def getVariance(self):
72        """Returns the current variance"""
73        if self._n < 2:
74            raise Exception("not enough data")
75        return 1/(self._n - 1) * self._var
76
77    def getN(self):
78        """Number of observations"""
79        return self._n

Incrementally computes vectorized mean and variance.

When feeding every row of an MxN matrix, this code computes the mean/variance over axis 0 of this matrix, as demonstrated in the example below.

This code is useful when the matrix cannot be held in memory all at once.

import numpy as np
from risca.statistics import IncrementalMeanVariance

nX = 100
nY = 10
m = np.random.randn(nX, nY)

im = IncrementalMeanVariance(nY)
for row in range(nX):
    im.update(m[row,:])

reference_mean = m.mean(axis=0)
reference_variance = m.var(axis=0, ddof=1)
assert np.allclose(im.getMean(), reference_mean)
assert np.allclose(im.getVariance(), reference_variance)
IncrementalMeanVariance(ncolumns)
35    def __init__(self, ncolumns):
36        """Initialize with the #columns of the hypothetical matrix M over
37        which we will compute the mean / variance"""
38        nX = ncolumns
39        self._nX = nX
40        self._mean = np.zeros(nX, dtype=np.float64)
41        self._var = np.zeros(nX, dtype=np.float64)
42        self._n = 0

Initialize with the number of columns of the hypothetical matrix M over which the mean / variance will be computed.

def add(self, other: IncrementalMeanVariance):
44    def add(self, other: IncrementalMeanVariance):
45        """Merges another object of IncrementalMeanVariance into this mean/variance. This is useful in
46        parallelized computations, where different nodes compute mean/variance over different
47        ranges of rows"""
48        x = other
49        n = self._n + x._n
50        delta = x._mean - self._mean
51        self._mean += x._n * (delta / n)
52        self._var += x._var + self._n * x._n * delta ** 2 / n
53        self._n = n

Merges another object of IncrementalMeanVariance into this mean/variance. This is useful in parallelized computations, where different nodes compute mean/variance over different ranges of rows

def update(self, row):
55    def update(self, row):
56        x = row
57        """Updates the mean/variance with a single row. """
58        if len(x) != self._nX:
59            raise Exception("wrong length")
60
61        self._n += 1
62        y1 = x - self._mean
63        self._mean += y1 / self._n
64        y2 = x - self._mean
65        self._var += y1 * y2
def getMean(self):
67    def getMean(self):
68        """Returns the current mean"""
69        return self._mean.copy()

Returns the current mean

def getVariance(self):
71    def getVariance(self):
72        """Returns the current variance"""
73        if self._n < 2:
74            raise Exception("not enough data")
75        return 1/(self._n - 1) * self._var

Returns the current variance

def getN(self):
77    def getN(self):
78        """Number of observations"""
79        return self._n

Number of observations