Files
docling-ibm-models/docling_ibm_models/tableformer/utils/variance.py
T
Nikos Livathinos 7445296e6a Initial commit
Signed-off-by: Nikos Livathinos <nli@zurich.ibm.com>
2024-07-15 11:23:59 +02:00

176 lines
4.6 KiB
Python

#
# Copyright IBM Corp. 2024 - 2024
# SPDX-License-Identifier: MIT
#
import logging
import numpy as np
import docling_ibm_models.tableformer.settings as s
LOG_LEVEL = logging.INFO
class MyWelford:
    r"""
    Running computation of the mean, variance and standard deviation using Welford's algorithm

    Samples are fed one at a time through `add`. The statistics of everything seen since
    construction (or since the last `reset`) are available at any point through `results`.
    """

    def __init__(self):
        self._i = 0  # Number of samples seen so far
        self._m = 0  # Running mean
        self._s = 0  # Running sum of squared deviations from the mean

    def reset(self):
        r"""
        Reset the object so that a new computation can start from scratch
        """
        self._i = 0
        self._m = 0
        self._s = 0

    def add(self, xi):
        r"""
        Invoke add each time a new sample arrives

        Inputs:
            xi: The next sample of data. Only arithmetic is applied to it, so a scalar or a
                numpy array works (MyWelfordImg passes one vector of channel values per call)
        """
        self._i += 1
        old_m = self._m
        self._m = self._m + (xi - self._m) / self._i
        # The product uses both the updated and the previous mean (Welford's update)
        self._s = self._s + (xi - self._m) * (xi - old_m)

    def results(self, ddof=0):
        r"""
        Get the computed mean, variance and standard deviation up to now

        Inputs:
            ddof: Delta degrees of freedom. The variance is computed as s / (n - ddof).
                  The default 0 gives the population variance, 1 gives the sample variance

        Outputs:
            m: Mean
            v: Variance
            std: Standard deviation

            All three are None when fewer than 2 samples (or no more than ddof samples)
            have been added
        """
        # Keep the historical "at least 2 samples" rule and also avoid a zero divisor
        if self._i <= max(1, ddof):
            return None, None, None

        v = self._s / (self._i - ddof)
        std = np.sqrt(v)
        return self._m, v, std
class MyWelfordImg(MyWelford):
    r"""
    Welford running statistics over images, where every pixel is one sample

    Each pixel contributes its vector of channel values as a single sample, so the
    statistics returned by `results` hold one entry per channel.
    """

    def __init__(self):
        super().__init__()

    def add(self, img):
        r"""
        Feed all pixels of an image into the running computation

        Input:
            img: An image numpy array (channel, width, height). The only requirement is to have
                 the channels as the first dimension and have 3 dimensions in total
        """
        num_channels = img.shape[0]
        num_pixels = img.shape[1] * img.shape[2]

        # One column per pixel; iterating the transpose yields these columns one by one
        pixels = img.reshape(num_channels, num_pixels)
        for pixel in pixels.T:
            super().add(pixel)
class ChanVarianceImg:
    r"""
    Chan's algorithm to compute a running variance with support of sub-samples
    In this implementation each sub-sample is an image

    Every image is reduced to its per-channel sum and per-channel sum of squared deviations,
    which are then merged into the running totals of the dataset.

    Math for the original paper:
    https://github.ibm.com/nli/variance_formulae
    """

    def __init__(self):
        r"""
        Start with an empty dataset
        """
        self._first = True
        # Size of the calculated dataset (number of pixels per channel)
        self._n = 0
        # Sum of the samples for each image channel
        self._t = 0
        # Sum of the square differences of the deviations of the samples from the mean
        self._s = 0

    def add(self, img):
        r"""
        Add the provided image to the computation of the dataset statistics

        Input:
            img: An image numpy array (channel, width, height). The only requirement is to have
                 the channels as the first dimension and have 3 dimensions in total
        """
        # Accumulate in float64 regardless of the input dtype. Otherwise float32 images make
        # the dataset-wide running sums live in float32 and lose precision as they grow
        img = np.asarray(img, dtype=np.float64)

        ch = img.shape[0]
        n = img.shape[1] * img.shape[2]
        img = img.reshape(ch, n)

        # Per-channel sum and sum of squared deviations of this image alone
        img_t = img.sum(axis=1)
        img_mean = img_t.reshape(ch, 1) / n
        img_s = ((img - img_mean) ** 2).sum(axis=1)

        if self._first:
            self._s = img_s
            self._t = img_t
            self._first = False
        else:
            # Correction term of the pairwise merge: accounts for the distance between the
            # mean of the data seen so far and the mean of the new image
            c = (self._n / (n * (self._n + n))) * (
                ((n / self._n) * self._t - img_t) ** 2
            )
            self._s += img_s + c
            self._t += img_t
        self._n += n

    def results(self):
        r"""
        Get the computed statistics

        Output:
            mean: Mean for the complete dataset
            var: Population variance for the complete dataset
            std: Population standard deviation for the complete dataset

            Each one is a list with one value per channel. All three are None when no image
            has been added yet
        """
        # Nothing added yet: avoid 0 / 0 and report "no data" like MyWelford.results does
        if self._n == 0:
            return None, None, None

        mean = list(self._t / self._n)
        var = list(self._s / self._n)  # Population variance
        std = list(np.sqrt(var))
        return mean, var, std

    def reset(self):
        r"""
        Reset the object to start over again
        """
        self._n = 0
        self._t = 0
        self._s = 0
        self._first = True
if __name__ == "__main__":
    # Smoke test: feed random images to ChanVarianceImg and check the shape of the results
    logger = s.get_custom_logger("variance", LOG_LEVEL)

    n = 50000
    channels = 3
    width = 448
    height = 448

    my = ChanVarianceImg()
    # Generate random images
    for i in range(n):
        logger.info(i)
        img = 255 * np.random.rand(channels, width, height)
        my.add(img)

    # Calculate the statistics
    m, v, std = my.results()

    # results() returns plain lists, which have no `.shape` attribute; np.shape accepts them
    assert np.shape(m) == (channels,), "Wrong mean dimension"
    assert np.shape(v) == (channels,), "Wrong variance dimension"
    assert np.shape(std) == (channels,), "Wrong std dimension"