Coverage for src/serums/error_processor.py: 0%
30 statements
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-11 16:43 +0000
« prev ^ index » next coverage.py v7.6.4, created at 2024-11-11 16:43 +0000
1"""For preprocessing errors."""
2import numpy as np
3from scipy.stats import ks_2samp, cramervonmises_2samp, anderson_ksamp
4import serums.models
5import pandas as pd
class BinnedError:
    """Class to represent binned error distribution.

    This class must be given truth data, measured data, dependency arrays,
    an I/O mapping function, and preferences.

    Typical use is ``normalize()`` -> ``remove_outliers()`` -> ``autobin()``.
    The error data is treated as one-dimensional, with each dependency
    array holding one value per error sample.

    Attributes
    ----------
    truth : array_like
        Truth data.
    measured : array_like
        Measured data.
    dependencies : array_like
        Dependency arrays. Either a single 1-D array or a 2-D array with one
        dependency per row (the row-per-variable layout of ``np.corrcoef``).
    io_map : function
        I/O mapping function.
    preferences : dict
        Preferences.
    error : array_like or None
        Measured minus truth; ``None`` until `normalize` has run.
    bins : list or None
        Result of the most recent `autobin` call; ``None`` before that.
    """

    # A bin is bisected when the absolute correlation between its errors and
    # any dependency exceeds this value.
    CORRELATION_THRESHOLD = 0.1
    # Errors further than this many standard deviations from the mean are
    # treated as outliers.
    OUTLIER_SIGMA = 10
    # Smallest bin `autobin` will produce; a correlation needs two samples.
    MIN_BIN_SIZE = 2

    def __init__(self, truth, measured, dependencies, io_map, preferences):
        self.truth = truth
        self.measured = measured
        self.dependencies = dependencies
        self.io_map = io_map
        self.preferences = preferences
        self.error = None
        self.bins = None
        # Positions (into the normalized error) that survived outlier
        # removal; lets `autobin` pick the matching dependency samples.
        self._kept = None

    def normalize(self):
        """Compute the error as measured minus truth and store it.

        Returns
        -------
        array_like
            The element-wise difference, also stored in ``self.error``.
        """
        # np.subtract also accepts plain sequences, unlike the ``-`` operator.
        self.error = np.subtract(self.measured, self.truth)
        self._kept = None
        return self.error

    def remove_outliers(self):
        """Drop errors more than `OUTLIER_SIGMA` standard deviations from the mean.

        The deviation is measured from the mean of the errors, so data with a
        constant offset is not discarded, and the comparison is inclusive so
        that a zero-spread distribution is kept intact.

        Returns
        -------
        array_like
            The filtered errors, also stored in ``self.error``.
        """
        error = self._require_error()
        values = np.asarray(error)
        if values.size == 0:
            return error

        deviation = np.abs(values - np.mean(values))
        keep = deviation <= self.OUTLIER_SIGMA * np.std(values)

        if values.ndim == 1:
            # Compose with any earlier removal so positions always refer to
            # the original, unfiltered samples.
            if self._kept is None:
                previous = np.arange(values.shape[0])
            else:
                previous = self._kept
            self._kept = previous[keep]

        self.error = error[keep]
        return self.error

    def autobin(self, error=None, dependencies=None):
        """Recursively bisect the errors while they correlate with a dependency.

        A bin is split at its midpoint whenever the absolute Pearson
        correlation between its errors and any dependency exceeds
        `CORRELATION_THRESHOLD`; the matching dependency samples are split
        alongside it. Splitting stops once a bin is uncorrelated or cannot be
        halved into bins of at least `MIN_BIN_SIZE` samples.

        Parameters
        ----------
        error : array_like, optional
            Errors to bin. Defaults to ``self.error`` (computed through
            `normalize` if it has not been computed yet).
        dependencies : array_like, optional
            Dependency samples aligned with `error`. Defaults to
            ``self.dependencies``, restricted to the samples that survived
            `remove_outliers` when the lengths require it.

        Returns
        -------
        list
            The bins in their original order, also stored in ``self.bins``.

        Raises
        ------
        ValueError
            If the dependencies cannot be aligned with `error`.
        """
        # ``not error`` would raise on multi-element arrays, so compare to None.
        if error is None:
            error = self._require_error()

        if dependencies is None:
            deps = self._dependencies_for(len(error))
        else:
            deps = np.atleast_2d(np.asarray(dependencies, dtype=float))
            if deps.shape[-1] != len(error):
                raise ValueError(
                    "dependencies must provide one sample per error value"
                )

        self.bins = self._split(error, deps)
        return self.bins

    def bisect(self, error):
        """Split `error` at its midpoint.

        Parameters
        ----------
        error : sequence
            Data to split.

        Returns
        -------
        list
            ``[first_half, second_half]``; for odd lengths the second half
            holds the extra element.
        """
        midpoint = len(error) // 2
        return [error[:midpoint], error[midpoint:]]

    def _require_error(self):
        """Return the current error, computing it on first use."""
        if self.error is None:
            self.normalize()
        return self.error

    def _dependencies_for(self, size):
        """Return ``self.dependencies`` as a 2-D array with `size` samples per row."""
        deps = np.atleast_2d(np.asarray(self.dependencies, dtype=float))
        if deps.shape[-1] == size:
            return deps
        # After outlier removal the error is shorter than the dependencies;
        # select the same surviving samples.
        if self._kept is not None and len(self._kept) == size:
            return deps[..., self._kept]
        raise ValueError(
            "dependencies must provide one sample per error value"
        )

    def _is_correlated(self, error, deps):
        """Tell whether `error` correlates with any row of `deps`."""
        # Constant inputs give NaN correlations, which compare as False.
        with np.errstate(invalid="ignore", divide="ignore"):
            # Row 0 is the error; skip the diagonal, which is always 1.
            correlation = np.corrcoef(np.asarray(error), deps)[0, 1:]
        return bool(np.any(np.abs(correlation) > self.CORRELATION_THRESHOLD))

    def _split(self, error, deps):
        """Return the list of leaf bins for `error` and its aligned `deps`."""
        size = len(error)
        if size < 2 * self.MIN_BIN_SIZE or not self._is_correlated(error, deps):
            return [error]
        midpoint = size // 2
        lower, upper = self.bisect(error)
        return (
            self._split(lower, deps[..., :midpoint])
            + self._split(upper, deps[..., midpoint:])
        )