Coverage for src/serums/distribution_overbounder.py: 51%

345 statements  

« prev     ^ index     » next       coverage.py v7.6.4, created at 2024-11-11 16:43 +0000

1"""For SERUMS Overbound Estimation.""" 

2 

3from __future__ import annotations 

4 

5import numpy as np 

6from scipy.optimize import minimize, basinhopping 

7import serums.models as smodels 

8import serums.distribution_estimator as de 

9from abc import abstractmethod, ABC 

10from scipy.stats import norm, halfnorm, t 

11import serums.errors 

12import scipy.special as special 

13from typing import List, Callable 

14import math 

15 

16 

17def fusion( 

18 varList: List[smodels.BaseSingleModel | smodels.BaseMixtureModel], 

19 poly: Callable[ 

20 [List[smodels.BaseSingleModel | smodels.BaseMixtureModel]], np.ndarray 

21 ], 

22) -> np.ndarray: 

23 """Calculate emperical output distribution as the fusion of inputs through a function. 

24 

25 This passes the input distributions through the provided function and gives an emperical sampling 

26 from the resulting distribution. It can be used to calculate how the input distributions change through 

27 the provided transformation. It is recommended to use a polynomial for the function but any function will 

28 work as long as standard arithmetic operators are performed on the distribution objects before any other 

29 complex operations (due to the operator overloading within the distribution objects to provide samples). 

30 

31 Returns 

32 ------- 

33 np.ndarray 

34 Samples from the resulting distribution in an N x 1 array 

35 """ 

36 max_size = max([x.monte_carlo_size for x in varList]) 

37 for x in varList: 

38 x.monte_carlo_size = int(max_size) 

39 return poly(*varList) 

40 

41 

42class OverbounderBase(ABC): 

43 """Represents base class for any overbounder object.""" 

44 

45 def __init__(self): 

46 pass 

47 

48 def _erf_gauss(self, X, mean, sigma): 

49 phi = (X - mean) / (2**0.5) 

50 n_terms = 55 

51 gain = 2 / (np.pi**0.5) 

52 

53 terms = np.zeros(n_terms) 

54 n = 0 

55 

56 while n < n_terms: 

57 terms[n] = ( 

58 ((-1) ** n) 

59 * (1 / (2 * n + 1)) 

60 * (1 / math.factorial(n)) 

61 * (phi / sigma) ** (2 * n + 1) 

62 ) 

63 n = n + 1 

64 

65 return gain * sum(terms) 

66 

67 def _dsig_erf_gauss(self, X, mean, sigma): 

68 phi = (X - mean) / (2**0.5) 

69 n_terms = 55 

70 gain = 2 / (np.pi**0.5) 

71 

72 terms = np.zeros(n_terms) 

73 n = 0 

74 

75 while n < n_terms: 

76 terms[n] = ( 

77 ((-1) ** (n + 1)) 

78 * (1 / math.factorial(n)) 

79 * (phi ** (2 * n + 1)) 

80 * ((1 / sigma) ** (2 * n + 2)) 

81 ) 

82 n = n + 1 

83 

84 return gain * sum(terms) 

85 

86 def _g_sigma(self, mean, Xi, Yi, sigma): 

87 sub = 1 - 2 * Yi 

88 return sub + self._erf_gauss(Xi, mean, sigma) 

89 

90 def _g_prime_sigma(self, mean, Xi, Yi, sigma): 

91 return self._dsig_erf_gauss(Xi, mean, sigma) 

92 

93 def _h_sigma(self, Xi, Yi, sigma): 

94 return -Yi + self._erf_gauss(Xi, 0, sigma) 

95 

96 def _h_prime_sigma(self, Xi, Yi, sigma): 

97 return self._dsig_erf_gauss(Xi, 0, sigma) 

98 

99 def _find_sigma_g(self, mean, Xi, Yi): 

100 sigma_0 = 0.75 * abs(Xi - mean) 

101 sigma_iter = sigma_0 

102 i = 1 

103 

104 while abs(self._g_sigma(mean, Xi, Yi, sigma_iter)) > 1e-14: 

105 sigma_iter = sigma_iter - ( 

106 self._g_sigma(mean, Xi, Yi, sigma_iter) 

107 / self._g_prime_sigma(mean, Xi, Yi, sigma_iter) 

108 ) 

109 i = i + 1 

110 

111 return sigma_iter 

112 

113 def _find_sigma_h(self, Xi, Yi): 

114 sigma_0 = 0.75 * abs(Xi) 

115 sigma_iter = sigma_0 

116 i = 1 

117 

118 while abs(self._h_sigma(Xi, Yi, sigma_iter)) > 1e-14: 

119 sigma_iter = sigma_iter - ( 

120 self._h_sigma(Xi, Yi, sigma_iter) 

121 / self._h_prime_sigma(Xi, Yi, sigma_iter) 

122 ) 

123 i = i + 1 

124 return sigma_iter 

125 

126 def _get_pareto_scale(self, Xi, Yi, shape): 

127 return (shape * Xi) / (((1 - Yi) ** -shape) - 1) 

128 

129 def _gauss_1pt_pierce_cost(self, sigma, pt): 

130 error = np.abs(pt[1] - halfnorm.cdf(pt[0], loc=0, scale=sigma)) 

131 return error 

132 

133 def _gauss_2pt_pierce_cost(self, gauss_param_array, pts): 

134 vect = -pts[1] + 0.5 * ( 

135 1 

136 + special.erf( 

137 (pts[0] - gauss_param_array[0]) / (np.sqrt(2) * gauss_param_array[1]) 

138 ) 

139 ) 

140 

141 return np.sum(vect**2) 

142 

143 @abstractmethod 

144 def overbound(self, *args, **kwargs): 

145 """Produce the overbound of the type denoted by the child class name.""" 

146 raise NotImplementedError("not implemented") 

147 

148 

149class SymmetricGaussianOverbounder(OverbounderBase): 

150 """Represents a Symmetric Gaussian Overbounder object.""" 

151 

152 def __init__(self): 

153 """Initialize an object. 

154 

155 Parameters 

156 ---------- 

157 pierce_locator : float 

158 Determines the proportional factor on the sample standard deviation 

159 for the lower bound on the overbound enforcement. The default is 1 

160 and should not be changed unless for experimental purposes. 

161 

162 Returns 

163 ------- 

164 None. 

165 """ 

166 super().__init__() 

167 self.pierce_locator = 1 

168 

169 def overbound(self, data): 

170 """Produce Gaussian model object that overbounds input error data. 

171 

172 Parameters 

173 ---------- 

174 data : N numpy array 

175 Array containing sample of error data to be overbounded. 

176 

177 Returns 

178 ------- 

179 :class:`serums.models.Gaussian` 

180 Gaussian distribution object which overbounds the input data. 

181 """ 

182 n = data.size 

183 sample_sigma = np.std(data, ddof=1) 

184 threshold = self.pierce_locator * sample_sigma 

185 OB_mean = 0 

186 

187 ecdf_ords = np.zeros(n) 

188 for i in range(n): 

189 ecdf_ords[i] = (i + 1) / n 

190 

191 sub = np.absolute(data) 

192 abs_data = np.sort(sub) 

193 

194 confidence = 0.95 

195 alfa = 1 - confidence 

196 epsilon = np.sqrt(np.log(2 / alfa) / (2 * n)) 

197 DKW_low = np.subtract(ecdf_ords, epsilon) 

198 

199 sub = np.array(np.where(abs_data > threshold)) 

200 candidates = np.zeros(n) 

201 

202 for i in sub[0, 0:]: 

203 pt = np.array([[abs_data[i]], [DKW_low[i]]]) 

204 ans = minimize( 

205 self._gauss_1pt_pierce_cost, 

206 sample_sigma, 

207 args=(pt,), 

208 method="Powell", 

209 options={"xtol": 1e-14, "maxfev": 10000, "maxiter": 10000}, 

210 ) 

211 if ans.success is True: 

212 candidates[i] = ans.x[0] 

213 else: 

214 candidates[i] = 0 

215 

216 rational_candidates = candidates[~np.isnan(candidates)] 

217 OB_sigma = np.max(rational_candidates) 

218 

219 return smodels.Gaussian( 

220 mean=np.array([[OB_mean]]), covariance=np.array([[OB_sigma**2]]) 

221 ) 

222 

223 

224class SymmetricGPO(OverbounderBase): 

225 """Represents a Symmetric Gaussian-Pareto Overbounder object.""" 

226 

227 def __init__(self): 

228 """Initialize an object. 

229 

230 Parameters 

231 ---------- 

232 inside_pierce_locator : float 

233 Determines the proportional factor on the sample standard deviation 

234 for the lower bound on the overbound enforcement in the core 

235 region. The default is 1 and should not be changed unless for 

236 experimental purposes. 

237 ThresholdReductionFactor : int 

238 Dividing factor for reduction of the space over which the search 

239 is conducted for a threshold. Currently, this feature is not 

240 implemented so there is no purpose in altering it. 

241 

242 Returns 

243 ------- 

244 None. 

245 """ 

246 super().__init__() 

247 self.inside_pierce_locator = 1 

248 self.ThresholdReductionFactor = 1 

249 

250 def overbound(self, data): 

251 """Produce Symmetric Gaussian-Pareto model object that overbounds input error data. 

252 

253 Parameters 

254 ---------- 

255 data : N numpy array 

256 Array containing sample of error data to be overbounded. 

257 

258 Returns 

259 ------- 

260 :class:`serums.models.SymmetricGaussianPareto` 

261 Symmetric Gaussian-Pareto distribution object which overbounds the input data. 

262 """ 

263 n = data.size 

264 sigma = np.sqrt(np.var(data, ddof=1)) 

265 print("\nComputing Symmetric Gaussian-Pareto Overbound...") 

266 

267 pos = np.absolute(data) 

268 sorted_abs_data = np.sort(pos) 

269 

270 Nt_min = 250 

271 Nt_max = int(np.ceil(0.1 * n)) 

272 idx_u_min = n - Nt_max - 1 

273 idx_u_max = n - Nt_min - 1 

274 u_idxs = np.arange(idx_u_min, idx_u_max + 1) 

275 u_candidates = sorted_abs_data[u_idxs] 

276 Nu = u_candidates.size 

277 shapes = np.zeros(Nu) 

278 MLE_used = np.zeros(Nu, dtype=bool) 

279 

280 print("\nComputing Tail GPD Shape Parameter Estimates...\nProgress:") 

281 

282 resolution = 10 * (1 / Nu) 

283 for i in range(Nu): 

284 if i < Nu - 1: 

285 fraction = i / Nu 

286 checkpoint = np.floor(10 * fraction) 

287 diagnostic = (10 * fraction) - checkpoint 

288 if diagnostic < resolution: 

289 print("{:3d}%".format(int(checkpoint * 10))) 

290 else: 

291 print("100%") 

292 

293 try: 

294 shapes[i] = de.grimshaw_MLE( 

295 np.subtract( 

296 sorted_abs_data[u_idxs[i] + 1 :], 

297 sorted_abs_data[u_idxs[i]] - 1e-25, 

298 ) 

299 )[0] 

300 MLE_used[i] = True 

301 except serums.errors.DistributionEstimatorFailed: 

302 shapes[i] = 0 

303 

304 shape_max = max(shapes) 

305 idx_shape_max = np.where(shapes == shape_max)[0] 

306 if shape_max <= 0: 

307 raise serums.errors.OverboundingMethodFailed( 

308 "MLE indicates exponential or finite tail. Use the Symmetric Gaussian Overbounder." 

309 ) 

310 

311 shape_max_covar = de.grimshaw_MLE( 

312 np.subtract( 

313 sorted_abs_data[u_idxs[idx_shape_max[0]] + 1 :], 

314 sorted_abs_data[u_idxs[idx_shape_max[0]]] - 1e-25, 

315 ) 

316 )[2] 

317 

318 tail_size = (n - 1) - u_idxs[idx_shape_max[0]] 

319 tail_shape = shape_max + t.ppf(0.975, tail_size) * np.sqrt( 

320 shape_max_covar[1, 1] 

321 ) 

322 u_idx = u_idxs[idx_shape_max[0]] 

323 u = sorted_abs_data[u_idx] 

324 

325 print("\nComputing Gaussian Overbound for Core Region...") 

326 

327 ecdf_ords = np.zeros(n) 

328 for i in range(n): 

329 ecdf_ords[i] = (i + 1) / n 

330 

331 confidence = 0.95 

332 alfa = 1 - confidence 

333 epsilon = np.sqrt(np.log(2 / alfa) / (2 * n)) 

334 DKW_low = np.subtract(ecdf_ords, epsilon) 

335 

336 core_min = self.inside_pierce_locator * np.sqrt( 

337 np.var(sorted_abs_data[np.where(sorted_abs_data < u)[0]], ddof=1) 

338 ) 

339 sub = np.where(sorted_abs_data > core_min)[0] 

340 start = sub[0] 

341 sub = np.where(sorted_abs_data < u)[0] 

342 end = sub[-1] 

343 candidates = np.zeros(n) 

344 subs = np.arange(start, end + 2, 1) 

345 

346 for i in subs: 

347 candidates[i] = self._find_sigma_h(sorted_abs_data[i], DKW_low[i]) 

348 

349 real_candidates = candidates[~np.isnan(candidates)] 

350 core_sigma = np.max(real_candidates) 

351 

352 print("\nComputing Tail GPD Scale Parameter...") 

353 

354 tail_idxs = np.arange(u_idx + 1, n, 1) 

355 tail_pts = sorted_abs_data[tail_idxs] 

356 shifted_tail_pts = np.zeros(n) 

357 shifted_tail_pts[tail_idxs] = np.subtract( 

358 sorted_abs_data[tail_idxs], 

359 sorted_abs_data[u_idx], 

360 ) 

361 scales = np.zeros(n) 

362 

363 Fu = self._erf_gauss(u, 0, core_sigma) 

364 tail_DKW_ords_CEDF_domain = np.zeros(n) 

365 tail_DKW_ords_CEDF_domain[tail_idxs] = np.subtract(DKW_low[tail_idxs], Fu) / ( 

366 1 - Fu 

367 ) 

368 

369 for i in tail_idxs: 

370 scales[i] = self._get_pareto_scale( 

371 shifted_tail_pts[i], tail_DKW_ords_CEDF_domain[i], tail_shape 

372 ) 

373 

374 rational_scales = scales[~np.isnan(scales)] 

375 tail_scale = np.max(rational_scales) 

376 

377 print("\nDone.") 

378 return smodels.SymmetricGaussianPareto( 

379 scale=core_sigma, 

380 threshold=u, 

381 tail_shape=tail_shape, 

382 tail_scale=tail_scale, 

383 ) 

384 

385 

386class PairedGaussianOverbounder(OverbounderBase): 

387 """Represents a Paired Gaussian Overbounder object.""" 

388 

389 def __init__(self): 

390 """Initialize an object. 

391 

392 Parameters 

393 ---------- 

394 None. 

395 

396 Returns 

397 ------- 

398 None. 

399 """ 

400 super().__init__() 

401 

402 def _cost_left(self, params, x_check, y_check): 

403 y_curve = norm.cdf(x_check, loc=params[0], scale=params[1]) 

404 cost_vect = y_curve - y_check 

405 pos_indices = cost_vect >= 0 

406 cost = np.sum(cost_vect[pos_indices]) 

407 cost += np.sum(-1000 * y_check.size * cost_vect[np.logical_not(pos_indices)]) 

408 return cost 

409 

410 def _cost_right(self, params, x_check, y_check): 

411 y_curve = norm.cdf(x_check, loc=params[0], scale=params[1]) 

412 cost_vect = y_check - y_curve 

413 pos_indices = cost_vect >= 0 

414 cost = np.sum(cost_vect[pos_indices]) 

415 cost += np.sum(-1000 * y_check.size * cost_vect[np.logical_not(pos_indices)]) 

416 return cost 

417 

418 def overbound(self, data, debug_plots=False): 

419 """Produce Paired Gaussian model object that overbounds input error data. 

420 

421 Parameters 

422 ---------- 

423 data : N numpy array 

424 Array containing sample of error data to be overbounded. 

425 

426 Returns 

427 ------- 

428 :class:`serums.models.PairedGaussian` 

429 Paired Gaussian distribution object which overbounds the input data. 

430 """ 

431 n = data.size 

432 sorted_data = np.sort(data) 

433 init_mean = np.mean(data) 

434 init_sigma = np.std(data, ddof=1) 

435 init_guess = np.array([init_mean, init_sigma]) 

436 

437 ecdf_ords = np.zeros(n) 

438 for i in range(n): 

439 ecdf_ords[i] = (i + 1) / n 

440 

441 confidence = 0.95 

442 alfa = 1 - confidence 

443 epsilon = np.sqrt(np.log(2 / alfa) / (2 * n)) 

444 DKW_low = np.subtract(ecdf_ords, epsilon) 

445 DKW_high = np.add(ecdf_ords, epsilon) 

446 

447 left_usable_idxs = np.asarray(DKW_high < (1 - epsilon)).nonzero()[0] 

448 x_check_left = sorted_data[left_usable_idxs] 

449 y_check_left = DKW_high[left_usable_idxs] 

450 

451 left_result = basinhopping( 

452 self._cost_left, 

453 init_guess, 

454 niter=200, 

455 stepsize=np.array([init_mean / 2, init_sigma / 2]), 

456 minimizer_kwargs={ 

457 "args": (x_check_left, y_check_left), 

458 "method": "Powell", 

459 "options": { 

460 "xtol": 1e-14, 

461 "ftol": 1e-6, 

462 "maxfev": 10000, 

463 "maxiter": 10000, 

464 }, 

465 }, 

466 ) 

467 

468 right_usable_idxs = np.asarray(DKW_low > epsilon).nonzero()[0] 

469 x_check_right = sorted_data[right_usable_idxs] 

470 y_check_right = DKW_low[right_usable_idxs] 

471 

472 right_result = basinhopping( 

473 self._cost_right, 

474 init_guess, 

475 niter=200, 

476 stepsize=np.array([init_mean / 2, init_sigma / 2]), 

477 minimizer_kwargs={ 

478 "args": (x_check_right, y_check_right), 

479 "method": "Powell", 

480 "options": { 

481 "xtol": 1e-14, 

482 "ftol": 1e-6, 

483 "maxfev": 10000, 

484 "maxiter": 10000, 

485 }, 

486 }, 

487 ) 

488 

489 left_ob = smodels.Gaussian( 

490 mean=left_result.x[0], 

491 covariance=np.array([[left_result.x[1] ** 2]]), 

492 ) 

493 right_ob = smodels.Gaussian( 

494 mean=right_result.x[0], 

495 covariance=np.array([[right_result.x[1] ** 2]]), 

496 ) 

497 

498 return smodels.PairedGaussian(left_ob, right_ob) 

499 

500 

501class PairedGPO(OverbounderBase): 

502 """Represents a Paired Gaussian-Pareto Overbounder Object.""" 

503 

504 def __init__(self): 

505 """Initialize an object. 

506 

507 Parameters 

508 ---------- 

509 ThresholdReductionFactor : int 

510 Dividing factor for reduction of the space over which the search 

511 is conducted for a threshold. Currently, this feature is not 

512 implemented so there is no purpose in altering it. 

513 StrictPairedEnforcement : bool 

514 Logical property which determines how the paired overbounds are 

515 enforced. The default is False and should not be altered unless 

516 the overbound is to be used with an alternate fusion algorithm 

517 based on analytical methods rather than Monte-Carlo output 

518 simulation. 

519 

520 Returns 

521 ------- 

522 None. 

523 """ 

524 super().__init__() 

525 self.ThresholdReductionFactor = 1 

526 self.StrictPairedEnforcement = False 

527 

528 def _cost_left(self, params, x_check, y_check): 

529 y_curve = norm.cdf(x_check, loc=params[0], scale=params[1]) 

530 cost_vect = y_curve - y_check 

531 pos_indices = cost_vect >= 0 

532 cost = np.sum(cost_vect[pos_indices]) 

533 cost += np.sum(-10000 * y_check.size * cost_vect[np.logical_not(pos_indices)]) 

534 return cost 

535 

536 def _cost_right(self, params, x_check, y_check): 

537 y_curve = norm.cdf(x_check, loc=params[0], scale=params[1]) 

538 cost_vect = y_check - y_curve 

539 pos_indices = cost_vect >= 0 

540 cost = np.sum(cost_vect[pos_indices]) 

541 cost += np.sum(-10000 * y_check.size * cost_vect[np.logical_not(pos_indices)]) 

542 return cost 

543 

544 def overbound(self, data): 

545 """Produce Paired Gaussian-Pareto model object that overbounds input error data. 

546 

547 Parameters 

548 ---------- 

549 data : N numpy array 

550 Array containing sample of error data to be overbounded. 

551 

552 Returns 

553 ------- 

554 :class:`serums.models.PairedGaussianPareto` 

555 Paired Gaussian-Pareto distribution object which overbounds the input data. 

556 """ 

557 n = data.size 

558 data_sorted = np.sort(data) 

559 idx_10p = int(np.ceil(0.1 * n)) 

560 idxs_cand_u_left = np.arange(250, idx_10p, 1) 

561 

562 max_shape_left = 0 

563 idx_u_left = None 

564 max_shape_covar_left = None 

565 for i in idxs_cand_u_left: 

566 try: 

567 shape, scale, covar = de.grimshaw_MLE( 

568 np.add( 

569 np.abs( 

570 np.subtract( 

571 data_sorted[0:i], 

572 data_sorted[i], 

573 ) 

574 ), 

575 1e-14, 

576 ) 

577 ) 

578 if shape > max_shape_left: 

579 max_shape_left = shape 

580 idx_u_left = i 

581 max_shape_covar_left = covar 

582 except serums.errors.DistributionEstimatorFailed: 

583 pass 

584 

585 if max_shape_left > 0: 

586 gamma_left = max_shape_left 

587 u_left = data_sorted[idx_u_left] 

588 else: 

589 raise serums.errors.OverboundingMethodFailed( 

590 "MLE indicates exponential or finite left tail. Use the paired Gaussian Overbounder." 

591 ) 

592 

593 Nt_left = idx_u_left - 1 

594 gamma_left = gamma_left + t.ppf(0.975, Nt_left) * np.sqrt( 

595 max_shape_covar_left[1, 1] 

596 ) 

597 

598 idx_90p = int(np.floor(0.9 * n)) 

599 idxs_cand_u_right = np.arange(idx_90p, n - 250, 1) 

600 

601 max_shape_right = 0 

602 idx_u_right = None 

603 max_shape_covar_right = None 

604 for i in idxs_cand_u_right: 

605 try: 

606 shape, scale, covar = de.grimshaw_MLE( 

607 np.add( 

608 np.abs(np.subtract(data_sorted[i:], data_sorted[i])), 

609 1e-14, 

610 ) 

611 ) 

612 if shape > max_shape_right: 

613 max_shape_right = shape 

614 idx_u_right = i 

615 max_shape_covar_right = covar 

616 except serums.errors.DistributionEstimatorFailed: 

617 pass 

618 

619 if max_shape_right > 0: 

620 gamma_right = max_shape_right 

621 u_right = data_sorted[idx_u_right] 

622 else: 

623 raise serums.errors.OverboundingMethodFailed( 

624 "MLE indicates exponential or finite right tail. Use the paired Gaussian Overbounder." 

625 ) 

626 

627 Nt_right = n - idx_u_right - 1 

628 gamma_right = gamma_right + t.ppf(0.975, Nt_right) * np.sqrt( 

629 max_shape_covar_right[1, 1] 

630 ) 

631 

632 init_mean = np.mean(data) 

633 init_sigma = np.std(data, ddof=1) 

634 init_guess = np.array([init_mean, init_sigma]) 

635 

636 ecdf_ords = np.zeros(n) 

637 for i in range(n): 

638 ecdf_ords[i] = (i + 1) / n 

639 

640 confidence = 0.95 

641 alfa = 1 - confidence 

642 epsilon = np.sqrt(np.log(2 / alfa) / (2 * n)) 

643 DKW_low = np.subtract(ecdf_ords, epsilon) 

644 DKW_high = np.add(ecdf_ords, epsilon) 

645 

646 if self.StrictPairedEnforcement is True: 

647 left_usable_idxs = np.asarray(DKW_high < (1 - epsilon)).nonzero()[0] 

648 left_usable_idxs = left_usable_idxs[idx_u_left:] 

649 x_check_left = data_sorted[left_usable_idxs] 

650 y_check_left = DKW_high[left_usable_idxs] 

651 else: 

652 left_usable_idxs = np.asarray(DKW_high < (0.5 + epsilon)).nonzero()[0] 

653 left_usable_idxs = left_usable_idxs[idx_u_left:] 

654 x_check_left = data_sorted[left_usable_idxs] 

655 y_check_left = DKW_high[left_usable_idxs] 

656 

657 left_result = basinhopping( 

658 self._cost_left, 

659 init_guess, 

660 niter=300, 

661 stepsize=np.array([init_mean / 2, init_sigma / 2]), 

662 minimizer_kwargs={ 

663 "args": (x_check_left, y_check_left), 

664 "method": "Powell", 

665 "options": { 

666 "xtol": 1e-14, 

667 "ftol": 1e-6, 

668 "maxfev": 10000, 

669 "maxiter": 10000, 

670 }, 

671 }, 

672 ) 

673 

674 left_mean = left_result.x[0] 

675 left_sigma = left_result.x[1] 

676 

677 if self.StrictPairedEnforcement is True: 

678 right_usable_idxs = np.asarray(DKW_low > epsilon).nonzero()[0] 

679 right_usable_idxs = right_usable_idxs[0 : -(n - 1 - idx_u_right)] 

680 x_check_right = data_sorted[right_usable_idxs] 

681 y_check_right = DKW_low[right_usable_idxs] 

682 else: 

683 right_usable_idxs = np.asarray(DKW_low > (0.5 - epsilon)).nonzero()[0] 

684 right_usable_idxs = right_usable_idxs[0 : -(n - 1 - idx_u_right)] 

685 x_check_right = data_sorted[right_usable_idxs] 

686 y_check_right = DKW_low[right_usable_idxs] 

687 

688 right_result = basinhopping( 

689 self._cost_right, 

690 init_guess, 

691 niter=300, 

692 stepsize=np.array([init_mean / 2, init_sigma / 2]), 

693 minimizer_kwargs={ 

694 "args": (x_check_right, y_check_right), 

695 "method": "Powell", 

696 "options": { 

697 "xtol": 1e-14, 

698 "ftol": 1e-6, 

699 "maxfev": 10000, 

700 "maxiter": 10000, 

701 }, 

702 }, 

703 ) 

704 

705 right_mean = right_result.x[0] 

706 right_sigma = right_result.x[1] 

707 

708 Fu = norm.cdf(u_left, loc=left_mean, scale=left_sigma) 

709 left_transformed_ords = np.divide( 

710 np.negative( 

711 np.subtract( 

712 DKW_high[0 : idx_u_left - 1], 

713 Fu, 

714 ) 

715 ), 

716 Fu, 

717 ) 

718 left_transformed_ords = np.flip(left_transformed_ords) 

719 shifted_left_tail = np.flip( 

720 np.abs(np.subtract(data_sorted[0 : idx_u_left - 1], u_left)) 

721 ) 

722 

723 max_beta_left = 0 

724 

725 for i in range(left_transformed_ords.size): 

726 beta = self._get_pareto_scale( 

727 shifted_left_tail[i], left_transformed_ords[i], gamma_left 

728 ) 

729 if beta > max_beta_left: 

730 max_beta_left = beta 

731 

732 if max_beta_left > 0: 

733 beta_left = max_beta_left 

734 else: 

735 raise ( 

736 serums.errors.OverboundingMethodFailed( 

737 "GPD scale parameter not found for left tail. Use the paired gaussian overbounder." 

738 ) 

739 ) 

740 

741 Fu = norm.cdf(u_right, loc=right_mean, scale=right_sigma) 

742 right_transformed_ords = np.divide( 

743 np.abs(np.subtract(DKW_low[idx_u_right + 1 :], Fu)), (1 - Fu) 

744 ) 

745 shifted_right_tail = np.abs( 

746 np.subtract(data_sorted[idx_u_right + 1 :], u_right) 

747 ) 

748 

749 max_beta_right = 0 

750 

751 for i in range(right_transformed_ords.size): 

752 beta = self._get_pareto_scale( 

753 shifted_right_tail[i], right_transformed_ords[i], gamma_right 

754 ) 

755 if beta > max_beta_right: 

756 max_beta_right = beta 

757 

758 if max_beta_right > 0: 

759 beta_right = max_beta_right 

760 else: 

761 raise ( 

762 serums.errors.OverboundingMethodFailed( 

763 "GPD scale parameter not found for right tail. Use the paired gaussian overbounder" 

764 ) 

765 ) 

766 

767 return smodels.PairedGaussianPareto( 

768 left_tail_shape=gamma_left, 

769 left_tail_scale=beta_left, 

770 left_threshold=u_left, 

771 left_mean=left_mean, 

772 left_sigma=left_sigma, 

773 right_tail_shape=gamma_right, 

774 right_tail_scale=beta_right, 

775 right_threshold=u_right, 

776 right_mean=right_mean, 

777 right_sigma=right_sigma, 

778 )