import pandas as pd
import numpy as np
from scipy.optimize import minimize
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.utils.validation import check_X_y, check_is_fitted


class MinimizeRegressor(BaseEstimator, RegressorMixin):
    """Generic regressor for parametric models with custom optimization."""

    def __init__(self, parametric_model: callable, loss: callable,
                 param_names: list, initial_params: dict = None,
                 **minimize_kwargs):
        # Store constructor arguments untouched (scikit-learn convention);
        # defaults are resolved in fit().
        self.parametric_model = parametric_model
        self.loss = loss
        self.param_names = param_names
        self.initial_params = initial_params
        self.minimize_kwargs = minimize_kwargs

    def _objective(self, params: np.ndarray, X: pd.DataFrame, y: np.ndarray) -> float:
        param_dict = dict(zip(self.param_names, params))
        y_pred = self.parametric_model(param_dict, X)
        return self.loss(y, y_pred, param_dict)

    def fit(self, X: pd.DataFrame, y: np.ndarray):
        # Validate the inputs, then rebuild the DataFrame so the parametric
        # model can keep addressing features by column name.
        columns = X.columns
        X_checked, y = check_X_y(X, y, accept_sparse=False, ensure_min_features=2)
        X = pd.DataFrame(X_checked, columns=columns)

        initial_params = self.initial_params or {}
        initial = np.array([initial_params.get(name, 0) for name in self.param_names])
        self.optimization_ = minimize(
            fun=self._objective,
            x0=initial,
            args=(X, y),
            **self.minimize_kwargs
        )
        self.optimal_params_ = dict(zip(self.param_names, self.optimization_.x))
        return self

    def predict(self, X: pd.DataFrame) -> np.ndarray:
        check_is_fitted(self)
        return self.parametric_model(self.optimal_params_, X)


# Complex Non-Linear Use Case --------------------------------------------------

def nonlinear_model(params: dict, X: pd.DataFrame) -> np.ndarray:
    """Sophisticated model with multiple non-linear components."""
    return (
        params['a'] * np.maximum(X['x1'], 0) +                       # Rectified linear component
        params['b'] * np.exp(params['c'] * X['x2']) +                # Exponential decay
        params['d'] * np.log1p(np.abs(X['x3'])) * (X['x4'] > 0)      # Conditional log
    )


def relative_mse_loss(y_true: np.ndarray, y_pred: np.ndarray, params: dict) -> float:
    """Relative MSE loss with a small ridge penalty on the parameters."""
    eps = 1e-8  # Prevent division by zero
    relative_error = y_true / (y_pred + eps) - 1
    return np.mean(relative_error ** 2) + 1e-4 * np.sum(np.array(list(params.values())) ** 2)


# Configure and test the regressor
regressor = MinimizeRegressor(
    parametric_model=nonlinear_model,
    loss=relative_mse_loss,
    param_names=['a', 'b', 'c', 'd'],
    initial_params={'a': 1.0, 'b': 0.5, 'c': -0.1, 'd': 2.0},
    method='L-BFGS-B',
    bounds=[
        (0, None),    # a: Non-negative coefficient for ReLU
        (0, None),    # b: Non-negative scale for exponential
        (None, 0),    # c: Negative exponent for decay
        (None, None)  # d: Unbounded coefficient
    ],
    options={'maxiter': 1000, 'ftol': 1e-6}
)

# Generate synthetic data with complex relationships
np.random.seed(42)
X = pd.DataFrame({
    'x1': np.random.normal(2, 1, 100),
    'x2': np.random.uniform(0.1, 2, 100),
    'x3': np.random.lognormal(1, 0.5, 100),
    'x4': np.random.choice([-1, 1], 100)
})
true_params = {'a': 2.5, 'b': 1.2, 'c': -0.3, 'd': 3.0}
y = nonlinear_model(true_params, X) + np.random.normal(0, 0.5, 100)

# Fit and evaluate
regressor.fit(X, y)
predictions = regressor.predict(X)

print("Optimized parameters vs true values:")
print(pd.DataFrame({
    'True': true_params,
    'Estimated': regressor.optimal_params_
}).T.round(2))

print("\nSample predictions vs actual:")
print(pd.DataFrame({
    'Actual': y[:5],
    'Predicted': predictions[:5]
}).round(2))
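
# Optional sanity check (a small sketch beyond the example above): because
# MinimizeRegressor inherits from scikit-learn's RegressorMixin, it also gets
# the default .score() method, which reports R^2 via predict().
print(f"\nR^2 on training data: {regressor.score(X, y):.3f}")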