// Declares maximize: takes an objective function mapping a ParamArray to a
// double, a starting ParamArray and an iteration count, and returns a
// ParamArray.
// Presumably the result is the parameter set reached after num_iters steps that
// increase objective_function -- confirm against the definition.
// NOTE(review): the leading integers (83, 84) look like line numbers carried
// over from the listing this text was extracted from, not C++ tokens.
83 ParamArray
maximize(std::function<
double(ParamArray)> objective_function,
84 ParamArray initial_value,
unsigned int num_iters);
// Declares minimize: same signature as maximize (objective function over a
// ParamArray, starting ParamArray, iteration count) and returns a ParamArray.
// Presumably the result is the parameter set reached after num_iters steps that
// decrease objective_function -- confirm against the definition.
// NOTE(review): the leading integers (99, 100) look like listing line numbers,
// not C++ tokens.
99 ParamArray
minimize(std::function<
double(ParamArray)> objective_function,
100 ParamArray initial_value,
unsigned int num_iters);
// Declares followGradient, the shared iteration routine.
//   objective_function     - maps a ParamArray to the value being optimized.
//   initial_value          - parameters the iteration starts from.
//   num_iters              - number of update iterations to run.
//   gradient_movement_func - takes two doubles and returns a double; the
//                            definition applies it once per parameter to produce
//                            the updated value. The callers visible in this file
//                            name its arguments curr_value and step.
// Returns a ParamArray.
// NOTE(review): the leading integers (120-123) look like listing line numbers,
// not C++ tokens.
120 ParamArray followGradient(
121 std::function<
double(ParamArray)> objective_function, ParamArray initial_value,
122 unsigned int num_iters,
123 std::function<
double(
double,
double)> gradient_movement_func);
// Declares approximateGradient: given a parameter set and an objective
// function, returns a ParamArray holding a numerical estimate of the gradient
// of objective_function at params (the definition uses one extra objective
// evaluation per parameter).
// NOTE(review): the leading integers (133, 134) look like listing line numbers,
// not C++ tokens.
133 ParamArray approximateGradient(ParamArray params,
134 std::function<
double(ParamArray)> objective_function);
// Small positive constant (1e-8).
// NOTE(review): presumably added to a denominator to avoid division by zero;
// its use is not visible in this excerpt -- confirm.
138 static constexpr double eps = 1e-8;
// Per-parameter weights. They scale the probe offset used when approximating
// the gradient and the step passed to the movement function for each parameter.
142 ParamArray param_weights;
// Decay rate of the running average of past gradients: each iteration keeps
// this fraction of the old average and blends in (1 - rate) of the new gradient.
145 double past_gradient_decay_rate;
// Same role as above, for the running average of past squared gradients.
146 double past_squared_gradient_decay_rate;
// Base offset applied to a single parameter when approximating the gradient by
// finite differences; also the divisor of the resulting difference.
150 double gradient_approx_step_size;
// Constructor definition: stores each argument in the member of the same name
// through the initializer list; no other work is visible.
// Note the argument order: gradient_approx_step_size is the second parameter,
// ahead of the two decay rates, while the initializer list lists it last.
// NOTE(review): the line carrying the qualified constructor name and the body
// braces are absent from this listing (the embedded numbering skips 164 and
// 171-173), and the leading integers are listing line numbers, not C++ tokens.
163template <
size_t NUM_PARAMS>
165 std::array<double, NUM_PARAMS> param_weights,
double gradient_approx_step_size,
166 double past_gradient_decay_rate,
double past_squared_gradient_decay_rate)
167 : param_weights(param_weights),
168 past_gradient_decay_rate(past_gradient_decay_rate),
169 past_squared_gradient_decay_rate(past_squared_gradient_decay_rate),
170 gradient_approx_step_size(gradient_approx_step_size)
// Out-of-class definition that forwards straight to followGradient with the
// same objective_function, initial_value and num_iters.
// Presumably this is maximize (it is the first of the two forwarding
// definitions, matching the declaration order); the line carrying the function
// name is absent from this listing -- confirm.
174template <
size_t NUM_PARAMS>
176 std::function<
double(std::array<double, NUM_PARAMS>)> objective_function,
177 std::array<double, NUM_PARAMS> initial_value,
unsigned int num_iters)
179 return followGradient(
180 objective_function, initial_value, num_iters,
// Movement function: the new value is the current value plus the step.
181 [](
double curr_value,
double step) {
return curr_value + step; });
// Out-of-class definition that forwards straight to followGradient with the
// same objective_function, initial_value and num_iters.
// Presumably this is minimize (it is the second of the two forwarding
// definitions, matching the declaration order); the line carrying the function
// name is absent from this listing -- confirm.
184template <
size_t NUM_PARAMS>
186 std::function<
double(std::array<double, NUM_PARAMS>)> objective_function,
187 std::array<double, NUM_PARAMS> initial_value,
unsigned int num_iters)
189 return followGradient(
190 objective_function, initial_value, num_iters,
// Movement function: the new value is the current value minus the step.
191 [](
double curr_value,
double step) {
return curr_value - step; });
// Out-of-class definition of the shared iteration routine (presumably
// followGradient, judging by the gradient_movement_func parameter; the line
// carrying the function name is absent from this listing).
// Each of the num_iters iterations:
//   1. approximates the gradient of objective_function at the current params,
//   2. squares each gradient component,
//   3. folds both into exponentially decayed running averages,
//   4. divides the averages by a bias-correction factor,
//   5. passes a weighted ratio of the corrected averages to
//      gradient_movement_func to obtain each new parameter value.
// The structure resembles the Adam update rule -- confirm that is the intent.
// NOTE(review): braces, the first argument of gradient_movement_func, the end
// of the denominator and the return statement are absent from this listing;
// the leading integers are listing line numbers, not C++ tokens.
194template <
size_t NUM_PARAMS>
196 std::function<
double(std::array<double, NUM_PARAMS>)> objective_function,
197 std::array<double, NUM_PARAMS> initial_value,
unsigned int num_iters,
198 std::function<
double(
double,
double)> gradient_movement_func)
// Working copy of the parameters, updated on every iteration.
204 ParamArray params = initial_value;
// Running averages start at zero (assuming ParamArray is
// std::array<double, NUM_PARAMS>, for which = {0} zero-fills every element).
207 ParamArray past_gradient_averages = {0};
208 ParamArray past_squared_gradient_averages = {0};
210 for (
unsigned iter = 0; iter < num_iters; iter++)
// Costs one call to approximateGradient per iteration.
212 ParamArray gradient = approximateGradient(params, objective_function);
// Element-wise square of the gradient.
215 ParamArray squared_gradient = {0};
216 for (
unsigned int i = 0; i < NUM_PARAMS; i++)
218 squared_gradient.at(i) = std::pow(gradient.at(i), 2);
// Exponential moving averages: keep decay_rate of the old average and blend in
// (1 - decay_rate) of the newest gradient / squared gradient.
222 for (
unsigned int i = 0; i < NUM_PARAMS; i++)
224 past_gradient_averages.at(i) =
225 past_gradient_decay_rate * past_gradient_averages.at(i) +
226 (1 - past_gradient_decay_rate) * gradient.at(i);
227 past_squared_gradient_averages.at(i) =
228 past_squared_gradient_decay_rate * past_squared_gradient_averages.at(i) +
229 (1 - past_squared_gradient_decay_rate) * squared_gradient.at(i);
// Bias correction of the zero-initialized averages.
// NOTE(review): the exponent is the constant 2, so the divisor is identical on
// every iteration and never uses iter. If Adam-style correction is intended,
// the exponent would be the 1-based iteration count (iter + 1) -- confirm.
// A decay rate of exactly 1 makes the divisor zero.
233 ParamArray bias_corrected_past_gradient_averages = {0};
234 ParamArray bias_corrected_past_squared_gradient_averages = {0};
235 for (
unsigned int i = 0; i < NUM_PARAMS; i++)
237 bias_corrected_past_gradient_averages.at(i) =
238 past_gradient_averages.at(i) /
239 (1 - std::pow(past_gradient_decay_rate, 2));
240 bias_corrected_past_squared_gradient_averages.at(i) =
241 past_squared_gradient_averages.at(i) /
242 (1 - std::pow(past_squared_gradient_decay_rate, 2));
// Parameter update: the step handed to gradient_movement_func is
// param_weights[i] * corrected average gradient, divided by the square root of
// the corrected average squared gradient plus a further term.
// NOTE(review): the first argument (presumably params.at(i)) and the added term
// (presumably eps) are on lines absent from this listing -- confirm.
247 for (
unsigned int i = 0; i < NUM_PARAMS; i++)
249 params.at(i) = gradient_movement_func(
251 param_weights.at(i) * bias_corrected_past_gradient_averages.at(i) /
252 (std::sqrt(bias_corrected_past_squared_gradient_averages.at(i)) +
// Out-of-class definition of the finite-difference gradient estimate
// (presumably approximateGradient; the line carrying the function name is
// absent from this listing).
// Evaluates objective_function once at params, then once more per parameter
// with only that parameter offset, for NUM_PARAMS + 1 evaluations in total.
// NOTE(review): braces, the assignment target of the final quotient (presumably
// gradient.at(i)) and the return statement are absent from this listing; the
// leading integers are listing line numbers, not C++ tokens.
260template <
size_t NUM_PARAMS>
262 std::array<double, NUM_PARAMS> params,
263 std::function<
double(std::array<double, NUM_PARAMS>)> objective_function)
265 ParamArray gradient = {0};
// Baseline value shared by every per-parameter difference below.
266 double curr_function_value = objective_function(params);
268 for (
unsigned i = 0; i < NUM_PARAMS; i++)
// Copy the parameters and offset only component i.
270 auto test_params = params;
271 test_params.at(i) += gradient_approx_step_size * param_weights.at(i);
272 double new_function_value = objective_function(test_params);
// Forward difference divided by the unweighted step size.
// NOTE(review): the offset above is scaled by param_weights.at(i) but this
// divisor is not, so the quotient is roughly the slope times the weight --
// confirm this weighting is intended. A zero gradient_approx_step_size
// divides by zero here.
274 (new_function_value - curr_function_value) / gradient_approx_step_size;