24 Adam(
double alpha = 0.001,
double beta1 = 0.9,
double beta2 = 0.999,
25 double epsilon = 10E-8)
29 this->epsilon = epsilon;
32 ~Adam()
override =
default;
35 const Eigen::MatrixXd &weightsGrad)
override {
36 this->update(weights, weightsGrad, mWeights[cl], vWeights[cl]);
40 const Eigen::MatrixXd &biasesGrad)
override {
41 this->update(biases, biasesGrad, mBiases[cl], vBiases[cl]);
45 template <
typename Derived1,
typename Derived2>
46 void update(Eigen::MatrixBase<Derived1> ¶m,
47 const Eigen::MatrixBase<Derived2> &gradients,
48 Eigen::MatrixBase<Derived1> &m, Eigen::MatrixBase<Derived1> &v) {
49 assert(param.rows() == gradients.rows() &&
50 param.cols() == gradients.cols());
55 if (m.rows() == 0 || m.cols() == 0) {
57 m = Eigen::MatrixBase<Derived1>::Zero(param.rows(), param.cols());
58 v = Eigen::MatrixBase<Derived1>::Zero(param.rows(), param.cols());
61 assert(gradients.rows() == m.rows() && gradients.cols() == m.cols());
62 assert(gradients.rows() == v.rows() && gradients.cols() == v.cols());
65 m = (beta1 * m).array() + ((1 - beta2) * gradients.array()).array();
68 v = (beta2 * v).array() +
69 ((1 - beta2) * (gradients.array() * gradients.array())).array();
72 double beta1_t = std::pow(beta1, t);
75 double beta2_t = std::pow(beta2, t);
77 double alpha_t = alpha * (sqrt(1 - beta2_t) / (1 - beta1_t));
81 param.array() - alpha_t * (m.array() / (v.array().sqrt() + epsilon));
// Per-layer Adam state, indexed by layer: first (m*) and second (v*) raw
// moment estimates for the weight and bias matrices. Entries are created
// as 0x0 matrices by insiderInit and sized lazily on first use by update().
std::vector<Eigen::MatrixXd> mWeights;
std::vector<Eigen::MatrixXd> vWeights;
std::vector<Eigen::MatrixXd> mBiases;
std::vector<Eigen::MatrixXd> vBiases;
97 void insiderInit(
size_t numLayers)
override {
101 Eigen::MatrixXd dotMatrix = Eigen::MatrixXd::Zero(0, 0);
103 for (
int i = mWeights.size(); i < numLayers; i++) {
104 mWeights.push_back(dotMatrix);
105 vWeights.push_back(dotMatrix);
106 mBiases.push_back(dotMatrix);
107 vBiases.push_back(dotMatrix);
// Advances the current-layer cursor `cl` toward layer 1, wrapping back to
// the last layer `ll` once layer 1 has been processed.
// NOTE(review): this statement belongs to a function whose body is not
// visible in this extract — confirm its enclosing context before editing.
cl = cl == 1 ? ll : cl - 1;
void updateWeights(Eigen::MatrixXd &weights, const Eigen::MatrixXd &weightsGrad) override
This function updates the weights passed based on the selected Optimizer and the weights gradients.
Definition Adam.hpp:34
void updateBiases(Eigen::MatrixXd &biases, const Eigen::MatrixXd &biasesGrad) override
This function updates the biases passed based on the selected Optimizer and the biases gradients.
Definition Adam.hpp:39