NeuralNet 1.0
Loading...
Searching...
No Matches
Adam.hpp
1#pragma once
2
3#include <Eigen/Core>
4#include <cmath>
5
6#include "Optimizer.hpp"
7
8namespace NeuralNet {
12class Adam : public Optimizer {
13 public:
24 Adam(double alpha = 0.001, double beta1 = 0.9, double beta2 = 0.999,
25 double epsilon = 10E-8)
26 : Optimizer(alpha) {
27 this->beta1 = beta1;
28 this->beta2 = beta2;
29 this->epsilon = epsilon;
30 };
31
32 ~Adam() override = default;
33
34 void updateWeights(Eigen::MatrixXd &weights,
35 const Eigen::MatrixXd &weightsGrad) override {
36 this->update(weights, weightsGrad, mWeights[cl], vWeights[cl]);
37 };
38
39 void updateBiases(Eigen::MatrixXd &biases,
40 const Eigen::MatrixXd &biasesGrad) override {
41 this->update(biases, biasesGrad, mBiases[cl], vBiases[cl]);
42 this->setCurrentL();
43 };
44
45 template <typename Derived1, typename Derived2>
46 void update(Eigen::MatrixBase<Derived1> &param,
47 const Eigen::MatrixBase<Derived2> &gradients,
48 Eigen::MatrixBase<Derived1> &m, Eigen::MatrixBase<Derived1> &v) {
49 assert(param.rows() == gradients.rows() &&
50 param.cols() == gradients.cols());
51
52 // increment time step
53 t = t + 1;
54
55 if (m.rows() == 0 || m.cols() == 0) {
56 // Initialize moment matrices m and v
57 m = Eigen::MatrixBase<Derived1>::Zero(param.rows(), param.cols());
58 v = Eigen::MatrixBase<Derived1>::Zero(param.rows(), param.cols());
59 }
60
61 assert(gradients.rows() == m.rows() && gradients.cols() == m.cols());
62 assert(gradients.rows() == v.rows() && gradients.cols() == v.cols());
63
64 // update biased first moment estimate
65 m = (beta1 * m).array() + ((1 - beta2) * gradients.array()).array();
66
67 // updated biased second raw moment estimate
68 v = (beta2 * v).array() +
69 ((1 - beta2) * (gradients.array() * gradients.array())).array();
70
71 // compute bias-corrected first moment estimate
72 double beta1_t = std::pow(beta1, t);
73
74 // compute bias-corrected second raw moment estimate
75 double beta2_t = std::pow(beta2, t);
76
77 double alpha_t = alpha * (sqrt(1 - beta2_t) / (1 - beta1_t));
78
79 // update param
80 param =
81 param.array() - alpha_t * (m.array() / (v.array().sqrt() + epsilon));
82 }
83
84 private:
85 double beta1;
86 double beta2;
87 double epsilon;
88 int t = 0;
89 int cl; // Current layer (should be initialized to the total number of layers
90 // - 0)
91 int ll; // Last layer (should also be initialized to numLayers - 1)
92 std::vector<Eigen::MatrixXd> mWeights; // First-moment vector for weights
93 std::vector<Eigen::MatrixXd> vWeights; // Second-moment vector for weights
94 std::vector<Eigen::MatrixXd> mBiases; // First-moment vector for biases
95 std::vector<Eigen::MatrixXd> vBiases; // Second-moment vector for biases
96
97 void insiderInit(size_t numLayers) override {
98 cl = numLayers - 1;
99 ll = numLayers - 1;
100
101 Eigen::MatrixXd dotMatrix = Eigen::MatrixXd::Zero(0, 0);
102
103 for (int i = mWeights.size(); i < numLayers; i++) {
104 mWeights.push_back(dotMatrix);
105 vWeights.push_back(dotMatrix);
106 mBiases.push_back(dotMatrix);
107 vBiases.push_back(dotMatrix);
108 };
109 }
110
111 void setCurrentL() {
112 // If current layer is the first layer set it to the last layer
113 cl = cl == 1 ? ll : cl - 1;
114 }
115};
116} // namespace NeuralNet
Definition Adam.hpp:12
void updateWeights(Eigen::MatrixXd &weights, const Eigen::MatrixXd &weightsGrad) override
This function updates the weights passed based on the selected Optimizer and the weights gradients.
Definition Adam.hpp:34
void updateBiases(Eigen::MatrixXd &biases, const Eigen::MatrixXd &biasesGrad) override
This function updates the biases passed based on the Optimizer and the biases gradients.
Definition Adam.hpp:39
Adam(double alpha=0.001, double beta1=0.9, double beta2=0.999, double epsilon=10E-8)
Definition Adam.hpp:24
Definition Optimizer.hpp:6