/**
 * Base class for all layers.
 *
 * Copyright: 2017 Netflix, Inc.
 * License: $(LINK2 http://www.apache.org/licenses/LICENSE-2.0, Apache License Version 2.0)
 */
module vectorflow.neurallayer;

private
{
    import std.algorithm : canFind, map, sum;
    import std.conv : to;
    import std.string : lastIndexOf;
    import std.variant;

    import vectorflow.optimizers;
    import vectorflow.serde;
    import vectorflow.utils;
}

enum LayerT {
    DENSE,
    SPARSE
}

struct SparseF {
    uint id;
    float val;
}

struct SparseFG {
    uint id;
    float val;
    ushort group;
}

/**
 * Abstract base class of all layers in the net.
 */
abstract class NeuralLayer {

    ///
    string name;
    ///
    LayerT type;

    /// total input dimension of this layer (sum of the output dimensions of its parents)
    size_t dim_in;
    /// total output dimension of this layer
    size_t dim_out;

    /// array referencing all the children of this layer
    NeuralLayer[] children;
    /// array referencing all the parents of this layer
    NeuralLayer[] parents;

    protected bool _learnable;
    /// whether or not this layer has any parameters to be learnt
    final @property bool learnable(){return _learnable;}

    private ushort num_parents_seen;

    /// dense output vector of this layer (might be unused)
    float[] out_d;
    /// sparse output vector of this layer (might be unused)
    SparseF[] out_s;
    /// array of gradients to backpropagate to parents
    float[][] backgrads; // total sum of sizes should be dim_in

    void init(double random_scale){}
    abstract void predict();

    void accumulate_grad(float[] grad){}
    void accumulate_grad(SparseF[] grad){}

    abstract @property ulong num_params();
    NeuralLayer dup(){return null;}

    void allocate_interface()
    {
        if(type == LayerT.DENSE)
            out_d.length = dim_out;
        backgrads.length = 0;
        foreach(p; parents)
        {
            if(!not_learnable_branch(p))
                backgrads ~= new float[p.dim_out];
            else
                backgrads ~= new float[0];
        }
    }
    void allocate_params(){}
    void allocate_grad_params(){}

    Optimizer optimizer;
    final @property bool optimizer_set(){return optimizer !is null;}

    this(){}

    this(ulong dim_out_, LayerT type_)
    {
        dim_out = dim_out_.to!size_t;
        dim_in = 0;
        set_type(type_);
    }

    protected void set_type(LayerT t)
    {
        type = t;
        if(type == LayerT.DENSE)
            out_d.length = dim_out;
        else if(type == LayerT.SPARSE)
        {
            out_s.length = 0;
            out_s.reserve(100_000); // pre-allocate for the expected non-zeros per row, to avoid reallocations
        }
    }

    final void forward_prop()
    {
        num_parents_seen++;
        if(num_parents_seen >= parents.length) // ready to compute value
        {
            num_parents_seen = 0;
            // compute node prediction, using parents (if any) data
            predict();

            // propagate message to children
            foreach(c; children)
                c.forward_prop();
        }
    }

    void backward_prop(V)(V[] grad)
        if ((is(V == float) || is(V == SparseF)))
    {
        if(optimizer !is null)
            optimizer.update(this, grad);
        else
            accumulate_grad(grad);
        foreach(ind, ref p; parents)
            p.backward_prop(backgrads[ind]);
    }

    void set_optimizer(Optimizer opt_)
    {
        optimizer = opt_;
    }

    void reset()
    {
        foreach(b; backgrads)
            b[] = 0;
    }

    void set_name(string name_)
    {
        if(name_ !is null && name_.length > 0)
            name = name_;
    }

    void ser(Serializer s)
    {
        s.write(this.classinfo.name);
        s.write(name);
        s.write(dim_in.to!ulong);
        s.write(dim_out.to!ulong);
        s.write(type.to!string);
        serialize(s);
    }

    void deser(Serializer s)
    {
        name = s.read!string();
        dim_in = s.read!ulong().to!size_t;
        dim_out = s.read!ulong().to!size_t;
        type = s.read!string().to!LayerT;

        deserialize(s);
    }

    protected void serialize(Serializer s){}
    protected void deserialize(Serializer s){}

    void pre_learning(){}
    void post_learning(){}

    // discard local parameters and use the ones from the argument instead
    void share_params(NeuralLayer layer){}

    void recompute_topology()
    {
        dim_in = parents.map!(x => x.dim_out).sum;
    }

    static bool not_learnable_branch(NeuralLayer layer)
    {
        bool not_learnable = !layer.learnable;
        foreach(p; layer.parents)
            not_learnable &= not_learnable_branch(p);
        return not_learnable;
    }

    override string toString()
    {
        auto fullname = this.classinfo.name;
        auto classname = fullname[fullname.lastIndexOf('.') + 1 .. $];
        return "layer." ~ classname ~
            "[dim_in:" ~ dim_in.to!string ~
            ", dim_out:" ~ dim_out.to!string ~
            "]";
    }
}
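
// ---------------------------------------------------------------------------
// Illustration only, not part of the library: a minimal sketch of how a
// concrete layer could plug into the NeuralLayer interface, assuming a single
// dense parent of the same dimension. The name `IdentityLayer` and its wiring
// are assumptions made for this example; real layers would also override
// allocation, serialization and gradient accumulation as needed, and gradient
// routing is omitted here for brevity.
// ---------------------------------------------------------------------------
class IdentityLayer : NeuralLayer
{
    this(ulong dim)
    {
        super(dim, LayerT.DENSE);
        _learnable = false; // no parameters to learn
    }

    // copy the single dense parent's output straight through
    // (assumes parents[0] is dense and has the same output dimension)
    override void predict()
    {
        out_d[] = parents[0].out_d[];
    }

    override @property ulong num_params(){return 0;}
}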
/**
 * Base class for all roots of the net.
 */
abstract class InputLayer : NeuralLayer
{
    Variant input;

    this(){super();}

    this(ulong dim_out, LayerT type)
    {
        super(dim_out, type);
        _learnable = false;
    }

    final void forward_prop(T)(T obs)
    {
        static if(!is(T == Variant))
        {
            Variant v = obs;
            input = v;
        }
        else
            input = obs;
        super.forward_prop(); // propagate forward in the graph
    }

    abstract override void predict();

    override void accumulate_grad(V)(V[] grad) pure
        if ((is(V == float) || is(V == SparseF))) {}

    override void backward_prop(V)(V[] grad) pure
        if ((is(V == float) || is(V == SparseF))) {}

    override @property ulong num_params(){return 0;}

    override void recompute_topology(){}
    override void allocate_interface(){}
}
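
// ---------------------------------------------------------------------------
// Illustration only: a sketch (assumed, not part of the library's test suite)
// of the learnability pruning done by allocate_interface(). A layer only
// allocates a gradient buffer toward a parent when that parent's branch
// contains something learnable; frozen branches get an empty buffer instead.
// `StubLayer` below is a hypothetical minimal layer defined just for this
// sketch.
// ---------------------------------------------------------------------------
version(unittest)
{
    private class StubLayer : NeuralLayer
    {
        this(bool learnable_)
        {
            super(1, LayerT.DENSE);
            _learnable = learnable_;
        }
        override void predict(){}
        override @property ulong num_params(){return 0;}
    }
}

unittest
{
    auto frozen = new StubLayer(false);   // branch with nothing learnable
    auto trainable = new StubLayer(true); // learnable child
    trainable.parents ~= frozen;
    frozen.children ~= trainable;

    assert(NeuralLayer.not_learnable_branch(frozen));
    assert(!NeuralLayer.not_learnable_branch(trainable));

    trainable.recompute_topology();  // dim_in = sum of parents' dim_out = 1
    trainable.allocate_interface();
    // no gradient buffer is allocated toward the frozen branch
    assert(trainable.backgrads.length == 1 && trainable.backgrads[0].length == 0);
}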