/**
 * Implementation of the various computational layers.
 *
 * Copyright: 2017 Netflix, Inc.
 * License: $(LINK2 http://www.apache.org/licenses/LICENSE-2.0, Apache License Version 2.0)
 */
module vectorflow.layers;

private
{
import std.algorithm : all, any, map, sum;
import std.array;
import std.conv : text, to;
import std.random : uniform01;
import std.string : format, split;

import vectorflow.neurallayer;
import vectorflow.optimizers;
import vectorflow.regularizers;
import vectorflow.serde;
import vectorflow.utils;
import vectorflow.math;
}


/**
 * Linear layer accepting sparse or dense parents and outputting a dense vector.
Examples:
-----------------
auto l1 = Linear(10); // 10 dense output neurons, each with an intercept
auto l2 = Linear(5, false); // 5 dense output neurons, without intercepts
-----------------
 *
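A Linear layer is typically wired into a net; a minimal sketch (assuming the
`NeuralNet.stack` API from vectorflow.neuralnet):
-----------------
auto net = NeuralNet()
    .stack(DenseData(20))
    .stack(Linear(10));
-----------------
 *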
 */
class Linear : NeuralLayer {

    float[][] W;
    float[][] grad;
    // 0 or 1; also used as an offset/multiplier in the weight-row layout below
    protected size_t _with_intercept;
    final @property bool with_intercept(){return _with_intercept == 1;}

    AdditiveLinearPrior[] priors;
    ProxyLinearPrior prox;

    this(){super();}
    mixin opCallNew;

    this(ulong dim_out, bool with_intercept_ = true)
    {
        super(dim_out, LayerT.DENSE);
        _learnable = true;
        _with_intercept = with_intercept_;
    }

    override void init(double rand_scale)
    {
        init_matrix_rand(W, rand_scale);
        if(_with_intercept)
        {
            // initialize intercept at 0
            foreach(k; 0..dim_out)
                W[k][0] = 0;
        }
        out_d[] = 0;
    }

    override void allocate_params()
    {
        auto din = dim_in + _with_intercept;
        W = allocate_matrix_zero!float(dim_out, din.to!size_t);
    }

    override void allocate_grad_params()
    {
        auto din = dim_in + _with_intercept;
        grad = allocate_matrix_zero!float(dim_out, din.to!size_t);
    }

    override void predict()
    {
        foreach(k; 0..dim_out)
        {
            // row layout: [intercept (if any), parent_1 block, parent_2 block, ...]
            auto row = W[k];
            float dp = _with_intercept * row[0]; // no-op when there is no intercept
            auto offset = _with_intercept;
            foreach(l; parents)
            {
                final switch(l.type)
                {
                    case LayerT.DENSE:
                        dp += dotProd(row[offset..offset+l.dim_out], l.out_d);
                        break;

                    case LayerT.SPARSE:
                        foreach(ref f; l.out_s)
                            dp += row[offset + f.id] * f.val;
                        break;
                }
                offset += l.dim_out;
            }
            out_d[k] = dp;
        }
    }

    override void accumulate_grad(float[] ext_grad)
    {
        // gradient with respect to the parameters
        foreach(k; 0..dim_out)
        {
            auto row = grad[k];
            float g = ext_grad[k];
            _accumulate_grad_row(row, g, k);
        }

        // now backprop the gradient to the parents
        auto offset = _with_intercept;
        foreach(i, ref b; backgrads)
        {
            if(parents[i].type == LayerT.SPARSE) // TODO: backprop through sparse parents
                continue;
            if(b.length == 0)
                continue;
            foreach(j; 0..dim_out)
                axpy(ext_grad[j], W[j][offset..offset+b.length], b);
            offset += b.length;
        }
    }

    override void accumulate_grad(SparseF[] ext_grad)
    {
        // gradient with respect to the parameters
        foreach(ref SparseF fg; ext_grad)
        {
            auto row = grad[fg.id];
            float g = fg.val;
            _accumulate_grad_row(row, g, fg.id);
        }

        // now backprop the gradient to the parents
        auto offset = _with_intercept;
        foreach(i, ref b; backgrads)
        {
            if(parents[i].type == LayerT.SPARSE) // TODO: backprop through sparse parents
                continue;
            if(b.length == 0)
                continue;
            foreach(ref SparseF fg; ext_grad)
                axpy(fg.val, W[fg.id][offset..offset+b.length], b);
            offset += b.length;
        }
    }

    final protected void _accumulate_grad_row(float[] row, float g, ulong index)
    {
        auto offset = _with_intercept;
        foreach(l; parents)
        {
            final switch(l.type)
            {
                case LayerT.DENSE:
                    axpy(g, l.out_d, row[offset..offset+l.dim_out]);
                    break;
                case LayerT.SPARSE:
                    axpy(g, l.out_s, row[offset..offset+l.dim_out]);
                    break;
            }
            offset += l.dim_out;
        }
        row[0] += _with_intercept * g; // intercept gradient (no-op without intercept)
    }

    override void serialize(Serializer s)
    {
        s.write(_with_intercept.to!ulong);
        s.write(W.length.to!ulong);
        foreach(i; 0..W.length)
            s.write_vec(W[i]);
    }

    override void deserialize(Serializer s)
    {
        _with_intercept = s.read!ulong().to!size_t;
        W.length = s.read!ulong().to!size_t;
        foreach(i; 0..W.length)
            W[i] = s.read_vec!float();
    }

    override NeuralLayer dup()
    {
        auto cp = new Linear(dim_out, _with_intercept == 1);
        cp.set_name(name);
        foreach(p; priors)
            cp.priors ~= p.dup;
        if(prox !is null)
            cp.prox = prox.dup;

        return cp;
    }

    override void share_params(NeuralLayer l)
    {
        auto c = l.to!Linear;
        W = c.W;
        _with_intercept = c.with_intercept;
    }

    override @property ulong num_params()
    {
        if(W.length == 0)
            return 0;
        return W.length * W[0].length;
    }

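    /**
     * Attach an additive prior to this layer's weights.
     *
     * A minimal sketch, assuming the `L2Prior` additive prior defined in
     * vectorflow.regularizers:
     * ---
     * auto l = Linear(10).prior(L2Prior(0.01));
     * ---
     */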
    Linear prior(AdditiveLinearPrior prior)
    {
        priors ~= prior;
        prior.register(this);
        return this;
    }

    Linear prior(ProxyLinearPrior prior)
    {
        if(prox !is null)
            throw new Exception("Only a single proxy prior is supported for now.");
        prox = prior;
        prior.register(this);
        return this;
    }
}

/**
 * DropOut layer accepting either all-dense or all-sparse parents.
 *
 * Feature rescaling happens automatically at training time: kept neurons are
 * scaled by 1 / (1 - drop_rate) (inverted dropout), so no rescaling is needed
 * at prediction time.
 * Example:
 * ---
 * auto l = DropOut(0.3); // drop 30% of the input neurons at random
 * ---
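 *
 * A sketch of placing dropout between two layers (assuming the
 * `NeuralNet.stack` API from vectorflow.neuralnet):
 * ---
 * auto net = NeuralNet()
 *     .stack(DenseData(20))
 *     .stack(DropOut(0.5))
 *     .stack(Linear(10));
 * ---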
 */
class DropOut : NeuralLayer {

    float _drop_rate;
    float _scale_ratio;
    protected void delegate() _predict;
    protected void delegate(float[]) _acc_grad;

    this(){super();}
    mixin opCallNew;

    this(float drop_rate)
    {
        super(0, LayerT.DENSE);
        _learnable = false;
        set_drop_rate(drop_rate);
    }

    override void recompute_topology()
    {
        super.recompute_topology();
        dim_out = dim_in;
        auto all_dense = parents.all!(x => x.type == LayerT.DENSE);
        auto all_sparse = parents.all!(x => x.type == LayerT.SPARSE);
        if(!all_dense && !all_sparse)
            throw new Exception(
                "DropOut layer parents must all be of the same kind " ~
                "(sparse or dense outputs).");
        if(all_dense)
        {
            set_type(LayerT.DENSE);
            _predict = &_predict_dense;
            _acc_grad = &_acc_grad_dense;
        }
        else
        {
            set_type(LayerT.SPARSE);
            _predict = &_predict_sparse;
            _acc_grad = &_acc_grad_sparse;
        }
    }

    override void predict()
    {
        _predict();
    }

    // prediction-time path: just concatenate and re-offset the parents' outputs
    void _predict_sparse()
    {
        // @TODO: this is currently very slow because of allocations
        out_s.length = 0;
        out_s ~= parents[0].out_s; // no need to re-index the first parent
        if(parents.length > 1)
        {
            ulong offset = parents[0].dim_out;
            foreach(p; parents[1..$])
            {
                foreach(ref f; p.out_s)
                    out_s ~= SparseF((f.id + offset).to!uint, f.val);
                offset += p.dim_out;
            }
        }
    }

    void _predict_dense()
    {
        size_t offset = 0;
        foreach(p; parents)
        {
            out_d[offset..offset + p.dim_out] = p.out_d[];
            offset += p.dim_out;
        }
    }

    // training-time path: drop each neuron with probability _drop_rate and
    // rescale the kept ones by _scale_ratio (inverted dropout)
    void _predict_train_sparse()
    {
        // @TODO: this is currently very slow because of allocations
        out_s.length = 0;
        ulong offset = 0;
        foreach(p; parents)
        {
            foreach(ref f; p.out_s)
            {
                if(uniform01(RAND_GEN) > _drop_rate)
                    out_s ~= SparseF((f.id + offset).to!uint, f.val * _scale_ratio);
            }
            offset += p.dim_out;
        }
    }

    void _predict_train_dense()
    {
        size_t offset = 0;
        foreach(p; parents)
        {
            foreach(i; 0..p.dim_out)
            {
                if(uniform01(RAND_GEN) > _drop_rate)
                    out_d[offset + i] = p.out_d[i] * _scale_ratio;
                else
                    out_d[offset + i] = 0.0f;
            }
            offset += p.dim_out;
        }
    }

    override void accumulate_grad(float[] grad)
    {
        _acc_grad(grad);
    }

    void _acc_grad_sparse(float[] grad)
    {
        if(grad.length == 0)
            return;
        throw new NotImplementedException("");
    }

    void _acc_grad_dense(float[] grad)
    {
        if(grad.length == 0)
            return;
        size_t offset = 0;
        foreach(ip, ref p; parents)
        {
            if(parents[ip].type == LayerT.SPARSE)
                throw new NotImplementedException("");
            // for dense parents: only pass the gradient through the neurons
            // that were not dropped (i.e. with a non-zero output)
            foreach(i; 0..p.dim_out)
            {
                if(fabs(out_d[offset + i]) > 1e-11)
                    backgrads[ip][i] += grad[offset + i];
            }
            offset += p.dim_out;
        }
    }

    override void pre_learning()
    {
        if(type == LayerT.DENSE)
            _predict = &_predict_train_dense;
        else
            _predict = &_predict_train_sparse;
    }

    override void post_learning()
    {
        if(type == LayerT.DENSE)
            _predict = &_predict_dense;
        else
            _predict = &_predict_sparse;
    }

    protected void set_drop_rate(float rate)
    {
        _drop_rate = rate;
        // inverted-dropout scaling: e.g. rate = 0.3 scales kept neurons by 1/0.7
        _scale_ratio = 1.0 / (1.0 - rate);
    }

    override void set_optimizer(Optimizer opt_)
    {
        // nothing to optimize
        optimizer = null;
    }

    override void serialize(Serializer s)
    {
        s.write(_drop_rate);
    }

    override void deserialize(Serializer s)
    {
        _drop_rate = s.read!float();
    }

    override NeuralLayer dup()
    {
        auto cp = new DropOut();
        cp.set_name(name);
        cp.set_drop_rate(_drop_rate);
        return cp;
    }

    override @property ulong num_params()
    {
        return 0;
    }
}


/**
 * ReLU activation layer accepting dense parents.
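 *
 * A minimal sketch of its usage (assuming the `NeuralNet.stack` API from
 * vectorflow.neuralnet):
 * ---
 * auto net = NeuralNet()
 *     .stack(DenseData(20))
 *     .stack(Linear(10))
 *     .stack(ReLU());
 * ---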
 */
class ReLU : NeuralLayer {

    this()
    {
        super(0, LayerT.DENSE);
        _learnable = false;
    }
    mixin opCallNew;

    override void recompute_topology()
    {
        super.recompute_topology();
        dim_out = dim_in;
        if(!parents.all!(x => x.type == LayerT.DENSE))
            throw new Exception("ReLU layer only supports dense parents.");
    }

    override void predict()
    {
        size_t offset = 0;
        foreach(p; parents)
        {
            relu(p.out_d, out_d[offset..offset+p.dim_out]);
            offset += p.dim_out;
        }
    }

    version(LDC)
    {
        import ldc.attributes;

        // backprop kernel: pass the gradient through wherever the
        // pre-activation (equivalently the ReLU output) is strictly positive
        pragma(inline, true)
        @fastmath static void _relu_op(float[] b, float[] o, float[] g) pure
        {
            for(int i = 0; i < o.length; ++i)
            {
                if(o[i] > 0)
                    b[i] += g[i];
            }
        }
    }

    override void accumulate_grad(V)(V[] grad)
        if ((is(V == float) || is(V == SparseF)))
    {
        ulong offset = 0;
        foreach(ip, ref p; parents)
        {
            version(LDC)
            {
                _relu_op(backgrads[ip], p.out_d, grad[offset..offset+p.dim_out]);
            }
            else
            {
                foreach(i; 0..p.dim_out)
                {
                    if(p.out_d[i] > 0)
                        backgrads[ip][i] += grad[offset + i];
                }
            }
            offset += p.dim_out;
        }
    }

    override void set_optimizer(Optimizer opt_)
    {
        // nothing to optimize
        optimizer = null;
    }

    override NeuralLayer dup()
    {
        auto cp = new ReLU();
        cp.set_name(name);
        return cp;
    }

    override @property ulong num_params()
    {
        return 0;
    }
}

/**
 * TanH activation layer accepting dense parents.
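 *
 * A minimal sketch (TanH squashes each input neuron into (-1, 1)):
 * ---
 * auto l = TanH();
 * ---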
 */
class TanH : NeuralLayer {

    this()
    {
        super(0, LayerT.DENSE);
        _learnable = false;
    }
    mixin opCallNew;

    override void recompute_topology()
    {
        super.recompute_topology();
        dim_out = dim_in;
        if(!parents.all!(x => x.type == LayerT.DENSE))
            throw new Exception("TanH layer only supports dense parents.");
    }

    override void predict()
    {
        size_t offset = 0;
        foreach(p; parents)
        {
            tanh(p.out_d, out_d[offset..offset + p.dim_out]);
            offset += p.dim_out;
        }
    }

    override void accumulate_grad(float[] grad)
    {
        size_t offset = 0;
        foreach(ip, ref p; parents)
        {
            // TODO: fixme when a layer has multiple children: should accumulate, not overwrite
            // d tanh(x)/dx = 1 - tanh(x)^2, so recompute tanh of the parent
            // output into backgrads and use it as scratch space
            tanh(p.out_d, backgrads[ip]);
            foreach(i; 0..p.dim_out)
                backgrads[ip][i] = (1 - backgrads[ip][i] * backgrads[ip][i]) * grad[offset + i];

            offset += p.dim_out;
        }
    }

    override void set_optimizer(Optimizer opt_)
    {
        // nothing to optimize
        optimizer = null;
    }

    override NeuralLayer dup()
    {
        auto cp = new TanH();
        cp.set_name(name);
        return cp;
    }

    override @property ulong num_params()
    {
        return 0;
    }
}


/**
 * Scaled Exponential Linear Unit activation layer accepting dense parents.
 * See $(LINK2 https://arxiv.org/pdf/1706.02515.pdf, Self-Normalizing Neural Networks)
 * for details.
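 *
 * A minimal sketch:
 * ---
 * auto l = SeLU();
 * ---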
 */
class SeLU : NeuralLayer {

    float _alpha;
    float _lambda;

    this()
    {
        super(0, LayerT.DENSE);
        _learnable = false;

        // fixed-point constants from the Self-Normalizing Neural Networks paper
        _alpha = 1.67326324235;
        _lambda = 1.05070098736;
    }
    mixin opCallNew;

    override void recompute_topology()
    {
        super.recompute_topology();
        dim_out = dim_in;
        if(!parents.all!(x => x.type == LayerT.DENSE))
            throw new Exception("SeLU layer only supports dense parents.");
    }

    override void predict()
    {
        size_t offset = 0;
        foreach(p; parents)
        {
            foreach(j; 0..p.dim_out)
                if(p.out_d[j] > 0)
                    out_d[offset + j] = _lambda * p.out_d[j];
                else
                    out_d[offset + j] = _lambda * (_alpha * exp(p.out_d[j]) - _alpha);
            offset += p.dim_out;
        }
    }

    override void accumulate_grad(float[] grad)
    {
        size_t offset = 0;
        foreach(ip, ref p; parents)
        {
            foreach(j; 0..p.dim_out)
                if(p.out_d[j] > 0)
                    backgrads[ip][j] += _lambda * grad[offset + j];
                else
                    backgrads[ip][j] += _lambda * _alpha * exp(p.out_d[j]) * grad[offset + j];
            offset += p.dim_out;
        }
    }

    override void set_optimizer(Optimizer opt_)
    {
        // nothing to optimize
        optimizer = null;
    }

    override NeuralLayer dup()
    {
        auto cp = new SeLU();
        cp.set_name(name);
        return cp;
    }

    override @property ulong num_params()
    {
        return 0;
    }
}


/**
 * On-the-fly polynomial kernel expansion of sparse input.
 *
 * This layer performs polynomial kernel expansion of a set of sparse features
 * based on a group attribute of these features.
 * The features fed to this layer need to be a SparseFG[].
 * It assumes that the feature ids are uniform random numbers (hashes),
 * so that a cross-feature hash can be generated efficiently by just XOR-ing
 * together the individual hashes of the monomial; the cross-feature value is
 * the product of the individual values.
 * This layer is meant to be used as part of a NeuralNet() topology at test
 * time, but it is preferable to run the expansion outside the net at training
 * time, so that it runs only once while building the dataset instead of
 * rebuilding the cross-features at every pass.
 * Example:
 * ---
 * // polynomial kernel (x_1 * x_3, x_1 * x_2 * x_4) in a 1k-dimensional space:
 * auto l = SparseKernelExpander(1_000, "1^3,1^2^4");
 * ---
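 *
 * A sketch of the input format (assuming a `SparseFG(id, value, group)`
 * constructor order; `h1` and `h3` are hypothetical hashed feature ids):
 * ---
 * // one feature in group 1 and one in group 3; given the cross "1^3" above,
 * // the row gets expanded with the extra cross-feature (h1 ^ h3, 1.0f * 0.5f)
 * auto row = [SparseFG(h1, 1.0f, 1), SparseFG(h3, 0.5f, 3)];
 * ---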
 */
class SparseKernelExpander : InputLayer
{
    short[] here;
    uint[][] single_hashes;
    float[][] single_vals;
    ushort[][] cross2build;

    uint[] hash_buff;
    float[] val_buff;

    string _cross_features_str;
    uint _max_group_id;
    uint _buff_single_feats_sz;

    this(){super();}
    mixin opCallNew;

    /**
    * Instantiate a new SparseKernelExpander layer.
    *
    * Params:
    *   dim_out = total dimensionality of the input data
    *   cross_feats_str = a string of the form `1^3,2^4^1` specifying which
    *                     groups need to be crossed. The commas delimit the
    *                     monomials; the carets delimit the group ids present
    *                     in each monomial
    *   max_group_id = maximum group id present in the data, 1-indexed
    *   buff_single_feats_sz = upper bound on the maximum number of features
    *                          per row post expansion
    */
    this(ulong dim_out, string cross_feats_str, uint max_group_id = 100u,
            uint buff_single_feats_sz = 50_000)
    {
        super(dim_out, LayerT.SPARSE);
        _learnable = false;

        if(max_group_id > ushort.max)
            throw new Exception(
                "Doesn't support group ids bigger than %d".format(ushort.max));
        _cross_features_str = cross_feats_str;
        _max_group_id = max_group_id;
        _buff_single_feats_sz = buff_single_feats_sz;

        _init();
    }

    protected void _init()
    {
        here.length = _max_group_id + 1;
        single_hashes.length = _max_group_id + 1;
        single_vals.length = _max_group_id + 1;
        foreach(k; 0.._max_group_id + 1)
        {
            single_hashes[k].length = _buff_single_feats_sz;
            single_vals[k].length = _buff_single_feats_sz;
        }
        hash_buff.length = _buff_single_feats_sz;
        val_buff.length = _buff_single_feats_sz;
        reset();

        auto cfs = _cross_features_str.split(',');
        foreach(cf; cfs)
        {
            auto ids = cf.split('^').map!(to!ushort).array;
            if(ids.any!(g => g > _max_group_id))
                throw new Exception(
                    ("One group id for cross-feature `%s` is too large. " ~
                     "Maximum group id specified is `%s`").format(
                        cf, _max_group_id));
            cross2build ~= ids;
        }
    }

    override void reset() pure
    {
        here[] = 0;
    }

    override void predict()
    {
        assert(input.convertsTo!(SparseFG[]), (
            "Wrong type: you need to feed features of type `SparseFG[]` to " ~
            "SparseKernelExpander layer `%s`, not `%s`.").format(
                name, input.type));

        auto feats = input.get!(SparseFG[]);
        out_s.length = feats.length;
        foreach(i, ref f; feats)
        {
            monitor(f.group, f.id, f.val);
            out_s[i] = SparseF(f.id, f.val);
        }
        expand!(SparseF[])(out_s);
    }

    // Record one feature's (hash, value) under its group id.
    final void monitor(ushort id, uint hash, float val) pure
    {
        assert(id > 0, "Group ids are 1-indexed.");
        assert(id <= _max_group_id, text(
                "Group-id bigger than the maximum group-id specified when ",
                "instantiating SparseKernelExpander."));
        short cnt_hashes = here[id];
        single_hashes[id][cnt_hashes] = hash;
        single_vals[id][cnt_hashes] = val;
        here[id] += 1;
    }

    // @TODO: extend to non-hashed ids by extra hashing.
    // Crossing a bag (a group carrying multiple features) with another bag
    // is not supported.
    final void expand(T)(ref T buff)
    {
        foreach(cf; cross2build)
        {
            // check that every group of the monomial is present in the row,
            // and find the bag, if any: the one group with multiple features
            bool all_here = true;
            ulong ind_bag_feat = -1; // sentinel: no bag found
            size_t size_bag = 1;
            foreach(ind_cf; 0..cf.length)
            {
                auto num_hashes = here[cf[ind_cf]];
                if(num_hashes == 0)
                {
                    all_here = false;
                    break;
                }
                if(num_hashes > 1)
                {
                    ind_bag_feat = ind_cf;
                    size_bag = num_hashes;
                }
            }
            if(!all_here)
                continue;
            // XOR the hashes and multiply the values of the monomial,
            // generating one cross-feature per element of the bag
            hash_buff[0..size_bag] = 0;
            val_buff[0..size_bag] = 1.0;
            foreach(ind_cf; 0..cf.length)
            {
                auto cf_id = cf[ind_cf];
                short num_hashes = here[cf_id];

                if(ind_cf != ind_bag_feat)
                {
                    foreach(j; 0..size_bag)
                    {
                        hash_buff[j] ^= single_hashes[cf_id][0];
                        val_buff[j] *= single_vals[cf_id][0];
                    }
                }
                else
                {
                    foreach(j; 0..size_bag)
                    {
                        hash_buff[j] ^= single_hashes[cf_id][j];
                        val_buff[j] *= single_vals[cf_id][j];
                    }
                }
            }
            // append the expanded cross-features to the buffer
            foreach(j; 0..size_bag)
                buff ~= SparseF(hash_buff[j], val_buff[j]);
        }
    }

    override void serialize(Serializer s)
    {
        s.write(_cross_features_str);
        s.write(_max_group_id);
        s.write(_buff_single_feats_sz);
    }

    override void deserialize(Serializer s)
    {
        _cross_features_str = s.read!string();
        _max_group_id = s.read!uint();
        _buff_single_feats_sz = s.read!uint();
        _learnable = false;
        _init();
    }

    override NeuralLayer dup()
    {
        auto cp = new SparseKernelExpander(dim_out, _cross_features_str,
                _max_group_id, _buff_single_feats_sz);
        cp.set_name(name);
        return cp;
    }

    override void share_params(NeuralLayer l)
    {
        auto c = l.to!SparseKernelExpander;
        input = c.input;
    }
}


/**
 * Input layer implementation common to DenseData and SparseData.
 */
class Data(alias TYPE) : InputLayer
{
    this(){super();}

    this(ulong dim_out)
    {
        super(dim_out, TYPE);
    }

    override void predict()
    {
        static if(TYPE == LayerT.DENSE)
        {
            assert(input.convertsTo!(float[]), (
            "Wrong type: you need to feed features of type `float[]` to " ~
            "DenseData layer `%s`, not `%s`.").format(name, input.type));

            out_d = input.get!(float[]);
        }
        else
        {
            assert(input.convertsTo!(SparseF[]), (
            "Wrong type: you need to feed features of type `SparseF[]` to " ~
            "SparseData layer `%s`, not `%s`.").format(name, input.type));

            out_s = input.get!(SparseF[]);
        }
    }

    override NeuralLayer dup()
    {
        auto cp = new Data!TYPE(dim_out);
        cp.set_name(name);
        return cp;
    }

    override void share_params(NeuralLayer l)
    {
        auto c = l.to!(Data!TYPE);
        input = c.input;
    }
}

/**
 * Input layer representing a dense float[].
Example:
-----------------
auto l = DenseData(50); // this layer will feed a 50-dimensional dense float[] to its children
-----------------
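A net rooted at this layer is fed a float[] of that length at prediction time;
a minimal sketch (assuming a `net` built on top of this layer and the
`NeuralNet.predict` API):
-----------------
auto preds = net.predict(new float[50]);
-----------------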
 *
 */
class DenseData : Data!(LayerT.DENSE)
{
    this(){super();}
    this(ulong dim_out)
    {
        super(dim_out);
    }
    mixin opCallNew;
}

/**
 * Input layer representing a sparse array SparseF[] of (uint, float) pairs.
Example:
-----------------
auto l = SparseData(100); // 100 is the total dimensionality of the input space,
// which means that the indices of the SparseF pairs are < 100. For example,
// [(13, 4.7), (2, -0.12), (87, 0.6)]
-----------------
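A minimal sketch of feeding one such row at prediction time (assuming a `net`
rooted at this layer and the `NeuralNet.predict` API):
-----------------
auto preds = net.predict([SparseF(13, 4.7f), SparseF(2, -0.12f), SparseF(87, 0.6f)]);
-----------------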
 *
 */
class SparseData : Data!(LayerT.SPARSE)
{
    this(){super();}
    this(ulong dim_out)
    {
        super(dim_out);
    }
    mixin opCallNew;
}

class NotImplementedException : Exception
{
    this(string msg)
    {
        super(msg);
    }
}