/**
 The library ships with several predefined loss functions, as well as
 a callback-based way to specify a custom loss.

 The predefined losses are: `logistic`, `square`, `multinomial`.

 For these losses, if the row has a `.weight` attribute, it will be used
 to weight the loss during MLE.

 To specify a custom loss function, implement a gradient callback of the
 form `S delegate(R net_out, ref T ex, ref V[] grad)`, which is expected
 to populate `grad` with the gradient of the loss on datapoint `ex` with
 respect to the output of the net `net_out`.

 `S` is `void` or a numeric type (`float`, `double`, `int`, ...).
    If numeric, the callback is expected to return the loss value on
    training sample `ex` for monitoring purposes.

 `R` is `float[]` or `NeuralNet`. If `float[]`, the net is expected to have
    a single leaf and the callback receives the predictions of this leaf
    after forward-prop. If `NeuralNet`, the callback receives a reference
    to the net after forward-prop, which is useful when the loss function
    depends on the values of multiple layers.

 `T` is the templatized row. At a minimum, this row needs an attribute whose
    name starts with `feature` so that forward-prop can be performed (see the
    example row type below).

 `V` is `float[]` or `SparseF[]`. If `float[]`, the backpropagation will be
    run densely. If `SparseF[]`, the last layer will be backpropagated
    sparsely, which is more efficient when the gradient is sparse and the
    output dimension is large (see the second example below).
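
 For instance, a minimal row type could look like this (a sketch; the
 `SparseF[]` features assume a sparse input layer, dense `float[]` features
 work as well):
 ---
 struct Obs {
     float label;        // target, required by the predefined losses
     float weight;       // optional: weights the loss during MLE
     SparseF[] features; // name has to start with `feature` for forward-prop
 }
 ---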

 Examples:
 ---
 // median (L1) loss: minimize absolute differences
 import std.math : fabs;

 auto loss_grad = delegate float(float[] nn_out, ref Obs o, ref float[] grads)
 {
     // nn_out[0] is the prediction of the net after forward-prop
     auto pred = nn_out[0];
     // gradient of |pred - label| with respect to pred
     if(pred > o.label)
         grads[0] = 1.0f;
     else
         grads[0] = -1.0f;

     return fabs(pred - o.label); // loss value, returned so it can be
                                  // monitored during training
 };
 net.learn(data, loss_grad, ...);
 ---
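
 If the gradient is sparse, the same pattern applies with `SparseF[]`. A
 minimal sketch, assuming `SparseF` is the sparse `(id, value)` pair from
 `vectorflow.neurallayer` and `Obs.label` holds the true class index:
 ---
 // squared loss on the true-class score only: a single output coordinate
 // has a non-zero gradient, so the last layer is backpropagated sparsely
 auto sparse_grad = delegate void(float[] nn_out, ref Obs o, ref SparseF[] grads)
 {
     auto k = cast(uint)o.label; // index of the true class
     grads.length = 0;
     grads ~= SparseF(k, nn_out[k] - 1.0f); // push the true-class score to 1
 };
 net.learn(data, sparse_grad, ...);
 ---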

 Copyright: 2017 Netflix, Inc.
 License: $(LINK2 http://www.apache.org/licenses/LICENSE-2.0, Apache License Version 2.0)
*/
module vectorflow.losses;

private{
import vectorflow.math;
import vectorflow.utils : ct_msg;
}

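/**
Return the gradient callback of one of the predefined losses.

`T` is the row type (it needs a `label` attribute) and `WITH_VAL` selects
whether the callback also returns the loss value for monitoring purposes.

Params:
    loss = name of the predefined loss: `logistic`, `square` or `multinomial`
    args = extra arguments (unused by the predefined losses)
*/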
auto get_grad(T, alias WITH_VAL, V...)(string loss, V args)
{
    static if(!__traits(hasMember, T, "label"))
        static assert(0,
            "When using a predefined loss, your row needs to have a `label`" ~
            " attribute.");

    static if(__traits(hasMember, T, "weight"))
        ct_msg!("Using `weight` attribute to perform weighted MLE inference");
    switch(loss)
    {
        case "logistic":
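            // binary logistic loss: l(y, f) = log(1 + exp(-y * f)), y in {-1, +1}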
            static if(WITH_VAL)
            {
                return delegate float(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto label = o.label > 0 ? 1.0 : -1.0;
                    auto expp = exp(-label * nn_out[0]);
                    auto pr = 1.0 / (1.0 + expp);
                    grads[0] = -label * (1.0 - pr);
                    float loss = log(1.0 + expp);
                    static if(__traits(hasMember, T, "weight"))
                    {
                        grads[0] *= o.weight;
                        loss *= o.weight;
                    }
                    return loss;
                };
            }
            else
            {
                return delegate void(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto label = o.label > 0 ? 1.0 : -1.0;
                    auto expp = exp(-label * nn_out[0]);
                    auto pr = 1.0 / (1.0 + expp);
                    grads[0] = -label * (1.0 - pr);
                    static if(__traits(hasMember, T, "weight"))
                        grads[0] *= o.weight;
                };
            }
        case "square":
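            // squared loss: l(y, f) = 0.5 * (y - f)^2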
            static if(WITH_VAL)
            {
                return delegate float(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto diff = o.label - nn_out[0];
                    grads[0] = -diff;
                    static if(__traits(hasMember, T, "weight"))
                    {
                        grads[0] *= o.weight;
                        return 0.5 * diff * diff * o.weight;
                    }
                    else
                        return 0.5 * diff * diff;
                };
            }
            else
            {
                return delegate void(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto diff = o.label - nn_out[0];
                    grads[0] = -diff;
                    static if(__traits(hasMember, T, "weight"))
                        grads[0] *= o.weight;
                };
            }
        case "multinomial":
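            // multinomial logistic loss (softmax cross-entropy):
            // l(y, f) = -log(softmax(f)[y])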
            static if(WITH_VAL)
            {
                return delegate float(float[] nn_out, ref T o, ref float[] grads)
                {
                    double normalizer = 0;
                    foreach(i; 0..nn_out.length) // number of classes
                    {
                        auto expp = exp(nn_out[i]);
                        normalizer += expp;
                        grads[i] = expp;
                    }
                    double loss = 0;
                    foreach(i; 0..nn_out.length)
                    {
                        auto r = grads[i] / normalizer; // softmax probability of class i
                        double lab = round(o.label - i) == 0 ? 1.0 : 0.0; // 1 if i is the labeled class
                        if(lab > 0)
                            loss += log(r + 1e-9);
                        grads[i] = r - lab;
                        static if(__traits(hasMember, T, "weight"))
                            grads[i] *= o.weight;
                    }
                    static if(__traits(hasMember, T, "weight"))
                        loss *= o.weight;
                    return -loss;
                };
            }
            else
            {
                return delegate void(float[] nn_out, ref T o, ref float[] grads)
                {
                    double normalizer = 0;
                    foreach(i; 0..nn_out.length) // number of classes
                    {
                        auto expp = exp(nn_out[i]);
                        normalizer += expp;
                        grads[i] = expp;
                    }
                    foreach(i; 0..nn_out.length)
                    {
                        auto r = grads[i] / normalizer; // softmax probability of class i
                        double lab = round(o.label - i) == 0 ? 1.0 : 0.0; // 1 if i is the labeled class
                        grads[i] = r - lab;
                        static if(__traits(hasMember, T, "weight"))
                            grads[i] *= o.weight;
                    }
                };
            }
        default:
            throw new Exception("Unknown loss function: `" ~ loss ~ "`. " ~
                    "Predefined losses are `logistic`, `square` and " ~
                    "`multinomial`; for any other loss, provide your own " ~
                    "gradient callback.");
    }
}
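
/// A minimal usage sketch of `get_grad` (hypothetical row type; inside the
/// library, this is how a loss name given to `learn` maps to its callback):
unittest
{
    import std.math : abs;

    static struct Obs { float label; }

    // square loss, with the loss value returned for monitoring
    auto g = get_grad!(Obs, true)("square");
    auto o = Obs(1.0f);
    float[] net_out = [0.3f];
    float[] grads = [0.0f];

    auto loss = g(net_out, o, grads);
    assert(abs(grads[0] + 0.7f) < 1e-5);           // gradient is -(label - pred)
    assert(abs(loss - 0.5f * 0.7f * 0.7f) < 1e-5); // loss is 0.5 * diff^2
}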