/**
The library provides pre-implemented loss functions, as well as a
callback-based way to specify a custom loss.

The pre-implemented losses are: `logistic`, `square`, `multinomial`.

For these losses, if an attribute `.weight` is found in the row, it will
be used to weight the loss during MLE.

To specify a custom loss function, implement a gradient callback of the
form `S delegate(R net_out, ref T ex, ref V[] grad)` which is expected to
populate `grad` with the gradient of the loss on datapoint `ex` with
respect to the output of the net `net_out`.

`S` is `void` or numeric (float, double, int...).
If numeric, the callback is expected to return the loss value on
training sample `ex` for monitoring purposes.

`R` is `float[]` or `NeuralNet`. If `float[]`, the net is expected to have
a single leaf and the callback receives the predictions of this leaf after
forward-prop. If `NeuralNet`, the callback receives a reference to the net
after forward-prop. This is useful when the loss function depends on the
values of multiple layers.

`T` is the templatized row. At a minimum, this row needs an attribute
whose name starts with `feature` so that forward-prop can be run.

`V` is `float` or `SparseF`. If `float`, backpropagation will be run
densely. If `SparseF`, the last layer will be backpropagated sparsely,
which is more efficient when the gradient is sparse and the output
dimension is large.

Examples:
---
// median (L1) loss: minimize absolute differences
auto loss_grad = delegate float(float[] nn_out, ref Obs o, ref float[] grads)
{
    auto pred = nn_out[0]; // prediction of the net after forward-prop
    if(pred > o.label) // gradient of |pred - label| with respect to pred
        grads[0] = 1.0f;
    else
        grads[0] = -1.0f;

    return fabs(pred - o.label); // return the loss value so it's monitored
                                 // during training
};
net.learn(data, loss_grad, ...);
---
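
For the sparse case, here is a hypothetical sketch (not part of the original
examples) of a multiclass hinge loss whose gradient touches at most two
output coordinates. It assumes a row type `Obs` with a `uint label_id` field,
and that the callback appends `SparseF(index, value)` entries to `grads`:
---
// multiclass hinge: loss = max(0, 1 - score[label] + best other score)
auto sparse_grad = delegate float(float[] nn_out, ref Obs o, ref SparseF[] grads)
{
    grads.length = 0; // assumed convention: start from an empty gradient
    // find the highest-scoring class other than the true one
    uint best = o.label_id == 0 ? 1 : 0;
    foreach(i; 0..nn_out.length)
        if(i != o.label_id && nn_out[i] > nn_out[best])
            best = cast(uint)i;
    auto margin = 1.0f - nn_out[o.label_id] + nn_out[best];
    if(margin > 0) // margin violated: push the true class up, the offender down
    {
        grads ~= SparseF(o.label_id, -1.0f); // assuming SparseF(index, value)
        grads ~= SparseF(best, 1.0f);
    }
    return margin > 0 ? margin : 0.0f;
};
---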

Copyright: 2017 Netflix, Inc.
License: $(LINK2 http://www.apache.org/licenses/LICENSE-2.0, Apache License Version 2.0)
*/
module vectorflow.losses;

private{
import vectorflow.math;
import vectorflow.utils : ct_msg;
}

/**
Returns a gradient callback for one of the pre-implemented losses
(`logistic`, `square` or `multinomial`). When `WITH_VAL` is true, the
callback also returns the loss value for monitoring purposes.
*/
auto get_grad(T, alias WITH_VAL, V...)(string loss, V args)
{
    static if(!__traits(hasMember, T, "label"))
        static assert(0,
            "When using a predefined loss, your row needs to have a `label`" ~
            " attribute.");

    static if(__traits(hasMember, T, "weight"))
        ct_msg!("Using `weight` attribute to perform weighted MLE inference");
    switch(loss)
    {
        case "logistic": // binary logistic loss on the sign of `label`
            static if(WITH_VAL == true)
            {
                return delegate float(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto label = o.label > 0 ? 1.0 : -1.0;
                    auto expp = exp(-label * nn_out[0]);
                    auto pr = 1.0/(1.0 + expp); // probability of the true label
                    grads[0] = - label * (1.0 - pr);
                    float loss_val = log(1.0 + expp);
                    static if(__traits(hasMember, T, "weight"))
                    {
                        grads[0] *= o.weight;
                        loss_val *= o.weight;
                    }
                    return loss_val;
                };
            }
            else
            {
                return delegate void(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto label = o.label > 0 ? 1.0 : -1.0;
                    auto expp = exp(-label * nn_out[0]);
                    auto pr = 1.0/(1.0 + expp);
                    grads[0] = - label * (1.0 - pr);
                    static if(__traits(hasMember, T, "weight"))
                        grads[0] *= o.weight;
                };
            }
        case "square": // squared loss: 0.5 * (label - prediction)^2
            static if(WITH_VAL == true)
            {
                return delegate float(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto diff = o.label - nn_out[0];
                    grads[0] = - diff;
                    static if(__traits(hasMember, T, "weight"))
                    {
                        grads[0] *= o.weight;
                        return 0.5 * diff * diff * o.weight;
                    }
                    else
                        return 0.5 * diff * diff;
                };
            }
            else
            {
                return delegate void(float[] nn_out, ref T o, ref float[] grads)
                {
                    auto diff = o.label - nn_out[0];
                    grads[0] = - diff;
                    static if(__traits(hasMember, T, "weight"))
                        grads[0] *= o.weight;
                };
            }
        case "multinomial": // softmax cross-entropy, `label` is the class index
            static if(WITH_VAL == true)
            {
                return delegate float(float[] nn_out, ref T o, ref float[] grads)
                {
                    double normalizer = 0;
                    foreach(i; 0..nn_out.length) // number of classes
                    {
                        auto expp = exp(nn_out[i]);
                        normalizer += expp;
                        grads[i] = expp;
                    }
                    double loss_val = 0;
                    foreach(i; 0..nn_out.length)
                    {
                        auto r = grads[i] / normalizer; // softmax probability
                        double lab = round(o.label - i) == 0 ? 1.0 : 0.0;
                        if(lab > 0)
                            loss_val += log(r + 1e-9);
                        grads[i] = r - lab;
                        static if(__traits(hasMember, T, "weight"))
                            grads[i] *= o.weight;
                    }
                    static if(__traits(hasMember, T, "weight"))
                        loss_val *= o.weight;
                    return -loss_val;
                };
            }
            else
            {
                return delegate void(float[] nn_out, ref T o, ref float[] grads)
                {
                    double normalizer = 0;
                    foreach(i; 0..nn_out.length) // number of classes
                    {
                        auto expp = exp(nn_out[i]);
                        normalizer += expp;
                        grads[i] = expp;
                    }
                    foreach(i; 0..nn_out.length)
                    {
                        auto r = grads[i] / normalizer;
                        double lab = round(o.label - i) == 0 ? 1.0 : 0.0;
                        grads[i] = r - lab;
                        static if(__traits(hasMember, T, "weight"))
                            grads[i] *= o.weight;
                    }
                };
            }
        default:
            throw new Exception("Unknown loss function: " ~ loss ~ ". You " ~
                "have to compute the gradient of your loss yourself.");
    }
}
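
// A minimal usage sketch of `get_grad` (an assumption, not part of the
// original module): it exercises the `square` loss with loss-value
// monitoring, assuming `WITH_VAL` can be instantiated with the literal
// `true`, on a hypothetical row type with a single `label` field.
unittest
{
    import std.math : abs;

    struct Row { float label; }
    auto r = Row(1.0f);
    float[] nn_out = [0.3f]; // prediction after forward-prop
    float[] grads = [0.0f];

    auto g = get_grad!(Row, true)("square");
    auto loss = g(nn_out, r, grads);

    // square loss: grad = -(label - pred) = -0.7, loss = 0.5 * 0.7^2 = 0.245
    assert(abs(grads[0] + 0.7f) < 1e-5);
    assert(abs(loss - 0.245f) < 1e-5);
}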