/**
 * NeuralNet is the main abstraction of vectorflow.
 *
 * Copyright: 2017 Netflix, Inc.
 * License: $(LINK2 http://www.apache.org/licenses/LICENSE-2.0, Apache License Version 2.0)
 */
module vectorflow.neuralnet;

private
{
    import std.algorithm : canFind, countUntil, map, startsWith, sum;
    import std.array : split;
    import std.conv : text, to;
    import std.file : exists, FileException, remove;
    import std.format : format;
    import std.meta : anySatisfy, Filter, staticSort;
    import std.stdio : File, writeln;

    import std.range.primitives : isForwardRange, isInputRange, ElementType;
    import std.traits : isAggregateType, isNumeric;
    import std.variant;

    import vectorflow.layers;
    import vectorflow.neurallayer;
    import vectorflow.serde;
    import vectorflow.optimizers;
    import vectorflow.losses;
    import vectorflow.utils : ct_msg, opCallNew;
}


/***
 * Neural-network abstraction.
Example:
-----------------
auto nn = NeuralNet()
    .stack(DenseData(400))
    .stack(Linear(10));
// nn is a network working on 400-dimensional dense vectors and predicting
// a 10-dimensional vector
-----------------
*/
class NeuralNet {

    /// array of all the roots
    InputLayer[] roots;
    /// all nodes of the computational graph
    NeuralLayer[] layers;
    /// map: name --> layer
    NeuralLayer[string] layers_map;
    /// edges of the graph: src -> [dst1, ..., dstk]
    string[][string] edges;
    /// array of all the leaves
    NeuralLayer[] leaves;
    /// reference to the leaf of the net
    @property NeuralLayer out_layer(){return leaves[0];}

    private bool _ever_initialized;

    this()
    {
        _ever_initialized = false;
    }
    mixin opCallNew;

    /**
     * Name and add a root to the net.
     *
     * Params:
     *    name_ = name to give to the layer.
     *    layer = input layer to add as root to the net.
     *
     * Returns: current neural network with the newly added layer.
     */
    NeuralNet add_root(string name_, InputLayer layer)
    {
        check_name(name_);
        layer.name = name_;
        return add_root(layer);
    }

    /**
     * Add a root to the net.
     *
     * Params:
     *    root_ = input layer to add as root to the net.
     *
     * Returns: current neural network with the newly added layer.
     */
    NeuralNet add_root(InputLayer root_)
    {
        roots ~= root_;
        add(root_);
        return this;
    }

    /**
     * Name and add a layer to the net, without wiring it.
     *
     * Params:
     *    name_ = name to give to the layer.
     *    layer = which layer to add to the net.
     *    opt = optional optimizer to use for this layer.
     *
     * Returns: current neural network with the newly added layer.
     */
    NeuralNet add(string name_, NeuralLayer layer, Optimizer opt = null)
    {
        check_name(name_);
        layer.name = name_;
        return add(layer, opt);
    }

    /**
     * Add a layer to the net, without wiring it.
     *
     * Params:
     *    layer = which layer to add to the net.
     *    opt = optional optimizer to use for this layer.
     *
     * Returns: current neural network with the newly added layer.
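     *
     * Example: a minimal sketch of adding layers without wiring them, then
     * wiring by name. Layer names and dimensions are illustrative.
     * ---
     * auto nn = NeuralNet()
     *     .add("input", DenseData(10))
     *     .add("hidden", Linear(5));
     * nn.wire("input", "hidden"); // edges are created explicitly afterwards
     * ---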
     */
    NeuralNet add(NeuralLayer layer, Optimizer opt = null)
    {
        if(roots.length == 0)
        {
            if((cast(InputLayer)layer) is null)
                throw new Exception(
                    "First layer added has to be an InputLayer.");
            if(opt !is null)
                throw new Exception(
                    "A root is not learnable, it cannot have an optimizer.");
            add_root(cast(InputLayer)layer);
        }
        else
        {
            if(layer.name == "")
                layer.name = generate_name();
            if(layer.name in layers_map)
                throw new Exception("A layer with the name `" ~
                    layer.name ~ "` already exists.");
            layers_map[layer.name] = layer;
            layers ~= layer;
            leaves ~= layer;
        }
        if(opt !is null)
            layer.set_optimizer(opt);

        return this;
    }

    /**
     * Stack a layer on top of the former leaf of the net.
     *
     * Params:
     *    layer = which layer to add to the net. It will be wired to the
     *            previous leaf.
     *    opt = optional optimizer to use for this layer.
     *
     * Returns: current neural network with the newly added layer.
     */
    NeuralNet stack(NeuralLayer layer, Optimizer opt = null)
    {
        if(leaves.length > 1)
            throw new Exception("Your current net is not a stack.");
        add(layer, opt);
        if(layers.length >= 2)
        {
            // wire to previous
            auto previous = layers[$-2];
            wire(previous, layer);
        }
        return this;
    }

    /**
     * Stack a layer on top of the former leaf of the net.
     *
     * Params:
     *    name_ = name to give to the layer
     *    layer = which layer to add to the net. It will be wired to the
     *            previous leaf.
     *    opt = optional optimizer to use for this layer.
     *
     * Returns: current neural network with the newly added layer.
     */
    NeuralNet stack(string name_, NeuralLayer layer, Optimizer opt = null)
    {
        check_name(name_);
        layer.name = name_;
        return stack(layer, opt);
    }

    /**
     * Compute the prediction of the net for $(PARAM v).
     * Runs forward-propagation and outputs the predicted vector.
     *
     * Params:
     *    v = observation with one or multiple `features*` attributes
     *        which have the types expected by the roots, in proper order
     *        (e.g. float[], SparseF[], SparseFG[], custom root types...)
     *
     * Returns: array of the last layer's neuron values
     *
     * Example:
     * ---
     * struct O {
     *     float[] features_foo;
     * }
     * net.predict(O([1.2f, 0.7f]));
     * ---
     */
    float[] predict(T)(T v) if(isAggregateType!T && isLearnableRow!T)
    {
        enum Comp(string F1, string F2) = F1 < F2;
        alias feats_fields = staticSort!(
            Comp, Filter!(isFeaturesField, __traits(allMembers, T)));
        assert(feats_fields.length == roots.length,
            "Number of `features*` fields should match number of roots.");
        reset();
        foreach(root_id, field; feats_fields)
            roots[root_id].forward_prop(mixin("v." ~ field));
        return output;
    }

    /**
     * Compute the prediction of the net when passing the arguments to the
     * root(s) of the net.
     *
     * Params: the data to feed to the roots, in proper order
     *
     * Returns: array of the last layer's neuron values
     *
     * Examples:
     * ---
     * // net with a single DenseData(2) root:
     * net.predict([3.2f, -1.5f]);
     * // net with a single SparseData(dim >= 34) root:
     * net.predict([SparseF(34, -0.7f), SparseF(3, 0.2f)]);
     * // net with one DenseData(1), one SparseData(dim >= 16) root:
     * net.predict([0.2f], [SparseF(16, -0.15f)]);
     * ---
     */
    float[] predict(T...)(T args)
    {
        assert(args.length == roots.length,
            "The number of arguments should match the number of roots.");
        reset();
        foreach(i, v; args)
            roots[i].forward_prop(v);
        return output;
    }

    /**
     * Create a directed edge between `parent` and `child` nodes.
     *
     * Params:
     *    parent = name of origin layer
     *    child = name of destination layer
     *    with_alloc = whether or not both layers should allocate internal
     *                 parameters
     */
    void wire(string parent, string child, bool with_alloc = true)
    {
        check_layer_here(parent);
        check_layer_here(child);

        auto p = layers_map[parent];
        auto c = layers_map[child];
        wire(p, c, with_alloc);
    }

    /**
     * Create a directed edge between `parent` and `child` nodes.
     *
     * Params:
     *    parent = origin layer
     *    child = destination layer
     *    with_alloc = whether or not both layers should allocate internal
     *                 parameters
     */
    void wire(NeuralLayer parent, NeuralLayer child, bool with_alloc = true)
    {
        check_layer_here(parent.name);
        check_layer_here(child.name);
        if(parent.name in edges && edges[parent.name].canFind(child.name))
            throw new Exception(
                "The edge `" ~
                parent.name ~ "` -> `" ~ child.name ~
                "` has already been added to the graph.");
        parent.children ~= child;
        child.parents ~= parent;
        foreach(l; layers)
            l.recompute_topology();
        if(with_alloc)
        {
            parent.allocate_interface();
            parent.allocate_params();
            parent.allocate_grad_params();
            child.allocate_interface();
            child.allocate_params();
            child.allocate_grad_params();
        }
        edges[parent.name] ~= child.name;

        // remove parent from the leaves array if it was already there:
        auto ind_leaf = leaves.countUntil!(l => l.name == parent.name);
        if(ind_leaf != -1)
        {
            if(leaves.length == 1)
                leaves.length = 0;
            else if(ind_leaf == 0)
                leaves = leaves[1..$];
            else if(ind_leaf == leaves.length - 1)
                leaves = leaves[0..$-1];
            else
                leaves = leaves[0..ind_leaf] ~ leaves[ind_leaf+1..$];
        }

        optimize_graph(this);
    }

    protected void check_layer_here(string name)
    {
        if(name in layers_map)
            return;
        throw new Exception(text(
            "Layer `", name,
            "` is unknown. Add it to the net first if you ",
            "want to wire it.\nCurrent net: ", this));
    }

    /**
     * Initialize at random all the parameters of the net.
     *
     * Params:
     *    rand_scale = parameter values are drawn in ]-rand_scale, rand_scale[
     */
    void initialize(double rand_scale)
    {
        _ever_initialized = true;
        foreach(l; layers)
            l.init(rand_scale);
    }

    /**
     * Return a reference to the dense output vector of the leaf of the net.
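     *
     * Example: a sketch assuming a net with a single DenseData(2) root.
     * ---
     * net.predict([0.5f, -1.2f]);
     * float[] last_out = net.output; // same values that predict() just returned
     * ---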
     */
    @property float[] output(){ return out_layer.out_d; }

    void backward_prop(V)(V[] output_grad)
        if ((is(V == float) || is(V == SparseF)))
    {
        out_layer.backward_prop(output_grad); // backpropagation
    }

    /**
     * Return the total number of learnable parameters in the net.
     */
    @property ulong num_params()
    {
        return layers.map!(l => l.num_params).sum;
    }

    /**
     * Reset any internal state variables of the net.
     */
    void reset()
    {
        foreach(l; layers)
            l.reset();
    }

    /**
     * Remove any optimizer defined on layers of the net.
     */
    void clear_opt()
    {
        foreach(l; layers)
            l.set_optimizer(null);
    }

    /**
     * Discard local weights and use those of the target net instead.
     * However, the net keeps its own internal state.
     * Useful for Hogwild SGD implementations.
     *
     * Params:
     *    net = NeuralNet whose parameters should be used.
     */
    void share_params(NeuralNet net)
    {
        foreach(i, ref l; layers)
            if((cast(InputLayer)l) is null)
                l.share_params(net.layers[i]);
    }

    /**
     * Train the neural network on some data, using the specified gradient
     * callback and optimizer.
     *
     * Params:
     *    data = forward range of rows
     *    grad_f = gradient callback (see losses.d for details)
     *    opt = optimizer to use on all learnable layers for training
     *    verbose = whether or not to show progress during training
     *    num_cores = degree of Hogwild parallelism
     */
    void learn(D, T, V, R, S, O : Optimizer)(
        D data,
        S delegate(R net_out, ref T ex, ref V[] grad) grad_f,
        O opt, bool verbose = false, uint num_cores = 1)
        if(isForwardRange!D && is(ElementType!D == T) // dataset constraints
            && (is(V == float) || is(V == SparseF))
            && (is(R == float[]) || is(R == NeuralNet))
            && (isNumeric!S || is(S == void)))
    {
        static if(!isAggregateType!T || !isLearnableRow!T)
        {
            static assert(0, text(
                "Your rows are invalid. Rows should be of an aggregate type (",
                "struct, class, union or interface) and have at least one ",
                "attribute or property whose name starts with `features`: ",
                "that's the data that will be forward-propagated into the ",
                "computational graph. If your graph has multiple roots, the ",
                "lexicographic order of the attributes starting with ",
                "`features` will be used to map them to the roots of ",
                "the graph, in the original order these roots were added to ",
                "the graph."));
        }

        if(!_ever_initialized)
        {
            writeln("Net not initialized. Initializing all weights to 0.");
            initialize(0.0);
        }

        // `opt` only drives the learning: in an SGD setting, each learnable
        // node gets its own copy with its own optimization variables.
        foreach(l; layers)
        {
            if(!l.learnable)
                continue;
            if(!l.optimizer_set)
            {
                auto opt_cp = opt.dup;
                l.set_optimizer(opt_cp);
                opt_cp.register(l);
            }
            else
            {
                l.set_optimizer(l.optimizer);
                l.optimizer.register(l);
            }
        }

        auto cores_str = (
            num_cores == 1 ? "1 core." : "%d cores.".format(num_cores));
        writeln("Training net with ", num_params, " parameters on ", cores_str);
        foreach(l; layers)
            l.pre_learning();
        opt.learn(this, data, grad_f, verbose, num_cores);
        foreach(l; layers)
            l.post_learning();
    }

    /**
     * Train the neural network on a dataset, using a predefined loss.
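     *
     * Example: a minimal sketch, assuming a net with a single DenseData(2)
     * root. The row type, loss name and AdaGrad hyper-parameters below are
     * illustrative; see losses.d for the supported losses and optimizers.d
     * for the exact optimizer constructors.
     * ---
     * struct Obs {
     *     float label;
     *     float[] features;
     * }
     * auto data = [Obs(1.0f, [0.5f, -1.2f]), Obs(0.0f, [0.1f, 0.3f])];
     * net.learn(data, "logistic", AdaGrad(10, 0.1, 500), true, 1);
     * ---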
     *
     * Params:
     *    data = forward range of rows
     *    loss = one of the predefined loss functions
     *    opt = optimizer to use on all learnable layers for training
     *    verbose = whether or not to show progress during training
     *    num_cores = degree of Hogwild parallelism
     *    monitor_loss = whether or not the loss value should be tracked during
     *                   training for monitoring (slightly slower)
     */
    void learn(D, O : Optimizer)(D data, string loss,
        O opt, bool verbose = false, uint num_cores = 1,
        bool monitor_loss = true)
        if(isForwardRange!D)
    {
        if(monitor_loss)
        {
            learn(data, get_grad!(ElementType!D, true)(loss),
                opt, verbose, num_cores);
        }
        else
        {
            learn(data, get_grad!(ElementType!D, false)(loss),
                opt, verbose, num_cores);
        }
    }

    /**
     * Train the neural network on some data, using a gradient callback.
     *
     * Assumes that an optimizer has already been specified on all learnable
     * layers.
     *
     * Params:
     *    data = forward range of rows
     *    grad_f = gradient callback (see losses.d for details)
     *    verbose = whether or not to show progress during training
     *    num_cores = degree of Hogwild parallelism
     */
    void learn(D, T, V, R, S)(
        D data,
        S delegate(R net_out, ref T ex, ref V[] grad) grad_f,
        bool verbose = false, uint num_cores = 1)
    {
        check_all_layers_have_optimizer();
        auto driver = new ShadowSGDOptimizer(this);

        learn(data, grad_f, driver, verbose, num_cores);
    }

    /**
     * Train the neural network on some data, using a predefined loss.
     *
     * Assumes that an optimizer has already been specified on all learnable
     * layers.
     *
     * Params:
     *    data = forward range of rows
     *    loss = one of the predefined loss functions
     *    verbose = whether or not to show progress during training
     *    num_cores = degree of Hogwild parallelism
     */
    void learn(D)(D data, string loss, bool verbose = false, uint num_cores = 1)
    {
        check_all_layers_have_optimizer();
        auto driver = new ShadowSGDOptimizer(this);

        learn(data, loss, driver, verbose, num_cores);
    }

    override string toString()
    {
        string s = "NeuralNet[" ~ this.num_params.to!string ~ " parameters]\n";
        foreach(l; layers)
            s ~= (l.name ~ "|" ~ l.to!string ~ "\n");
        return s[0..$-1];
    }

    private void check_name(string name_)
    {
        if(name_.length == 0)
            throw new Exception("You must specify a non-empty name.");
        else if(name_.canFind(','))
            throw new Exception(
                "Layer names cannot contain commas: `" ~ name_ ~ "`.");
    }

    static bool is_upstream_stack(NeuralLayer layer)
    {
        bool is_stack = layer.parents.length <= 1 && layer.children.length <= 1;
        foreach(p; layer.parents)
            is_stack &= is_upstream_stack(p);
        return is_stack;
    }

    private void check_all_layers_have_optimizer()
    {
        string not_set;
        foreach(l; layers)
        {
            if(l.learnable && !l.optimizer_set())
                not_set ~= l.name ~ ",";
        }
        if(not_set != "")
            throw new Exception(
                "You haven't specified an optimizer for the following " ~
                "learnable layers: " ~ not_set[0..$-1]);
    }

    private string generate_name()
    {
        return "layer" ~ to!string(layers.length + 1);
    }

    /**
     * Dump the neural net (topology and weight values) to the specified path.
     *
     * Params:
     *    path = where to dump the neural net.
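     *
     * Example: a sketch of a round-trip; the file path is illustrative.
     * ---
     * net.serialize("/tmp/model.vf");
     * auto restored = NeuralNet.deserialize("/tmp/model.vf");
     * ---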
     */
    void serialize(string path)
    {
        auto f = File(path, "w");
        scope(exit) f.close();
        scope(failure)
        {
            f.close();
            try
            {
                writeln("Serialization failed.");
                remove(path);
            }
            catch(FileException e)
            {
                writeln("Couldn't cleanup `", path,
                    "` after serialization failure: ", e);
            }
        }

        auto ser = new Serializer(&f);

        // serialize root names
        ser.write(roots.length);
        foreach(r; roots)
            ser.write(r.name);

        // serialize edges
        ser.write(edges.length);
        foreach(p; edges.byKeyValue())
        {
            ser.write(p.value.length);
            foreach(child; p.value)
            {
                ser.write(p.key ~ "," ~ child); // parent,child
            }
        }

        // now serialize layers
        foreach(l; layers)
            l.ser(ser);
    }

    /**
     * Deserialize the neural net from the specified path.
     *
     * Params:
     *    path = file path of the neural net to read.
     */
    static NeuralNet deserialize(string path)
    {
        if(!exists(path))
            throw new Exception("File does not exist: " ~ path);
        auto f = File(path, "r");
        scope(exit) f.close();
        scope(failure) f.close();

        auto nn = new NeuralNet();

        auto deser = new Serializer(&f);

        // deserialize root names
        bool[string] root_names;
        auto num_roots = deser.read!ulong();
        foreach(_; 0..num_roots)
            root_names[deser.read!string()] = true;

        // deserialize edges
        string[][string] edges;
        auto num_parents = deser.read!ulong();
        foreach(_; 0..num_parents)
        {
            auto num_children = deser.read!ulong();
            foreach(__; 0..num_children)
            {
                auto edge = deser.read!string();
                auto toks = edge.split(',');
                edges[toks[0]] ~= toks[1];
            }
        }

        // deserialize all layers
        auto layers = deser.deserialize_layers();
        foreach(l; layers)
        {
            if(l.name in root_names)
                nn.add_root(cast(InputLayer)l);
            else
                nn.add(l);
        }

        foreach(p; edges.byKeyValue())
            foreach(child; p.value)
                nn.wire(p.key, child, false);
        foreach(l; nn.layers)
        {
            if(l.type == LayerT.DENSE)
                l.out_d.length = l.dim_out;
            l.allocate_interface();
        }

        return nn;
    }

    /**
     * Return a copy of the net.
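     *
     * Example: a sketch; `net` is any existing, wired NeuralNet.
     * ---
     * auto topo_copy = net.dup(true); // shallow copy: topology only
     * auto full_copy = net.dup();     // interface and parameters allocated too
     * ---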
     *
     * Params:
     *    topology_only = whether or not the copy should be shallow
     */
    NeuralNet dup(bool topology_only = false)
    {
        auto cp = new NeuralNet();

        bool[string] root_names;
        foreach(r; roots)
        {
            root_names[r.name] = true;
            cp.add_root(r.name, cast(InputLayer)r.dup);
        }
        foreach(l; layers)
        {
            if(l.name !in root_names)
            {
                auto lcp = l.dup;
                if(l.optimizer)
                    lcp.set_optimizer(l.optimizer.dup);
                cp.add(l.name, lcp);
            }
        }

        foreach(p; edges.byKeyValue())
            foreach(child; p.value)
                cp.wire(p.key, child, !topology_only);
        if(!topology_only)
            foreach(l; cp.layers)
            {
                l.allocate_interface();
                l.allocate_params();
            }
        return cp;
    }
}

package template isFeaturesField(string s)
{
    enum isFeaturesField = s.startsWith("features");
}

package template isLearnableRow(T)
{
    enum isLearnableRow = anySatisfy!(
        isFeaturesField, __traits(allMembers, T));
}


private void optimize_graph(NeuralNet net)
{
    foreach(layer; net.layers)
    {
        if(auto l = cast(Linear)layer)
        {
            foreach(p; l.priors)
                p.register(l);
            if(l.prox !is null)
                l.prox.register(l);
        }
    }
}

version(assert)
{
    static this()
    {
        ct_msg!("Non-release build.");
    }
}
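
// A minimal end-to-end sketch of the API above. Layer sizes and input values
// are illustrative; this only exercises graph construction, initialization
// and forward propagation.
unittest
{
    auto nn = NeuralNet()
        .stack(DenseData(2))
        .stack(Linear(1));
    nn.initialize(0.1);
    auto pred = nn.predict([0.3f, -0.6f]);
    assert(pred.length == 1);
}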