22 """
23 The `module` module provides a modular architecture to build a neural network.
24
25 These modules can be either `Standalone` modules, i.e. modules that doesn't have
26 submodules, or `Container` modules, i.e. modules that are composed of one or
27 several other module(s). The `Container` modules are typically used to create
28 arbitrarily complex neural network architectures, while `Standalone` modules
29 acts as `Linear` regression layers or non-linear `Activation` layers.
30
31 The currently implemented `Standalone` modules are :
32 - `Linear`
33 - `Sigmoid`
34 - `Softmax`
35 - `Tanh`
36
37 The currently implemented `Container` modules are :
38 - `Concat`
39 - `Sequential`
40
41 See their respective documentations for more details about their use.
42
43 :see: `criterion`, `network`
44 """
45
46 import theano
47 import theano.tensor as T
48 import numpy as np
49 import warnings
50 import cPickle as pickle
51 from crino.criterion import Criterion
52 from theano.compile.sharedvalue import SharedVariable
53
55 """
56 Loads and returns a `Module` previously saved with `Module.save` function.
57
58 :Parameters:
59 filename : str
60 The path to the saved module.
61 """
62 return pickle.load(open(filename, 'rb'))
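
# A minimal usage sketch (commented out so that importing this module stays
# side-effect free). The architecture below is purely illustrative:
#
#   net = Sequential([Linear(5), Tanh(5)], nInputs=10)
#   net.save('net.pkl')     # serialize with Module.save
#   net2 = load('net.pkl')  # deserialize with the module-level load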

class Module(object):
    """
    A `Module` is a part of a neural network architecture
    that may have parameters. Given an input vector of
    fixed size, a module is able to compute an output vector,
    whose size is specified at construction.
    According to their characteristics, several modules can
    be combined to build a new module.

    If a criterion is given to a module, it is able to compute
    the partial gradients of its parameters, in order to perform
    gradient descent.

    :attention: This is an abstract class, it must be derived to be used.
    """
    def __init__(self, nOutputs, nInputs=None):
        """
        Constructs a new `Module` object.

        :Parameters:
            nOutputs : int
                The `outputs` size.
            nInputs : int
                The `inputs` size.
        """

        self.inputs = None
        """
        :ivar: The symbolic `inputs` vector of the module, denoted :math:`\mathbf{x}`
        :type: :theano:`TensorVariable`
        """

        self.outputs = None
        """
        :ivar: The symbolic `outputs` vector of the module, denoted :math:`\mathbf{\hat{y}}`
        :type: :theano:`TensorVariable`
        """

        self.nInputs = nInputs
        """
        :ivar: The `inputs` size
        :type: int
        """

        self.nOutputs = nOutputs
        """
        :ivar: The `outputs` size
        :type: int
        """

        self.params = []
        """
        :ivar: The list of parameters
        :type: list
        """

        self.backupParams = []
        """
        :ivar: The list of backup parameters
        :type: list
        """

        self.criterion = None
        """
        :ivar: The criterion used to compute the training cost, if any
        :type: `Criterion`
        """

        self.prepared = False
        """
        :ivar: Indicates whether the module has already been prepared
        :type: bool
        """

    def linkModule(self, previous):
        """
        Links the `outputs` of the previous module to the `inputs` of the current module.

        :Parameters:
            previous : `Module`
                The previous module to be linked with the current module.
        """
        if self.prepared and (self.nInputs != previous.nOutputs):
            raise Exception("This module has already been prepared, you can't change its inputs size.")
        elif previous.outputs is None:
            raise Exception("The previous module must be prepared before being linked.")
        else:
            self.nInputs = previous.nOutputs
            self.inputs = previous.outputs
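
    # A linking sketch (commented out). `linkInputs` is used here to attach a
    # fresh symbolic matrix to the first module before preparing it:
    #
    #   lin = Linear(5, nInputs=10)
    #   lin.linkInputs(T.matrix('x'), 10)
    #   lin.prepare()
    #   act = Tanh(5)
    #   act.linkModule(lin)   # act.inputs now refers to lin.outputs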

    def linkInputs(self, vector, nInputs):
        """
        Sets the given symbolic vector as the `inputs` of the current module.

        :Parameters:
            vector : :theano:`TensorVariable`
                The symbolic vector to use as `inputs`.
            nInputs : int
                The size of this `inputs` vector.
        """
        if self.prepared and (self.nInputs != nInputs):
            raise Exception("This module has already been prepared, you can't change its inputs size.")
        else:
            self.nInputs = nInputs
            self.inputs = vector

    def trainFunction(self, batch_size=1, lr=0.1, downcast=None, shared_x_train=None, shared_y_train=None):
        """
        Constructs and compiles a Theano function in order to train the module.

        :Parameters:
            batch_size : int
                The size of the batches to use for gradient descent:
                    - 1 for stochastic gradient descent;
                    - :math:`n \in ]1..N_{train}[` for mini-batch gradient descent (:math:`N_{train}` must be a multiple of :math:`n`);
                    - :math:`N_{train}` for batch gradient descent.

                (:math:`N_{train}` is the total number of training examples)
            lr : float
                The learning rate.
            downcast : bool
                If true, allows the input data to be downcast
                (e.g. from double to single precision floats for GPU use).
            shared_x_train : :theano:`SharedVariable`
                If provided along with `shared_y_train`, the training inputs are
                read from this shared variable instead of being passed at call time.
            shared_y_train : :theano:`SharedVariable`
                If provided along with `shared_x_train`, the training targets are
                read from this shared variable instead of being passed at call time.

        :return: a Theano function that performs one step of gradient descent
        :rtype: :theano:`function`
        """

        shared_sets = False
        if isinstance(shared_x_train, SharedVariable) and isinstance(shared_y_train, SharedVariable):
            shared_sets = True

        if self.params and self.criterion:
            # Symbolic gradients of the criterion w.r.t. each parameter
            self.gparams = T.grad(self.criterion.expression, self.params)

            index = T.lscalar('index')
            if shared_sets:
                x_train = shared_x_train
                y_train = shared_y_train
            else:
                x_train = T.matrix('x_train')
                y_train = T.matrix('y_train')

            # One gradient descent step: param <- param - lr * gradient
            updates = []
            for param_i, grad_i in zip(self.params, self.gparams):
                updates.append((param_i, param_i - lr*grad_i))

            if shared_sets:
                inputs = [index]
            else:
                inputs = [x_train, y_train, index]

            return theano.function(inputs=inputs, outputs=self.criterion.expression, updates=updates,
                givens={
                    self.inputs: x_train[index*batch_size:(index+1)*batch_size],
                    self.criterion.targets: y_train[index*batch_size:(index+1)*batch_size]
                }, allow_input_downcast=downcast)
        else:
            return None
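
    # A training sketch (commented out). `MeanSquareError` and its
    # `(outputs, targets)` constructor signature are assumptions about
    # `crino.criterion`, not guaranteed by this module:
    #
    #   x = np.random.rand(100, 10).astype(theano.config.floatX)
    #   y = np.random.rand(100, 3).astype(theano.config.floatX)
    #   net = Sequential([Linear(3), Sigmoid(3)], nInputs=10)
    #   net.linkInputs(T.matrix('x'), 10)
    #   net.prepare()
    #   net.criterion = MeanSquareError(net.outputs, T.matrix('y'))
    #   train = net.trainFunction(batch_size=10, lr=0.1, downcast=True)
    #   for epoch in xrange(50):
    #       for i in xrange(10):  # 100 examples / batch_size 10 = 10 batches
    #           cost = train(x, y, i)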

    def criterionFunction(self, downcast=None, shared_x_data=None, shared_y_data=None):
        """
        Constructs and compiles a Theano function in order to compute the criterion on a given set.

        :Parameters:
            downcast : bool
                If true, allows the input data to be downcast
                (e.g. from double to single precision floats for GPU use).
            shared_x_data : :theano:`SharedVariable`
                If provided along with `shared_y_data`, the inputs are read from
                this shared variable instead of being passed at call time.
            shared_y_data : :theano:`SharedVariable`
                If provided along with `shared_x_data`, the targets are read from
                this shared variable instead of being passed at call time.

        :return: a Theano function that computes the criterion on the given set
        :rtype: :theano:`function`
        """

        shared_sets = False
        if isinstance(shared_x_data, SharedVariable) and isinstance(shared_y_data, SharedVariable):
            shared_sets = True

        if self.params and self.criterion:
            if shared_sets:
                x_data = shared_x_data
                y_data = shared_y_data
            else:
                x_data = T.matrix('x_data')
                y_data = T.matrix('y_data')

            if shared_sets:
                inputs = []
            else:
                inputs = [x_data, y_data]

            return theano.function(inputs=inputs, outputs=self.criterion.expression,
                givens={
                    self.inputs: x_data,
                    self.criterion.targets: y_data
                }, allow_input_downcast=downcast)
        else:
            return None
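
    # Following the training sketch above, the criterion can be monitored on a
    # held-out set (commented out; x_valid and y_valid are assumed ndarrays):
    #
    #   valid = net.criterionFunction(downcast=True)
    #   err = valid(x_valid, y_valid)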

    def forwardFunction(self, downcast=None, shared_x_data=None):
        """
        Constructs and compiles a Theano function in order to compute the forward pass on a given set.

        :Parameters:
            downcast : bool
                If true, allows the input data to be downcast
                (e.g. from double to single precision floats for GPU use).
            shared_x_data : :theano:`SharedVariable`
                If provided, the inputs are read from this shared variable
                instead of being passed at call time.

        :return: a Theano function that computes the module `outputs` on the given set
        :rtype: :theano:`function`
        """

        shared_sets = False
        if isinstance(shared_x_data, SharedVariable):
            shared_sets = True

        if self.prepared:
            if shared_sets:
                x_data = shared_x_data
            else:
                x_data = T.matrix('x_data')

            if shared_sets:
                inputs = []
            else:
                inputs = [x_data]

            return theano.function(inputs=inputs, outputs=self.outputs,
                givens={
                    self.inputs: x_data
                }, allow_input_downcast=downcast)
        else:
            return None

306 """
307 Performs the forward step on the given test example.
308
309 :Parameters:
310 x_test : :numpy:`ndarray`
311 The test example on which the neural network will compute its outputs.
312
313 :return: a Theano function that performs one step of gradient descent
314 :rtype: :theano:`function`
315 """
316 shared_x_test=theano.shared(x_test)
317 forward = self.forwardFunction(downcast=None,shared_x_data=shared_x_test)
318 return forward()
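
    # Following the training sketch above (commented out), computing the
    # outputs on fresh data is then a one-liner:
    #
    #   y_hat = net.forward(np.random.rand(20, 10).astype(theano.config.floatX))
    #   # y_hat has shape (20, 3)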

    def holdFunction(self):
        """
        Constructs and compiles a Theano function that copies the current
        `params` into the `backupParams`.

        :return: a Theano function that backs up the `params`
        :rtype: :theano:`function`
        """
        if self.params and self.backupParams:
            updates = []
            for param_i, backup_param_i in zip(self.params, self.backupParams):
                updates.append((backup_param_i, param_i))

            return theano.function(inputs=[], updates=updates)
        else:
            return None

    def restoreFunction(self):
        """
        Constructs and compiles a Theano function that restores the `params`
        from the `backupParams`.

        :return: a Theano function that restores the `params`
        :rtype: :theano:`function`
        """
        if self.params and self.backupParams:
            updates = []
            for param_i, backup_param_i in zip(self.params, self.backupParams):
                updates.append((param_i, backup_param_i))

            return theano.function(inputs=[], updates=updates)
        else:
            return None
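
    # A hold/restore sketch for early stopping (commented out; the method
    # names follow the reconstruction above, and `validate` is a hypothetical
    # function returning the current validation error):
    #
    #   hold = net.holdFunction()
    #   restore = net.restoreFunction()
    #   best = np.inf
    #   for epoch in xrange(50):
    #       for i in xrange(10):
    #           train(x, y, i)
    #       err = validate()
    #       if err < best:
    #           best = err
    #           hold()     # back up the best params seen so far
    #   restore()          # roll back to the best params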

    def prepare(self):
        """
        Prepares the module before learning.

        :attention: The `inputs` must be linked before preparation.
        """
        if not self.prepared:
            if self.inputs is None:
                raise Exception('The inputs of this module have not been linked before preparation.')
            else:
                self.prepareGeometry()
                self.prepareParams()
                self.prepareBackup()
                self.prepareOutput()
                self.prepared = True
        else:
            warnings.warn("This module is already prepared.")
365 """
366 Sets correctly the geometry (`nInputs` and `nOutputs`) of the potential submodules.
367
368 :attention: It must be implemented in derived classes.
369 """
370 raise NotImplementedError("This class must be derived.")
371
373 """
374 Initializes the `params` of the module and its potential submodules.
375
376 :attention: It must be implemented in derived classes.
377 """
378 raise NotImplementedError("This class must be derived.")
379
381 """
382 Initializes the `backupParams` of the module and its potential submodules.
383 """
384
385 if self.params:
386 for param_i in self.params:
387 theShape=theano.function(inputs=[],outputs=param_i.shape)
388 data=np.zeros(theShape(),dtype=theano.config.floatX)
389 self.backupParams.append(theano.shared(value=data, name='backup_'+param_i.name, borrow=True))
390
392 """
393 Computes the symbolic `outputs` of the module in respect to its `inputs`.
394
395 :attention: It must be implemented in derived classes.
396 """
397 raise NotImplementedError("This class must be derived.")
398

    def save(self, filename):
        """
        Saves this `Module` to a file.

        :Parameters:
            filename : str
                The path where the module is to be saved.
        """
        pickle.dump(self, open(filename, 'wb'), protocol=-1)
410 """
411 A `Standalone` module computes its `outputs` without relying
412 on other modules, i.e. it doesn't have any submodule.
413
414 :attention: This is an abstract class, it must be derived to be used.
415 """
416
417 - def __init__(self, nOutputs, nInputs=None):
418 """
419 Constructs a new `Standalone` module.
420
421 :Parameters:
422 nOutputs : int
423 The `outputs` size.
424 nInputs : int
425 The `inputs` size.
426 """
427 Module.__init__(self, nOutputs, nInputs)
428
430 """
431 Do nothing, as a standalone module doesn't have submodules.
432 """
433 pass
434
435
437 """
438 A `Linear` module computes its `outputs` as a linear transformation of its `inputs`.
439 It has two `params` : :math:`W \in \mathcal{M}_{n_out \\times n_in}(\mathbb{R})`
440 and :math:`b \in \mathbb{R}^{n_{out}}`.
441
442 The `outputs` expression can be written as follows :
443 :math:`\mathbf{\hat{y}} = W\\times \mathbf{x} + b`
444 """
445
    def __init__(self, nOutputs, nInputs=None, W_init=None, b_init=None):
        """
        Constructs a new `Linear` module.

        :Parameters:
            nOutputs : int
                The `outputs` size.
            nInputs : int
                The `inputs` size.
            W_init : :numpy:`ndarray`
                The initialization matrix for W.
            b_init : :numpy:`ndarray`
                The initialization vector for b.
        """

        Standalone.__init__(self, nOutputs, nInputs)

        self.W_init = W_init
        """
        :ivar: The initialization matrix for `W`.
        :type: :numpy:`ndarray`
        """

        self.b_init = b_init
        """
        :ivar: The initialization vector for `b`.
        :type: :numpy:`ndarray`
        """

        self.W = None
        """
        :ivar: The symbolic linear transformation matrix.
        :type: :theano:`TensorVariable`
        """

        self.b = None
        """
        :ivar: The symbolic offset vector.
        :type: :theano:`TensorVariable`
        """
488 """
489 Initializes the `W` and `b` `params` of the `Linear` module.
490
491 :Parameters:
492 W : :theano:`TensorVariable`
493 If provided, the `Linear` module will use W as a shared parameter from another module.
494 b : :theano:`TensorVariable`
495 If provided, the `Linear` module will use b as a shared parameter from another module.
496
497 :attention: `W_init` and `b_init` values will be ignored if existing W and b are passed, since they have already been initialized in another module.
498 """
499
500 if W:
501 self.W = W
502 else:
503 if (self.W_init == None):
504 ext = np.sqrt(6./(self.nInputs + self.nOutputs))
505 self.W_init = np.asarray(np.random.uniform(low=-ext,
506 high=ext, size=(self.nInputs, self.nOutputs)),
507 dtype=theano.config.floatX)
508 self.W = theano.shared(value=self.W_init, name='W', borrow=True)
509
510 if b:
511 self.b = b
512 else:
513 if (self.b_init == None):
514 self.b_init = np.zeros((self.nOutputs,), dtype=theano.config.floatX)
515 self.b = theano.shared(value=self.b_init, name='b', borrow=True)
516
517 self.params = [self.W, self.b]

    def prepareOutput(self):
        """
        Computes the linear relation :math:`\mathbf{\hat{y}} = W\\times \mathbf{x} + b`.
        """
        self.outputs = T.dot(self.inputs, self.W) + self.b
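
    # A worked example (commented out): with x = [1, 2, 3], the module below
    # computes y = x.W + b by hand-checkable arithmetic:
    #
    #   W = np.asarray([[1., 0.], [0., 1.], [1., 1.]], dtype=theano.config.floatX)
    #   b = np.asarray([0.5, -0.5], dtype=theano.config.floatX)
    #   lin = Linear(2, nInputs=3, W_init=W, b_init=b)
    #   lin.linkInputs(T.matrix('x'), 3)
    #   lin.prepare()
    #   lin.forward(np.asarray([[1., 2., 3.]], dtype=theano.config.floatX))
    #   # -> [[1+3+0.5, 2+3-0.5]] = [[4.5, 4.5]]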
527 """
528 A `Container` module computes its `outputs` thanks to
529 other modules, i.e. it includes several submodules.
530 The way these submodules are organized depends on the
531 implementation.
532
533 :attention: This is an abstract class, it must be derived to be used.
534 """
535
536 - def __init__(self, mods=[], nInputs=None):
537 """
538 Constructs a new `Container` module.
539
540 :Parameters:
541 mods : list
542 A list of submodules to add to the container.
543 nInputs : int
544 The `inputs` size.
545 """
546 Module.__init__(self, None, nInputs)
547
548 self.modules=[]
549 """
550 :ivar: The list of submodules
551 :type: list
552 """
553 if mods:
554 map(self.add, mods)
555

    def add(self, module):
        """
        Adds a module to the `Container`.

        :Parameters:
            module : `Module`
                The submodule to add.
        """
        self.modules.append(module)
568 """
569 A `Sequential` module computes its `outputs` sequentially,
570 i.e. each `outputs` of its submodules is linked to the `inputs`
571 of the following module. The number of submodules in the sequence
572 can be chosen arbitrarily, but the `inputs` and `outputs` sizes
573 must be the same throughout the sequence.
574
575 .. image:: ../images/sequential.png
576
577 """
578 - def __init__(self, mods=[], nInputs=None):
579 """
580 Constructs a new `Sequential` container.
581
582 :Parameters:
583 mods : list
584 A list of submodules to add to the sequence. They will be linked in the same order as provided.
585 nInputs : int
586 The `inputs` size of the sequence (and of all the submodules).
587 """
588 Container.__init__(self, mods, nInputs)
589
591 """
592 Sets the same `inputs` and `outputs` size for all submodules,
593 and prepare their internal geometry.
594 """
595
596 if(self.modules):
597
598 self.nOutputs = self.modules[-1].nOutputs
599
600 first = self.modules[0]
601 first.nInputs = self.nInputs
602 first.prepareGeometry()
603 for base, new in zip(self.modules, self.modules[1:]):
604 new.nInputs = base.nOutputs
605 new.prepareGeometry()
606
608 """
609 Initializes the `params` of the submodules. The `Sequential` module `params` will include the `params` of its submodules .
610 """
611
612 if(self.modules):
613 for mod in self.modules:
614 mod.prepareParams()
615 self.params.extend(mod.params)
616
618 """
619 Computes sequentially the symbolic `outputs` of the module.
620 """
621 if(self.modules):
622 first = self.modules[0]
623 first.inputs = self.inputs
624 first.prepareOutput()
625 first.prepared = True
626 for base, new in zip(self.modules, self.modules[1:]):
627 new.linkModule(base)
628 new.prepareOutput()
629 new.prepared = True
630 self.outputs = self.modules[-1].outputs
631
632
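
    # For instance (sketch, commented out), a two-hidden-layer perceptron
    # where each layer's inputs size is inferred from the preceding layer:
    #
    #   mlp = Sequential(nInputs=784)
    #   mlp.add(Linear(500)); mlp.add(Tanh(500))
    #   mlp.add(Linear(10));  mlp.add(Softmax(10))
    #   mlp.linkInputs(T.matrix('x'), 784)
    #   mlp.prepare()   # mlp.nOutputs is now 10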
634 """
635 A `Concat` module computes its `outputs` in parallel,
636 i.e. it subdivides its `inputs` in :math:`n \in \mathbb{N}`
637 parts of fixed sizes. The sum of the submodules `inputs`
638 sizes must equal the total `inputs` size of the `Concat`.
639
640 .. image:: ../images/concat.png
641
642 """
643 - def __init__(self, mods=[], nInputs=None):
644 """
645 Constructs a new `Concat` container.
646
647 :Parameters:
648 mods : list
649 A list of submodules to add to the concat. Each one will receive a part of the `inputs`.
650 nInputs : int
651 The `inputs` size of the concat.
652 """
653 Container.__init__(self, mods, nInputs)
654
656 """
657 Sets the `outputs` size for the `Concat`, and prepare the submodules internal geometry.
658 """
659 if(self.modules):
660
661 self.nOutputs = reduce(lambda x,y: x+y.nOutputs, self.modules, 0)
662
663 nInputs = reduce(lambda x,y: x+y.nInputs, self.modules, 0)
664 if (nInputs != self.nInputs):
665 raise Exception("The total inputs sizes of the sub-modules is wrong.")
666
667 map(lambda x:x.prepareGeometry(), self.modules)
668
670 """
671 Initializes the `params` of the submodules. The `Sequential` module `params` will include the `params` of its submodules .
672 """
673 if(self.modules):
674 for mod in self.modules:
675 mod.prepareParams()
676 self.params.extend(mod.params)
677
679 """
680 Computes the symbolic `outputs` of all the submodules, and concatenate them to get the complete `outputs`.
681 """
682
683 if(self.modules):
684 temp = 0
685 for mod in self.modules:
686 mod.linkInputs(self.inputs[:,temp:temp+mod.nInputs], mod.nInputs)
687 temp += mod.nInputs
688 mod.prepareOutput()
689 mod.prepared = True
690 self.outputs = T.concatenate(map(lambda x: x.outputs, self.modules), axis=1)
691
692
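
    # For instance (sketch, commented out), a `Concat` that feeds the first 3
    # input columns to one linear layer and the remaining 7 to another:
    #
    #   cat = Concat([Linear(2, nInputs=3), Linear(4, nInputs=7)], nInputs=10)
    #   cat.linkInputs(T.matrix('x'), 10)
    #   cat.prepare()   # cat.nOutputs is now 2 + 4 = 6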
694 """
695 An `Activation` module computes its `outputs` without any parameter, but with a function
696 :math:`f \,:\, \mathbb{R}^n -> \mathbb{R}^n` applied to its `inputs` vector.
697 This function is generally non-linear, because its purpose is to provide non-linearity
698 to the neural network.
699 """
700
701 - def __init__(self, nOutputs, nInputs=None):
702 """
703 Constructs a new `Activation` module.
704
705 :Parameters:
706 nOutputs : int
707 The `outputs` size.
708 nInputs : int
709 The `inputs` size.
710 """
711
712 Standalone.__init__(self, nOutputs, nInputs)
713
715 """
716 Do nothing, as an activation module doesn't have `params`.
717 """
718 pass
719

class Tanh(Activation):
    """
    A `Tanh` activation module computes its `outputs` with the
    non-linear element-wise hyperbolic tangent function, which can be defined as
    :math:`tanh(\mathbf{x}) = [\dfrac{exp(x_i)-exp(-x_i)}{exp(x_i)+exp(-x_i)}]_{i=1}^n`,
    with :math:`\mathbf{x} = [x_1, x_2, \dots, x_n] \in \mathbb{R}^n`.
    """
    def __init__(self, nOutputs, nInputs=None):
        """
        Constructs a new `Tanh` activation module.

        :Parameters:
            nOutputs : int
                The `outputs` size.
            nInputs : int
                The `inputs` size.
        """
        Activation.__init__(self, nOutputs, nInputs)
740 """
741 Computes the tanh function :math:`\mathbf{\hat{y}} = = [\dfrac{exp(x_i)-exp(-x_i)}{exp(x_i)+exp(-x_i)}]_{i=1}^n`
742 """
743 self.outputs = T.tanh(self.inputs)
744
746 """
747 A `Sigmoid` activation module computes its `outputs` with the
748 non-linear element-wise sigmoid function, that can be defined as
749 :math:`\sigma(\mathbf{x}) = (1+tanh(\mathbf{x}/2))/2 = [1/(1+exp(-x_i))]_{i=1}^n`,
750 with :math:`\mathbf{x} = [x_1, x_2, \dots, x_n] \in \mathbb{R}^n`.
751 """
752 - def __init__(self, nOutputs, nInputs=None):
753 """
754 Constructs a new `Sigmoid` activation module.
755
756 :Parameters:
757 nOutputs : int
758 The `outputs` size.
759 nInputs : int
760 The `inputs` size.
761 """
762 Activation.__init__(self, nOutputs, nInputs)
763
765 """
766 Computes the sigmoid function :math:`\mathbf{\hat{y}} = [1/(1+exp(-x_i))]_{i=1}^n`
767 """
768 self.outputs = T.nnet.sigmoid(self.inputs)
769
771 """
772 A `Softmax` activation module computes its `outputs` with the
773 non-linear softmax function, that can be defined as
774 :math:`softmax(\mathbf{x}) = [exp(x_i)/\sum_{i=1}^n exp(x_i)]_{i=1}^n`,
775 with :math:`\mathbf{x} = [x_1, x_2, \dots, x_n] \in \mathbb{R}^n`.
776 """
777
778 - def __init__(self, nOutputs, nInputs=None):
779 """
780 Constructs a new `Softmax` activation module.
781
782 :Parameters:
783 nOutputs : int
784 The `outputs` size.
785 nInputs : int
786 The `inputs` size.
787 """
788 Activation.__init__(self, nOutputs, nInputs)
789
791 """
792 Computes the softmax function :math:`\mathbf{\hat{y}} = [exp(x_i)/\sum_{i=1}^n exp(x_i)]_{i=1}^n`
793 """
794 self.outputs = T.nnet.softmax(self.inputs)
795
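
    # A sanity-check sketch (commented out): softmax outputs are positive and
    # each row sums to 1:
    #
    #   sm = Softmax(3)
    #   sm.linkInputs(T.matrix('x'), 3)
    #   sm.prepare()
    #   y = sm.forward(np.asarray([[1., 2., 3.]], dtype=theano.config.floatX))
    #   # y.sum(axis=1) is approximately [1.]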