22 """
23 The `network` module provides some ready-to-use neural network architectures,
24 along with pretraining and supervised learning methods.
25
26 The currently implemented neural network architectures are :
27 - `AutoEncoder` and its counterpart `OutputAutoEncoder`
28 - `MultiLayerPerceptron`
29 - `DeepNeuralNetwork`
30 - `InputOutputDeepArchitecture`
31
32 See their respective documentations for more details about their use.
33
34 :see: `criterion`, `network`
35 """

import datetime as DT
import theano
import theano.tensor as T
import numpy as np
import sys

from crino.module import Sequential, Linear, Sigmoid, Tanh
from crino.criterion import CrossEntropy, MeanSquareError


class MultiLayerPerceptron(Sequential):
48 """
49 A `MultiLayerPerceptron` (MLP) is one classical form of artificial
50 neural networks, whichs aims at predicting one or more output states
51 given some particular inputs. A MLP is a `Sequential` module, made of
52 a succession of `Linear` modules and non-linear `Activation` modules.
53 This tends to make the MLP able to learn non-linear decision functions.
54
55 A MLP must be trained with a supervised learning algorithm in order
56 to work. The gradient backpropagation is by far the most used algorithm
57 used to train MLPs.
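
    For instance, a minimal usage sketch (sizes are illustrative; `x_train`
    and `y_train` are assumed to be :numpy:`ndarray` matrices of dtype
    `theano.config.floatX`, with `y_train` values in [0, 1]) ::

        mlp = MultiLayerPerceptron([10, 5, 2], Sigmoid) # geometry of the network
        mlp.linkInputs(T.matrix('x'), 10)               # symbolic input
        mlp.prepare()                                   # allocate the parameters
        mlp.criterion = CrossEntropy(mlp.outputs, T.matrix('y'))
        mlp.train(x_train, y_train, batch_size=1, epochs=100)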
58 """
60 """
61 Constructs a new `MultiLayerPerceptron` network.
62
63 :Parameters:
64 nUnits : int list
65 The sizes of the (input, hidden and output) representations.
66 outputActivation : class derived from `Activation`
67 The type of activation for the output layer.
68 :attention: `outputActivation` parameter is not an instance but a class.
69 """
        Sequential.__init__(self, nInputs=nUnits[0])
        self.nUnits = nUnits

        # hidden layers: each Linear module is followed by a Tanh activation
        for nOutputs in nUnits[1:-1]:
            self.add(Linear(nOutputs))
            self.add(Tanh(nOutputs))

        # output layer, with the user-supplied activation class
        self.add(Linear(nUnits[-1]))
        self.add(outputActivation(nUnits[-1]))

    def getGeometry(self):
        """
        Returns the sizes of all representations, from the input to the output layer.
        """
        if not(self.prepared):
            raise ValueError("You can not get geometry on a non-prepared MLP")
        geometry = [self.nInputs]
        geometry += list(map(lambda mod: mod.nOutputs, self.modules))

        return geometry

    def getParameters(self):
        """
        Returns the geometry and the current parameters (weights and biases) of the MLP.
        """
        if not(self.prepared):
            raise ValueError("You can not get params on a non-prepared MLP")
        params = {}
        params['geometry'] = self.getGeometry()
        params['weights_biases'] = list(map(lambda param: np.array(param.get_value()), self.params))

        return params

    def setParameters(self, params):
        """
        Sets the parameters (weights and biases) of the MLP, provided their
        geometry matches its own.
        """
        if not(self.prepared):
            raise ValueError("You can not set params on a non-prepared MLP")
        if self.getGeometry() != params['geometry']:
            raise ValueError("Params geometry does not match MLP geometry")

        for param, w in zip(self.params, params['weights_biases']):
            param.set_value(w)

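    # A persistence sketch (the use of cPickle here is illustrative and not
    # part of crino): saving and restoring the parameters of a prepared MLP.
    #
    #     import cPickle
    #     cPickle.dump(mlp.getParameters(), open('mlp.pkl', 'wb'), -1)
    #     mlp.setParameters(cPickle.load(open('mlp.pkl', 'rb')))
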
    def checkLearningParameters(self, param_dict):
        """
        Separates the given parameters into the known learning parameters
        and the unknown ones.
        """
        known = {}
        unknown = {}
        known_keys = ["batch_size", "learning_rate", "epochs", "growth_factor", "growth_threshold", "badmove_threshold", "verbose"]
        for (key, value) in param_dict.items():
            if key in known_keys:
                known[key] = value
            else:
                unknown[key] = value
        return (known, unknown)

    def defaultLearningParameters(self, param_dict):
        """
        Completes the given learning parameters with default values for the
        missing ones.
        """
        ret = dict(param_dict)
        default_values = {'batch_size': 1, 'learning_rate': 1.0, 'epochs': 100, 'growth_factor': 1.25, 'growth_threshold': 5, 'badmove_threshold': 10, 'verbose': True}
        for key in default_values.keys():
            if key not in param_dict:
                ret[key] = default_values[key]
        return ret

    def finetune(self, shared_x_train, shared_y_train, batch_size, learning_rate, epochs, growth_factor, growth_threshold, badmove_threshold, verbose):
146 """
147 Performs the supervised learning step of the `MultiLayerPerceptron`,
148 using a batch-gradient backpropagation algorithm. The `learning_rate`
149 is made adaptative with the `growth_factor` multiplier. If the mean loss
150 is improved during `growth_threshold` successive epochs, then the
151 `learning_rate` is increased. If the mean loss is degraded, the epoche
152 is called a "bad move", and the `learning_rate` is decreased until the
153 mean loss is improved again. If the mean loss cannot be improved within
154 `badmove_threshold` trials, then the last trained parameters are kept
155 even though, and the finetuning goes further.
156
157 :Parameters:
158 shared_x_train : :theano:`SharedVariable` from :numpy:`ndarray`
159 The training examples.
160 shared_y_train : :theano:`SharedVariable` from :numpy:`ndarray`
161 The training labels.
162 batch_size : int
163 The number of training examples in each mini-batch.
164 learning_rate : float
165 The rate used to update the parameters with the gradient.
166 epochs : int
167 The number of epochs to run the training algorithm.
168 growth_factor : float
169 The multiplier factor used to increase or decrease the `learning_rate`.
170 growth_threshold : int
171 The number of successive loss-improving epochs after which the `learning_rate` must be updated.
172 badmove_threshold : int
173 The number of successive loss-non-improving gradient descents after which parameters must be updated.
174 verbose : bool
175 If true, information about the training process will be displayed on the standard output.
176
177 :return: elapsed time, in datetime.
178 """

        # the training, parameter hold/restore, and loss evaluation functions
        train = self.trainFunction(batch_size, learning_rate, True, shared_x_train, shared_y_train)
        hold = self.holdFunction()
        restore = self.restoreFunction()
        trainCriterionFunction = self.criterionFunction(downcast=True, shared_x_data=shared_x_train, shared_y_data=shared_y_train)

        n_train_batches = shared_x_train.get_value().shape[0] / batch_size
        finetune_start_time = DT.datetime.now()
        mean_loss = trainCriterionFunction()
        good_epochs = 0
        self.finetune_full_history = [(-1, learning_rate, mean_loss)]
        self.finetune_history = [mean_loss]

        self.initEpochHook(locals())
        for epoch in xrange(epochs):
            epoch_start_time = DT.datetime.now()
            loss_by_batch = []
            # remember the current parameters before attempting this epoch
            hold()
            if(verbose):
                print "",

            self.initBadmoveHook(locals())
            for badmoves in xrange(badmove_threshold):

                self.initBatchHook(locals())
                for lbatch_index in xrange(n_train_batches):
                    loss = train(lbatch_index)
                    loss_by_batch.append(loss)
                    if(verbose):
                        print "\r | |_Batch %d/%d, loss : %f" % (lbatch_index+1, n_train_batches, loss),
                        sys.stdout.flush()
                    if self.checkBatchHook(locals()):
                        break

                new_mean_loss = np.mean(loss_by_batch)
                self.finetune_full_history.append((epoch, learning_rate, new_mean_loss))

                if self.checkBadmoveHook(locals()):
                    break

                # improvement: accept the epoch immediately
                if new_mean_loss < mean_loss:
                    good_epochs += 1
                    break

                # bad move: restore the held parameters and lower the
                # learning rate, unless the badmove threshold is reached
                if badmoves+1 < badmove_threshold:
                    if(verbose):
                        print "\r# Bad move %f < %f; Learning rate : %f --> %f" % (mean_loss, new_mean_loss, learning_rate, learning_rate/growth_factor)
                    restore()
                    learning_rate = learning_rate/growth_factor
                    train = self.trainFunction(
                        batch_size, learning_rate, True,
                        shared_x_train, shared_y_train)
                else:
                    if(verbose):
                        print("\r# Break Epoch on bad move threshold")

                good_epochs = 0

            mean_loss = new_mean_loss
            self.finetune_history.append(mean_loss)

            # fast track: after enough improving epochs, raise the learning rate
            if(good_epochs >= growth_threshold):
                good_epochs = 0
                if(verbose):
                    print "\r# Fast Track; Learning rate : %f --> %f" % (learning_rate, learning_rate*growth_factor)
                learning_rate = learning_rate*growth_factor
                train = self.trainFunction(
                    batch_size, learning_rate, True,
                    shared_x_train, shared_y_train)

            if(verbose):
                print "\r |_Epoch %d/%d, mean loss : %f, duration (s) : %s" % (epoch+1, epochs, new_mean_loss, (DT.datetime.now()-epoch_start_time).total_seconds())

            if self.checkEpochHook(locals()):
                break

        return (DT.datetime.now()-finetune_start_time)

    def train(self, x_train, y_train, **params):
261 """
262 Performs the supervised learning step of the `MultiLayerPerceptron`.
263 This function explicitly calls `finetune`, but displays a bit more information.
264
265 :Parameters:
266 x_train : :numpy:`ndarray`
267 The training examples.
268 y_train : :numpy:`ndarray`
269 The training labels.
270 params : dict
271 The learning parameters, encoded in a dictionary, that are used
272 in the `finetune` method.
273
274 Possible keys: batch_size, learning_rate, epochs, growth_factor,
275 growth_threshold, badmove_threshold, verbose.
276
277 :return: elapsed time, in datetime.
278 :see: `finetune`
279 """
        (learning_params, unknown) = self.checkLearningParameters(params)
        if len(unknown) > 0:
            print("Warning: unknown training parameters %s" % (unknown,))

        learning_params = self.defaultLearningParameters(learning_params)

        print "-- Beginning of fine-tuning (%d epochs) --" % (learning_params['epochs'])
        shared_x_train = theano.shared(x_train)
        shared_y_train = theano.shared(y_train)
        delta = self.finetune(shared_x_train, shared_y_train, **learning_params)
        print "-- End of fine-tuning (lasted %s) --" % (delta)
        return delta


class AutoEncoder(MultiLayerPerceptron):
295 """
296 An `AutoEncoder` is a neural network whichs aims at encoding
297 its inputs in a smaller representation space. It is made of
298 a projection layer and a backprojection layer. The compressed
299 representation (or hidden representation) lies in the projection
300 layer, while the backprojection layer reconstructs the original inputs.
301
302 The weights between those two layers are shared, that means
303 that the backprojection matrix is constrained to be the transpose
304 of the projection matrix. However, the two biases are independant.
305
306 If the data allows it, the `AutoEncoder` is best learned with a `Sigmoid`
307 final activation module in conjunction with a `CrossEntropy` criterion.
308
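    For instance, a minimal sketch (assuming `x_train` is a :numpy:`ndarray`
    matrix with values in [0, 1]) ::

        ae = AutoEncoder(x_train.shape[1], 50, Sigmoid) # 50 hidden units
        ae.linkInputs(T.matrix('x'), x_train.shape[1])
        ae.prepare()
        ae.criterion = CrossEntropy(ae.outputs, T.matrix('y'))
        ae.train(x_train, x_train, epochs=50)  # an autoencoder reconstructs its inputs
        h = ae.hiddenValues(x_train)           # compressed representation
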
    :see: `CrossEntropy`, `Sigmoid`
    """
312 """
313 Constructs a new `AutoEncoder` network.
314
315 :Parameters:
316 nVisibles : int
317 The size of the visible representation.
318 nHidden : int
319 The size of the hidden representation.
320 outputActivation : class derived from `Activation`
321 The type of activation for the backprojection layer.
322 :attention: `outputActivation` parameter is not an instance but a class.
323 """
        Sequential.__init__(self, nInputs=nVisibles)
        self.nHidden = nHidden
        self.add(Linear(nHidden, nVisibles))    # projection layer
        self.add(Tanh(nHidden))
        self.add(Linear(nVisibles, nHidden))    # backprojection layer
        self.add(outputActivation(nVisibles))

    def prepareParams(self):
        if(self.modules):
            # the backprojection layer reuses the transposed projection
            # weights; only its bias is an extra learned parameter
            self.modules[0].prepareParams()
            self.modules[2].prepareParams(self.modules[0].params[0].T)
            self.params.extend(self.modules[0].params)
            self.params.extend([self.modules[2].params[1]])

    def hiddenValues(self, x_input):
        """
        Returns the hidden representation for a given input.

        :Parameters:
            x_input : :numpy:`ndarray`
                The input on which the hidden representation must be computed.

        :return: the corresponding hidden representation
        """
        linear = self.modules[0].forward(x_input)
        return self.modules[1].forward(linear)


class OutputAutoEncoder(AutoEncoder):
    def prepareParams(self):
        if(self.modules):
            # mirror of `AutoEncoder.prepareParams`: here the backprojection
            # layer owns the weights, and the projection layer reuses their transpose
            self.modules[2].prepareParams()
            self.modules[0].prepareParams(self.modules[2].params[0].T)
            self.params.extend(self.modules[2].params)
            self.params.extend([self.modules[0].params[1]])


class PretrainedMLP(MultiLayerPerceptron):
    """
    A `PretrainedMLP` is a specialization of the MLP, where the
    layers are pretrained, for the input part on the training examples
    (:math:`\mathbf{x}`) or for the output part on the training labels
    (:math:`\mathbf{y}`), using a Stacked `AutoEncoder` strategy.

    :see: `MultiLayerPerceptron`, http://www.deeplearning.net/tutorial/SdA.html
    """
369 """
370 Constructs a new `DeepNeuralNetwork`.
371
372 :Parameters:
373 nUnits : int list
374 The sizes of the (input, hidden* , output) representations.
375 outputActivation : class derived from `Activation`
376 The type of activation for the output layer.
377 nInputLayers : int
378 Number of layers starting from input to be stacked with AE
379 nOutputLayers : int
380 Number of layers starting from output to be stacked with AE
381 InputAutoEncoderClass : AutoEncoder sub class
382 Class to be used for Input Auto Encoders
383 OutputAutoEncoderClass : OutputAutoEncoder sub class
384 Class to be used for Output Auto Encoders
385
386 :attention: `outputActivation` parameter is not an instance but a class.
387 """
        MultiLayerPerceptron.__init__(self, nUnits, outputActivation)

        nLayers = len(nUnits)-1
        nLinkLayers = nLayers-nInputLayers-nOutputLayers

        # split the geometry between the input, link and output parts
        self.inputRepresentationSize = nUnits[0]
        self.nUnitsInput = nUnits[1:nInputLayers+1]
        self.nUnitsLink = nUnits[nInputLayers+1:nInputLayers+nLinkLayers]
        self.nUnitsOutput = nUnits[nInputLayers+nLinkLayers:-1]
        self.outputRepresentationSize = nUnits[-1]

        self.inputAutoEncoders = []
        self.outputAutoEncoders = []
        self.linkLayer = []

        x = T.matrix('x')
        y = T.matrix('y')

        # stack one autoencoder per pretrained input layer
        if len(self.nUnitsInput) > 0:
            for nVisibles, nHidden in zip([self.inputRepresentationSize]+self.nUnitsInput[:-1], self.nUnitsInput):
                ae = InputAutoEncoderClass(nVisibles, nHidden)
                ae.linkInputs(x, nVisibles)
                ae.prepare()
                ae.criterion = CrossEntropy(ae.outputs, y)
                self.inputAutoEncoders.append(ae)
            self.lastInputSize = self.nUnitsInput[-1]
        else:
            self.lastInputSize = self.inputRepresentationSize

        # and one per pretrained output layer
        if len(self.nUnitsOutput) > 0:
            for nHidden, nVisibles in zip(self.nUnitsOutput, self.nUnitsOutput[1:]+[self.outputRepresentationSize]):
                ae = OutputAutoEncoderClass(nVisibles, nHidden)
                ae.linkInputs(x, nVisibles)
                ae.prepare()
                ae.criterion = CrossEntropy(ae.outputs, y)
                self.outputAutoEncoders.append(ae)
            self.firstOutputSize = self.nUnitsOutput[0]
            linkLayerLastActivation = Tanh
        else:
            self.firstOutputSize = self.outputRepresentationSize
            linkLayerLastActivation = outputActivation

        # the link layer joins the pretrained input and output stacks
        self.linkLayer = MultiLayerPerceptron([self.lastInputSize]+self.nUnitsLink+[self.firstOutputSize], linkLayerLastActivation)
        self.linkLayer.linkInputs(x, self.lastInputSize)
        self.linkLayer.prepare()

        self.linkInputData = None
        self.linkOutputData = None

    def prepareParams(self):
        if(self.modules):
            if len(self.nUnitsInput) > 0:
                inputParams = list(map(lambda ae: (ae.params[0], ae.params[1]), self.inputAutoEncoders))
            else:
                inputParams = []

            linkParams = list(map(lambda mod: (mod.params[0], mod.params[1]), self.linkLayer.modules[::2]))

            if len(self.nUnitsOutput) > 0:
                outputParams = list(map(lambda ae: (ae.params[0], ae.params[1]), self.outputAutoEncoders))
            else:
                outputParams = []

            # every other module is a Linear one; each receives the
            # pretrained weights and biases of its associated autoencoder
            for mod, params in zip(self.modules[::2], inputParams+linkParams+outputParams):
                mod.prepareParams(params[0], params[1])
                self.params.extend([mod.params[0], mod.params[1]])
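    # A minimal sketch of `pretrainInputAutoEncoders`, whose body is collapsed
    # in this listing; it is written by symmetry with
    # `pretrainOutputAutoEncoders` below, and is indicative only.
    def pretrainInputAutoEncoders(self, data, **params):
        """
        Performs the unsupervised learning step of the input autoencoders,
        using a batch-gradient backpropagation algorithm.

        Each input autoencoder is trained on the hidden representation
        computed by the previous one, starting from the training examples
        (:math:`\mathbf{x}`).

        :Parameters:
            data : :theano:`SharedVariable` from :numpy:`ndarray`
                The training data (typically the examples).
            params : dict
                The learning parameters, encoded in a dictionary, that are used
                in the `finetune` method.
        """
        (learning_params, unknown) = self.checkLearningParameters(params)
        if len(unknown) > 0:
            print("Warning: unknown training parameters %s" % (unknown,))

        learning_params = self.defaultLearningParameters(learning_params)

        shared_inputs = data
        start_time = DT.datetime.now()
        for (ae, layer) in zip(self.inputAutoEncoders, xrange(len(self.inputAutoEncoders))):
            if learning_params['verbose']:
                print("--- Learning input AE %d/%d ---" % (layer+1, len(self.inputAutoEncoders)))
            delta = ae.finetune(shared_inputs, shared_inputs, **learning_params)
            if learning_params['verbose']:
                print("--- Input AE %d/%d Learned, Duration (s) %d ---" % (layer+1, len(self.inputAutoEncoders), delta.total_seconds()))
            # rescale the Tanh hidden activations from [-1,1] to [0,1] before
            # feeding them to the next autoencoder
            inputs = ae.hiddenValues(shared_inputs.get_value())
            shared_inputs = theano.shared((inputs+1.)/2.)
        self.linkInputData = theano.shared(inputs)
        return (DT.datetime.now()-start_time)
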
499 """
500 Performs the unsupervised learning step of the output autoencoders,
501 using a batch-gradient backpropagation algorithm.
502
503 The `InputOutputDeepArchitecture` pretrains the output autoencoders,
504 in the same way the `DeepNeuralNetwork` does for input autoencoders. In
505 this case, the given training data are the labels (:math:`\mathbf{y}`)
506 and not the examples (:math:`\mathbf{x}`) (i.e. the labels that the network
507 must predict).
508
509 Once an `AutoEncoder` is learned, the backprojection layer (decoding) is kept and used
510 to initialize the network layers. The projection (encoding) part is not useful
511 anymore.
512
513 :Parameters:
514 data : :theano:`SharedVariable` from :numpy:`ndarray`
515 The training data (typically example labels).
516 params : dict
517 The learning parameters, encoded in a dictionary, that are used
518 in the `finetune` method.
519 """
        (learning_params, unknown) = self.checkLearningParameters(params)
        if len(unknown) > 0:
            print("Warning: unknown training parameters %s" % (unknown,))

        learning_params = self.defaultLearningParameters(learning_params)

        n_train_batches = data.get_value().shape[0] / learning_params['batch_size']
        shared_outputs = data
        start_time = DT.datetime.now()
        # the output autoencoders are learned from the labels inwards,
        # hence the reversed iteration order
        for (ae, layer) in reversed(zip(self.outputAutoEncoders, xrange(len(self.outputAutoEncoders)))):
            if learning_params['verbose']:
                print("--- Learning output AE %d/%d ---" % (layer+1, len(self.outputAutoEncoders)))
            delta = ae.finetune(shared_outputs, shared_outputs, **learning_params)
            if learning_params['verbose']:
                print("--- Output AE %d/%d Learned, Duration (s) %d ---" % (layer+1, len(self.outputAutoEncoders), delta.total_seconds()))
            # rescale the Tanh hidden activations from [-1,1] to [0,1], so that
            # the next autoencoder can be trained with a `CrossEntropy` criterion
            outputs = ae.hiddenValues(shared_outputs.get_value())
            shared_outputs = theano.shared((outputs+1.)/2.)
        self.linkOutputData = theano.shared(outputs)
        return (DT.datetime.now()-start_time)

    def train(self, x_train, y_train, **params):
541 """
542 Performs the pretraining step for the input and output autoencoders,
543 optionally the semi-supervised pretraining step of the link layer, and
544 finally the supervised learning step (`finetune`).
545
546 :Parameters:
547 x_train : :numpy:`ndarray`
548 The training examples.
549 y_train : :numpy:`ndarray`
550 The training labels.
551 params : dict
552 The learning parameters, encoded in a dictionary, that are used
553 during the autoencoders pretraining (`pretrainInputAutoEncoders`,
554 `pretrainOutputAutoEncoders`), the link layer pretraining, and
555 the final learning (`finetune`) steps.
556
557 Possible keys: batch_size, learning_rate, epochs, growth_factor,
558 growth_threshold, badmove_threshold, verbose,
559 input_pretraining_params, output_pretraining_params,
560 link_pretraining, link_pretraining_params.
561
562 The link_pretraining parameter controls whether the link layer
563 is pretrained or not (default: False).
564
565 The input_pretraining_params, output_pretraining_params and
566 link_pretraining_params parameters are themselves dictionaries
567 containing the training parameters for each pretraining step.
568
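                For instance, a hypothetical call (the parameter values are
                purely illustrative) ::

                    dnn.train(x_train, y_train, epochs=200, batch_size=32,
                              input_pretraining_params={'epochs': 50},
                              link_pretraining=True,
                              link_pretraining_params={'epochs': 30})
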
        :return: elapsed time, as a `datetime.timedelta`.
        :see: `pretrainInputAutoEncoders`, `pretrainOutputAutoEncoders`,
            `finetune`
        """

        (training_params, unknown) = self.checkLearningParameters(params)
        if "input_pretraining_params" in unknown:
            input_pretraining_params = unknown.pop("input_pretraining_params")
        else:
            input_pretraining_params = {'epochs': 50}

        if "output_pretraining_params" in unknown:
            output_pretraining_params = unknown.pop("output_pretraining_params")
        else:
            output_pretraining_params = {'epochs': 50}

        if "link_pretraining" in unknown:
            link_pretraining = unknown.pop("link_pretraining")
        else:
            link_pretraining = False

        if "link_pretraining_params" in unknown:
            link_pretraining_params = unknown.pop("link_pretraining_params")
        else:
            link_pretraining_params = {'epochs': 50}

        if len(unknown) > 0:
            print("Warning: unknown training parameters %s" % (unknown,))

        training_params = self.defaultLearningParameters(training_params)
        input_pretraining_params = self.defaultLearningParameters(input_pretraining_params)
        output_pretraining_params = self.defaultLearningParameters(output_pretraining_params)
        link_pretraining_params = self.defaultLearningParameters(link_pretraining_params)

        shared_x_train = theano.shared(x_train)
        shared_y_train = theano.shared(y_train)

        totalDelta = DT.timedelta(0)
        if len(self.nUnitsInput) > 0:
            if(training_params['verbose']):
                print "-- Beginning of input layers pre-training (%d epochs) --" % (input_pretraining_params['epochs'])
            delta = self.pretrainInputAutoEncoders(shared_x_train, **input_pretraining_params)
            totalDelta += delta
            if(training_params['verbose']):
                print "-- End of input layers pre-training (lasted %s) --" % (delta)

        if len(self.nUnitsOutput) > 0:
            if(training_params['verbose']):
                print "-- Beginning of output layers pre-training (%d epochs) --" % (output_pretraining_params['epochs'])
            delta = self.pretrainOutputAutoEncoders(shared_y_train, **output_pretraining_params)
            totalDelta += delta
            if(training_params['verbose']):
                print "-- End of output layers pre-training (lasted %s) --" % (delta)

        if(link_pretraining):
            # default pretraining data for the link layer, if no autoencoder
            # pretraining has provided intermediate representations
            if self.linkInputData is None:
                self.linkInputData = shared_x_train
            if self.linkOutputData is None:
                self.linkOutputData = shared_y_train

            y = T.matrix('y')
            if len(self.nUnitsOutput) > 0:
                self.linkLayer.criterion = MeanSquareError(self.linkLayer.outputs, y)
            else:
                self.linkLayer.criterion = self.criterion.__class__(self.linkLayer.outputs, y)

            if(training_params['verbose']):
                print "-- Beginning of link layer pre-training (%d epochs) --" % (link_pretraining_params['epochs'])
            delta = self.linkLayer.finetune(self.linkInputData, self.linkOutputData, **link_pretraining_params)
            totalDelta += delta
            if(training_params['verbose']):
                print "-- End of link layer pre-training (lasted %s) --" % (delta)

        if(training_params['verbose']):
            print "-- Beginning of fine-tuning (%d epochs) --" % (training_params['epochs'])
        delta = self.finetune(shared_x_train, shared_y_train, **training_params)
        totalDelta += delta
        if(training_params['verbose']):
            print "-- End of fine-tuning (lasted %s) --" % (delta)
        return totalDelta


class DeepNeuralNetwork(PretrainedMLP):
    """
    A `DeepNeuralNetwork` (DNN) is a specialization of the MLP, where the
    layers are pretrained on the training examples (:math:`\mathbf{x}`)
    using a Stacked `AutoEncoder` strategy. It has been specifically designed
    for data that lies in a high-dimensional input space.

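    For instance, a minimal sketch (sizes are illustrative) ::

        dnn = DeepNeuralNetwork([1000, 500, 100], [10], Sigmoid)
        dnn.linkInputs(T.matrix('x'), 1000)
        dnn.prepare()
        dnn.criterion = CrossEntropy(dnn.outputs, T.matrix('y'))
        dnn.train(x_train, y_train, input_pretraining_params={'epochs': 50})
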
    :see: `MultiLayerPerceptron`, http://www.deeplearning.net/tutorial/SdA.html
    """
660 """
661 Constructs a new `DeepNeuralNetwork`.
662
663 :Parameters:
664 nUnitsInput : int list
665 The sizes of the (input and hidden) representations on the input side.
666 nUnitsOutput : int list
667 The sizes of the (hidden and output) representations on the output side.
668 outputActivation : class derived from `Activation`
669 The type of activation for the output layer.
670 :attention: `outputActivation` parameter is not an instance but a class.
671 """
        PretrainedMLP.__init__(self, nUnitsInput+nUnitsOutput, outputActivation=outputActivation, nInputLayers=len(nUnitsInput)-1)
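

# A minimal sketch of `InputOutputDeepArchitecture`, whose full definition is
# collapsed in this listing; it is inferred from `PretrainedMLP` and from the
# `pretrainOutputAutoEncoders` documentation, and is indicative only.
class InputOutputDeepArchitecture(PretrainedMLP):
    """
    An `InputOutputDeepArchitecture` (IODA) is a specialization of the MLP,
    where both the input and the output layers are pretrained, respectively
    on the training examples (:math:`\mathbf{x}`) and on the training labels
    (:math:`\mathbf{y}`), using a Stacked `AutoEncoder` strategy. It has been
    specifically designed for problems where both the input and the output
    spaces are high-dimensional.

    :see: `DeepNeuralNetwork`, `PretrainedMLP`
    """
    def __init__(self, nUnitsInput, nUnitsOutput, outputActivation):
        """
        Constructs a new `InputOutputDeepArchitecture`.

        :Parameters:
            nUnitsInput : int list
                The sizes of the (input and hidden) representations on the input side.
            nUnitsOutput : int list
                The sizes of the (hidden and output) representations on the output side.
            outputActivation : class derived from `Activation`
                The type of activation for the output layer.
        :attention: `outputActivation` parameter is not an instance but a class.
        """
        PretrainedMLP.__init__(self, nUnitsInput+nUnitsOutput, outputActivation=outputActivation,
                               nInputLayers=len(nUnitsInput)-1, nOutputLayers=len(nUnitsOutput)-1)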