Source code for symjax.nn.initializers

import numpy as np


def constant(shape, value):
    """Initialize a tensor of the given shape filled with a constant value."""
    return np.full(shape, value)
def uniform(shape, scale=0.05):
    """Sample uniform weights U(-scale, scale).

    Parameters
    ----------
    shape: tuple

    scale: float (default=0.05)
    """
    return np.random.uniform(low=-scale, high=scale, size=shape)
def normal(shape, scale=0.05):
    """Sample Gaussian weights N(0, scale**2) (`scale` is the standard deviation).

    Parameters
    ----------
    shape: tuple

    scale: float (default=0.05)
    """
    return np.random.normal(loc=0.0, scale=scale, size=shape)
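
# Minimal usage sketch for the three samplers above; the shapes and scales are
# illustrative only and `_sampler_examples` is a hypothetical helper, not part
# of the symjax API.
def _sampler_examples():
    b = constant((64,), 0.0)            # bias initialized to zeros
    W1 = uniform((64, 32), scale=0.1)   # entries drawn from U(-0.1, 0.1)
    W2 = normal((64, 32), scale=0.01)   # entries with standard deviation 0.01
    return b, W1, W2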
def orthogonal(shape, scale=1):
    """From Lasagne. Reference: Saxe et al., http://arxiv.org/abs/1312.6120"""
    flat_shape = (shape[0], np.prod(shape[1:]))
    a = np.random.normal(0.0, 1.0, flat_shape)
    u, _, v = np.linalg.svd(a, full_matrices=False)
    # pick the factor with the correct shape
    q = u if u.shape == flat_shape else v
    q = q.reshape(shape)
    return scale * q[: shape[0], : shape[1]]
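
# Minimal sanity-check sketch for `orthogonal` above; the shape is illustrative
# and `_check_orthogonal` is a hypothetical helper, not part of the symjax API.
def _check_orthogonal():
    # for a tall 2D shape the returned columns are orthonormal,
    # so W.T @ W is (close to) the identity
    W = orthogonal((64, 32))
    assert np.allclose(W.T @ W, np.eye(32), atol=1e-5)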
def get_fans(shape):
    """Utility giving the `fan_in` and `fan_out` of a tensor shape.

    The concepts of `fan_in` and `fan_out` help to build weight initializers.
    Those quantities represent the number of units that the current weight
    takes as input (from the previous layer) and the number of outputs it
    produces. From those two numbers, the variance of the random variables can
    be chosen such that the layer feature maps neither vanish nor explode in
    amplitude.

    Parameters
    ----------
    shape: tuple
        the shape of the tensor. For a densely connected layer this is
        (previous layer width, current layer width) and for a (2D) convolution
        it is (n_filters, input_channels) + spatial shape

    Returns
    -------
    fan_in: int

    fan_out: int
    """
    fan_out, fan_in = shape[:2]
    if len(shape) > 2:
        kernel_spatial = np.prod(shape[2:])
        fan_in = fan_in * kernel_spatial
        fan_out = fan_out * kernel_spatial
    return fan_in, fan_out
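
# Worked sketch of `get_fans` as implemented above; the shapes are illustrative
# and `_fan_examples` is a hypothetical helper, not part of the symjax API.
def _fan_examples():
    # 2D weight: fan_out = shape[0] = 128, fan_in = shape[1] = 64
    assert get_fans((128, 64)) == (64, 128)
    # conv weight (n_filters, input_channels, kh, kw) = (32, 16, 3, 3):
    # fan_in = 16 * 3 * 3 = 144, fan_out = 32 * 3 * 3 = 288
    assert get_fans((32, 16, 3, 3)) == (144, 288)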
def variance_scaling(shape, mode, gain=1, distribution=normal):
    """Variance Scaling initialization."""
    if len(shape) < 2:
        raise RuntimeError(
            "This initializer only works with shapes of length >= 2"
        )

    fan_in, fan_out = get_fans(shape)
    if mode == "fan_in":
        den = fan_in
    elif mode == "fan_out":
        den = fan_out
    elif mode == "fan_avg":
        den = (fan_in + fan_out) / 2.0
    elif mode == "fan_sum":
        den = fan_in + fan_out
    else:
        raise ValueError(
            "mode must be fan_in, fan_out, fan_avg or fan_sum, "
            "value passed was {}".format(mode)
        )
    scale = gain / np.sqrt(den)
    return distribution(shape, scale=scale)
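
# Minimal sketch of how `variance_scaling` turns the fans into a scale,
# assuming a dense weight of shape (256, 128); `_scale_example` is a
# hypothetical helper, not part of the symjax API.
def _scale_example():
    shape = (256, 128)
    fan_in, fan_out = get_fans(shape)  # (128, 256)
    # mode="fan_avg" with gain=1 gives scale = 1 / sqrt((fan_in + fan_out) / 2)
    expected = 1 / np.sqrt((fan_in + fan_out) / 2.0)
    # with distribution=normal, `scale` is used as the standard deviation
    W = variance_scaling(shape, mode="fan_avg", distribution=normal)
    assert abs(W.std() - expected) / expected < 0.1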
def glorot_uniform(shape):
    """Reference: Glorot & Bengio, AISTATS 2010"""
    return variance_scaling(shape, mode="fan_avg", distribution=uniform)


def glorot_normal(shape):
    """Reference: Glorot & Bengio, AISTATS 2010"""
    return variance_scaling(shape, mode="fan_avg", distribution=normal)
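
# Minimal usage sketch for the Glorot initializers above; the shapes are
# illustrative and `_glorot_examples` is a hypothetical helper, not part of
# the symjax API.
def _glorot_examples():
    # dense weight: the scale is derived from the average of fan_in and fan_out
    W = glorot_normal((512, 256))
    # 2D convolution weight laid out as (n_filters, input_channels, kh, kw)
    K = glorot_uniform((64, 32, 3, 3))
    return W, K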
def he_normal(shape):
    """Reference: He et al., http://arxiv.org/abs/1502.01852"""
    return variance_scaling(shape, mode="fan_in", distribution=normal)


def he_uniform(shape):
    """Reference: He et al., http://arxiv.org/abs/1502.01852"""
    return variance_scaling(shape, mode="fan_in", distribution=uniform)
def lecun_uniform(shape, name=None):
    """Reference: LeCun 98, Efficient Backprop

    http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf
    """
    return variance_scaling(
        shape, mode="fan_in", gain=np.sqrt(3), distribution=uniform
    )
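
# Minimal numerical sanity check for the fan_in-based initializers above; the
# shape and tolerances are illustrative and `_named_init_examples` is a
# hypothetical helper, not part of the symjax API.
def _named_init_examples():
    shape = (256, 128)
    fan_in, _ = get_fans(shape)  # fan_in = shape[1] = 128
    # he_normal above uses gain=1, so the sample std is about 1 / sqrt(fan_in)
    W = he_normal(shape)
    assert abs(W.std() - 1 / np.sqrt(fan_in)) < 0.01
    # lecun_uniform draws from U(-sqrt(3 / fan_in), sqrt(3 / fan_in))
    V = lecun_uniform(shape)
    assert np.abs(V).max() <= np.sqrt(3 / fan_in) + 1e-12
    return W, V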