|
78 | 78 | __cuda_dsigmoid_delta, __cuda_randomly_replace_elements, __cuda_l1reg, \
|
79 | 79 | __cuda_soft_threshold, __cuda_clip, __cuda_swapaxes01, \
|
80 | 80 | __cuda_sequence_to_tensor, __cuda_to_onehot, __cuda_leakyrelu, \
|
81 |
| - __cuda_dleakyrelu_delta, __cuda_mse_core = [None] * 18 |
| 81 | + __cuda_dleakyrelu_delta, __cuda_mse_core , \ |
| 82 | + __cuda_elu, __cuda_delu_delta = [None] * 20 |
82 | 83 |
|
83 | 84 | except ImportError:
|
84 | 85 | warnings.warn("CUDA libraries are not available.")
|
@@ -153,7 +154,8 @@ def init_gpu(gpu_id=0, seed=0):
|
153 | 154 | __cuda_dsigmoid_delta, __cuda_randomly_replace_elements, __cuda_l1reg, \
|
154 | 155 | __cuda_soft_threshold, __cuda_clip, _IS_CUDA_INITIALIZED, \
|
155 | 156 | __cuda_sequence_to_tensor, __cuda_to_onehot, \
|
156 |
| - __cuda_leakyrelu, __cuda_dleakyrelu_delta, __cuda_mse_core |
| 157 | + __cuda_leakyrelu, __cuda_dleakyrelu_delta, __cuda_mse_core, \ |
| 158 | + __cuda_elu, __cuda_delu_delta |
157 | 159 |
|
158 | 160 | if _IS_CUDA_INITIALIZED:
|
159 | 161 | warnings.warn("GPU was already initialized, will not initialize again!")
|
@@ -212,6 +214,14 @@ def init_gpu(gpu_id=0, seed=0):
|
212 | 214 | __cuda_dsigmoid_delta =ElementwiseKernel(
|
213 | 215 | "float* d, float* a", "d[i] *= a[i]*(1.0 - a[i])", 'dsigmoid_delta')
|
214 | 216 |
|
| 217 | + __cuda_elu = ElementwiseKernel("float* x, float* o, float alpha", |
| 218 | + 'o[i] = x[i] > 0 ? x[i] : alpha*(expf(x[i])-1);', |
| 219 | + 'elu_eltw') |
| 220 | + __cuda_delu_delta = ElementwiseKernel("float* d, float* a, float alpha", |
| 221 | + 'd[i] *= (a[i] > 0 ? 1.0 : a[i]+alpha);', |
| 222 | + 'delu_eltw') |
| 223 | + |
| 224 | + |
215 | 225 | # drops "val" into x p times of the time. r contains (0, 1] uniform values.
|
216 | 226 | # Resulting mask will be stored in r, as well.
|
217 | 227 | __cuda_randomly_replace_elements = ElementwiseKernel(
|
@@ -681,6 +691,27 @@ def dleakyrelu_delta(D, A, X, beta=0.1, stream=None):
|
681 | 691 | return D
|
682 | 692 |
|
683 | 693 |
|
def elu(x, alpha=1.0, out=None, stream=None):
    """Exponential Linear Unit: out = x if x > 0 else alpha*(exp(x)-1).

    Handles both PyCUDA GPUArrays (via the precompiled ``__cuda_elu``
    elementwise kernel) and numpy arrays.  If ``out`` is None a new
    array shaped like ``x`` is allocated with ``empty_like``; otherwise
    the result is written into ``out`` in place.

    Parameters
    ----------
    x : gpuarray.GPUArray or numpy array
        Input activations.
    alpha : float
        Saturation value for the negative branch (default 1.0).
    out : same type as x, optional
        Destination buffer; allocated if None.
    stream : CUDA stream, optional
        Stream for the kernel launch (GPU path only).

    Returns
    -------
    out
    """
    if out is None:
        out = empty_like(x)
    if isinstance(x, gpuarray.GPUArray):
        # The kernel declares "float alpha": pass a numpy float32 scalar so
        # PyCUDA marshals the argument correctly (consistent with delu_delta).
        __cuda_elu(x, out, np.float32(alpha), stream=stream)
    else:
        out[:] = np.where(x > 0, x, alpha * (np.exp(x) - 1))
    return out
| 703 | + |
def delu_delta(D, A, X, alpha=1.0, stream=None):
    """Multiply D in place by the ELU derivative, given A = elu(X).

    Computes ``D *= where(A > 0, 1, A + alpha)``: for x <= 0,
    elu(x) = alpha*(exp(x)-1), so the derivative alpha*exp(x) equals
    elu(x) + alpha — only the activation A is needed.  ``X`` is unused
    here; presumably kept for signature symmetry with the other
    ``*_delta`` helpers (cf. ``dleakyrelu_delta``) — TODO confirm.
    Handles both GPUArrays and numpy arrays; returns D.
    """
    if isinstance(D, gpuarray.GPUArray):
        __cuda_delu_delta(D, A, np.float32(alpha), stream=stream)
    else:
        D *= np.where(A > 0, 1, alpha+A)
    return D
| 711 | + |
| 712 | + |
| 713 | + |
| 714 | + |
684 | 715 | ######## RANDOM NUMBERS ###########################################
|
685 | 716 | def rand_gaussian(shape, mu=0.0, sigma=1.0, dtype=np.float32, use_gpu=False, stream=None):
|
686 | 717 | out = empty(shape, dtype, use_gpu)
|
|
0 commit comments