diff --git a/include/caffe/layers/bias_layer.hpp b/include/caffe/layers/bias_layer.hpp
new file mode 100644
index 00000000000..eedc3aaa351
--- /dev/null
+++ b/include/caffe/layers/bias_layer.hpp
@@ -0,0 +1,60 @@
+#ifndef CAFFE_BIAS_LAYER_HPP_
+#define CAFFE_BIAS_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+namespace caffe {
+
+/**
+ * @brief Computes a sum of two input Blobs, with the shape of the
+ *        latter Blob "broadcast" to match the shape of the former.
+ *        Equivalent to tiling the latter Blob, then computing the elementwise
+ *        sum.
+ *
+ * The second input may be omitted, in which case it's learned as a parameter
+ * of the layer.
+ *
+ * The bias spans a contiguous run of bottom[0]'s axes beginning at
+ * `bias_param().axis()`; Reshape checks that the two shapes agree there.
+ */
+template <typename Dtype>
+class BiasLayer : public Layer<Dtype> {
+ public:
+  explicit BiasLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "Bias"; }
+  virtual inline int MinBottomBlobs() const { return 1; }
+  virtual inline int MaxBottomBlobs() const { return 2; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+ private:
+  // All-ones vector of length inner_dim_ (filled in Reshape).
+  Blob<Dtype> bias_multiplier_;
+  // Extents cached by Reshape: bottom[0] axes before the bias, the bias
+  // element count, bottom[0] axes after the bias, and bias_dim_ * inner_dim_.
+  int outer_dim_, bias_dim_, inner_dim_, dim_;
+};
+
+
+
+}  // namespace caffe
+
+#endif  // CAFFE_BIAS_LAYER_HPP_
diff --git a/include/caffe/layers/channelwise_affine_layer.hpp b/include/caffe/layers/channelwise_affine_layer.hpp deleted file mode 100644 index 6d8ac98b6ed..00000000000 --- a/include/caffe/layers/channelwise_affine_layer.hpp +++ /dev/null @@ -1,103 
+0,0 @@ -#ifndef CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ -#define CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_ - -#include -#include "caffe/blob.hpp" -#include "caffe/layer.hpp" -#include "caffe/layers/neuron_layer.hpp" -#include "caffe/proto/caffe.pb.h" - -namespace caffe { - /** - * @brief Affine non-linearity function @f$ - * y = ax+b - * @f$, could be used after batch normalization layer - * - */ -template -class ChannelwiseAffineLayer : public NeuronLayer { - public: - /** - * @param param provides ChannelwiseAffineParameter ChannelwiseAffine_param, - * with ChannelwiseAffineLayer options: - * - slope_filler (\b optional, FillerParameter, - * default {'type': constant 'value':1.0001}). - * - bias_filler (\b optional, FillerParameter, - * default {'type': constant 'value':0.0001}). - * - channel_shared (\b optional, default false). - * slopes and biases are shared across channels. - */ - explicit ChannelwiseAffineLayer(const LayerParameter& param) - : NeuronLayer(param) {} - virtual void LayerSetUp(const vector*>& bottom, - const vector*>& top); - virtual void Reshape(const vector*>& bottom, - const vector*>& top); - virtual inline const char* type() const { return "ChannelwiseAffine"; } - - protected: - /** - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times ...) @f$ - * the inputs @f$ x @f$ - * @param top output Blob vector (length 1) - * -# @f$ (N \times C \times ...) @f$ - * the computed outputs for each channel @f$i@f$ @f$ - * y_i = a_i x_i + b_i - * @f$. - */ - virtual void Forward_cpu(const vector*>& bottom, - const vector*>& top); - virtual void Forward_gpu(const vector*>& bottom, - const vector*>& top); - /** - * @brief Computes the error gradient w.r.t. the ChannelwiseAffine inputs. - * - * @param top output Blob vector (length 1), providing the error gradient with - * respect to the outputs - * -# @f$ (N \times C \times ...) 
@f$ - * containing error gradients @f$ \frac{\partial E}{\partial y} @f$ - * with respect to computed outputs @f$ y @f$ - * @param propagate_down see Layer::Backward. - * @param bottom input Blob vector (length 1) - * -# @f$ (N \times C \times ...) @f$ - * the inputs @f$ x @f$; For each channel @f$i@f$, backward fills their - * diff with gradients @f$ - * \frac{\partial E}{\partial x_i} = \left\{ - * \begin{array}{lr} - * a_i \frac{\partial E}{\partial y_i} - * \end{array} \right. - * @f$. - * If param_propagate_down_[0] is true, it fills the diff with gradients - * @f$ - * \frac{\partial E}{\partial a_i} = \left\{ - * \begin{array}{lr} - * \sum_{x_i} x_i \frac{\partial E}{\partial y_i} - * \end{array} \right. - * @f$. - * If param_propagate_down_[1] is true, it fills the diff with gradients - * @f$ - * \frac{\partial E}{\partial b_i} = \left\{ - * \begin{array}{lr} - * frac{\partial E}{\partial y_i} - * \end{array} \right. - * @f$. - */ - virtual void Backward_cpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom); - virtual void Backward_gpu(const vector*>& top, - const vector& propagate_down, - const vector*>& bottom); - bool channel_shared_; - Blob multiplier_; - // dot multiplier for backward computation of params - Blob bias_multiplier_; - Blob backward_buff_; - // temporary buffer for backward computation - Blob bottom_memory_; - // memory for in-place computation -}; -} // namespace caffe - -#endif // CAFFE_CHANNELWISE_AFFINE_LAYER_HPP_
diff --git a/include/caffe/layers/scale_layer.hpp b/include/caffe/layers/scale_layer.hpp
new file mode 100644
index 00000000000..924df2e51ab
--- /dev/null
+++ b/include/caffe/layers/scale_layer.hpp
@@ -0,0 +1,91 @@
+#ifndef CAFFE_SCALE_LAYER_HPP_
+#define CAFFE_SCALE_LAYER_HPP_
+
+#include <vector>
+
+#include "caffe/blob.hpp"
+#include "caffe/layer.hpp"
+#include "caffe/proto/caffe.pb.h"
+
+#include "caffe/layers/bias_layer.hpp"
+
+namespace caffe {
+
+/**
+ * @brief Computes a product of two input Blobs, with the shape of the
+ *        latter Blob "broadcast" to match the shape of the former.
+ *        Equivalent to tiling the latter Blob, then computing the elementwise
+ *        product.
+ *
+ * The second input may be omitted, in which case it's learned as a parameter
+ * of the layer.
+ *
+ * When `scale_param().bias_term()` is set, an internal Bias layer adds a
+ * learned bias to the scaled result.
+ */
+template <typename Dtype>
+class ScaleLayer: public Layer<Dtype> {
+ public:
+  explicit ScaleLayer(const LayerParameter& param)
+      : Layer<Dtype>(param) {}
+  virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+
+  virtual inline const char* type() const { return "Scale"; }
+  virtual inline int MinBottomBlobs() const { return 1; }
+  virtual inline int MaxBottomBlobs() const { return 2; }
+  virtual inline int ExactNumTopBlobs() const { return 1; }
+
+ protected:
+  /**
+   * In the below shape specifications, @f$ i @f$ denotes the value of the
+   * `axis` field given by `this->layer_param_.scale_param().axis()`, after
+   * canonicalization (i.e., conversion from negative to positive index,
+   * if applicable).
+   *
+   * @param bottom input Blob vector (length 1 or 2)
+   *   -# @f$ (d_0 \times ... \times
+   *           d_i \times ... \times d_j \times ... \times d_n) @f$
+   *      the first factor @f$ x @f$
+   *   -# @f$ (d_i \times ... \times d_j) @f$
+   *      the second factor @f$ y @f$; when omitted, the learned parameter
+   *      blob is used instead
+   * @param top output Blob vector (length 1)
+   *   -# @f$ (d_0 \times ... \times
+   *           d_i \times ... \times d_j \times ... \times d_n) @f$
+   *      the product @f$ z = x y @f$ computed after "broadcasting" y.
+   *      Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$,
+   *      then computing the elementwise product.
+   */
+  virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top);
+  virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+  virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
+      const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
+
+  // Internal Bias layer; only created when bias_term is set.
+  shared_ptr<Layer<Dtype> > bias_layer_;
+  vector<Blob<Dtype>*> bias_bottom_vec_;
+  vector<bool> bias_propagate_down_;
+  // Index of the bias blob within this->blobs_.
+  int bias_param_id_;
+
+  // All-ones vector used for the reductions in Backward.
+  Blob<Dtype> sum_multiplier_;
+  // Intermediate sums, one per (outer, scale) pair, used in Backward.
+  Blob<Dtype> sum_result_;
+  // Holds the bottom[0] data for Backward when computing in-place.
+  Blob<Dtype> temp_;
+  int axis_;
+  int outer_dim_, scale_dim_, inner_dim_;
+};
+
+
+}  // namespace caffe
+
+#endif  // CAFFE_SCALE_LAYER_HPP_
diff --git a/src/caffe/layers/bias_layer.cpp b/src/caffe/layers/bias_layer.cpp
new file mode 100644
index 00000000000..0a786b5db98
--- /dev/null
+++ b/src/caffe/layers/bias_layer.cpp
@@ -0,0 +1,133 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layers/bias_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+// Creates and fills the bias blob when the bias is a learned parameter
+// (single bottom). With two bottoms the bias comes from bottom[1] and no
+// parameter blob is created here.
+template <typename Dtype>
+void BiasLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  if (bottom.size() == 1 && this->blobs_.size() > 0) {
+    LOG(INFO) << "Skipping parameter initialization";
+  } else if (bottom.size() == 1) {
+    // bias is a learned parameter; initialize it
+    const BiasParameter& param = this->layer_param_.bias_param();
+    const int axis = bottom[0]->CanonicalAxisIndex(param.axis());
+    const int num_axes = param.num_axes();
+    CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
+                           << "or -1 to extend to the end of bottom[0]";
+    if (num_axes >= 0) {
+      CHECK_GE(bottom[0]->num_axes(), axis + num_axes)
+          << "bias blob's shape extends past bottom[0]'s shape when applied "
+          << "starting with bottom[0] axis = " << axis;
+    }
+    this->blobs_.resize(1);
+    const vector<int>::const_iterator& shape_start =
+        bottom[0]->shape().begin() + axis;
+    const vector<int>::const_iterator& shape_end =
+        (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes);
+    vector<int> bias_shape(shape_start, shape_end);
+    this->blobs_[0].reset(new Blob<Dtype>(bias_shape));
+    shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(param.filler()));
+    filler->Fill(this->blobs_[0].get());
+  }
+  this->param_propagate_down_.resize(this->blobs_.size(), true);
+}
+
+// Validates the bias shape against bottom[0], caches the outer/bias/inner
+// extents used by Forward/Backward, and sizes the all-ones multiplier.
+template <typename Dtype>
+void BiasLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const BiasParameter& param = this->layer_param_.bias_param();
+  Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
+  // Always set axis == 0 in special case where bias is a scalar
+  // (num_axes == 0). Mathematically equivalent for any choice of axis, so the
+  // actual setting can be safely ignored; and computation is most efficient
+  // with axis == 0 and (therefore) outer_dim_ == 1.
+  const int axis = (bias->num_axes() == 0) ?
+      0 : bottom[0]->CanonicalAxisIndex(param.axis());
+  CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes())
+      << "bias blob's shape extends past bottom[0]'s shape when applied "
+      << "starting with bottom[0] axis = " << axis;
+  for (int i = 0; i < bias->num_axes(); ++i) {
+    CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i))
+        << "dimension mismatch between bottom[0]->shape(" << axis + i
+        << ") and bias->shape(" << i << ")";
+  }
+  outer_dim_ = bottom[0]->count(0, axis);
+  bias_dim_ = bias->count();
+  inner_dim_ = bottom[0]->count(axis + bias->num_axes());
+  dim_ = bias_dim_ * inner_dim_;
+  if (bottom[0] != top[0]) {
+    top[0]->ReshapeLike(*bottom[0]);
+  }
+  bias_multiplier_.Reshape(vector<int>(1, inner_dim_));
+  if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) {
+    caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data());
+  }
+}
+
+// Copies bottom[0] into top[0] (unless in-place), then adds the bias to
+// each of the outer_dim_ slices, broadcast over the inner_dim_ axes.
+template <typename Dtype>
+void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const Dtype* bias_data =
+      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
+  Dtype* top_data = top[0]->mutable_cpu_data();
+  if (bottom[0] != top[0]) {
+    const Dtype* bottom_data = bottom[0]->cpu_data();
+    caffe_copy(bottom[0]->count(), bottom_data, top_data);
+  }
+  for (int n = 0; n < outer_dim_; ++n) {
+    // Rank-1 update top += bias * ones^T; the shared dimension K is 1.
+    caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_,
+        inner_dim_, 1, Dtype(1), bias_data,
+        bias_multiplier_.cpu_data(), Dtype(1), top_data);
+    top_data += dim_;
+  }
+}
+
+// Passes the top diff through to bottom[0] (unless in-place) and reduces it
+// over the outer and inner axes to obtain the bias gradient.
+template <typename Dtype>
+void BiasLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (propagate_down[0] && bottom[0] != top[0]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
+    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+    caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
+  }
+  // in-place, we don't need to do anything with the data diff
+  const bool bias_param = (bottom.size() == 1);
+  if ((!bias_param && propagate_down[1]) ||
+      (bias_param && this->param_propagate_down_[0])) {
+    const Dtype* top_diff = top[0]->cpu_diff();
+    Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
+        ->mutable_cpu_diff();
+    // A learned bias accumulates into its diff; a bottom[1] bias diff is
+    // overwritten by the first slice and accumulated into afterwards.
+    bool accum = bias_param;
+    for (int n = 0; n < outer_dim_; ++n) {
+      caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
+          top_diff, bias_multiplier_.cpu_data(), Dtype(accum), bias_diff);
+      top_diff += dim_;
+      accum = true;
+    }
+  }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(BiasLayer);
+#endif
+
+INSTANTIATE_CLASS(BiasLayer);
+REGISTER_LAYER_CLASS(Bias);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/bias_layer.cu b/src/caffe/layers/bias_layer.cu
new file mode 100644
index 00000000000..8ac913a5d7b
--- /dev/null
+++ b/src/caffe/layers/bias_layer.cu
@@ -0,0 +1,63 @@
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layers/bias_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+// Adds bias[(index / inner_dim) % bias_dim] to each of the n input elements.
+template <typename Dtype>
+__global__ void BiasForward(const int n, const Dtype* in,
+    const Dtype* bias, const int bias_dim, const int inner_dim,
+    Dtype* out) {
+  CUDA_KERNEL_LOOP(index, n) {
+    const int bias_index = (index / inner_dim) % bias_dim;
+    out[index] = in[index] + bias[bias_index];
+  }
+}
+
+// GPU forward: one kernel launch that adds the broadcast bias to bottom[0].
+template <typename Dtype>
+void BiasLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const int count = top[0]->count();
+  const Dtype* bottom_data = bottom[0]->gpu_data();
+  const Dtype* bias_data =
+      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data();
+  Dtype* top_data = top[0]->mutable_gpu_data();
+  BiasForward<Dtype>  // NOLINT_NEXT_LINE(whitespace/operators)
+      <<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
+      count, bottom_data, bias_data, bias_dim_, inner_dim_, top_data);
+  CUDA_POST_KERNEL_CHECK;
+}
+
+// GPU backward: passes the top diff through and reduces it into the bias diff.
+template <typename Dtype>
+void BiasLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (propagate_down[0] && bottom[0] != top[0]) {
+    const Dtype* top_diff = top[0]->gpu_diff();
+    Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
+    caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
+  }
+  // in-place, we don't need to do anything with the data diff
+  const bool bias_param = (bottom.size() == 1);
+  if ((!bias_param && propagate_down[1]) ||
+      (bias_param && this->param_propagate_down_[0])) {
+    const Dtype* top_diff = top[0]->gpu_diff();
+    Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
+        ->mutable_gpu_diff();
+    bool accum = bias_param;
+    for (int n = 0; n < outer_dim_; ++n) {
+      caffe_gpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
+          top_diff, bias_multiplier_.gpu_data(), Dtype(accum), bias_diff);
+      top_diff += dim_;
+      accum = true;
+    }
+  }
+}
+
+INSTANTIATE_LAYER_GPU_FUNCS(BiasLayer);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/channelwise_affine_layer.cpp b/src/caffe/layers/channelwise_affine_layer.cpp deleted file mode 100644 index e9f31fb10e3..00000000000 --- a/src/caffe/layers/channelwise_affine_layer.cpp +++ /dev/null @@ -1,189 +0,0 @@ -#include -#include - -#include "caffe/filler.hpp" -#include "caffe/layer.hpp" -#include "caffe/layers/channelwise_affine_layer.hpp" - -namespace caffe { - -template -void ChannelwiseAffineLayer::LayerSetUp( - const vector*>& bottom, - const vector*>& top) { - CHECK_GE(bottom[0]->num_axes(), 2) - << "Number of axes of bottom blob must be >=2."; - ChannelwiseAffineParameter channelwise_affine_param = - this->layer_param().channelwise_affine_param(); - int channels = bottom[0]->channels(); - channel_shared_ = 
channelwise_affine_param.channel_shared(); - if (this->blobs_.size() > 0) { - LOG(INFO) << "Skipping parameter initialization"; - } else { - this->blobs_.resize(2); - if (channel_shared_) { - this->blobs_[0].reset(new Blob(vector(0))); - this->blobs_[1].reset(new Blob(vector(0))); - - } else { - this->blobs_[0].reset(new Blob(vector(1, channels))); - this->blobs_[1].reset(new Blob(vector(1, channels))); - } - shared_ptr > filler; - if (channelwise_affine_param.has_slope_filler()) { - filler.reset(GetFiller(channelwise_affine_param.slope_filler())); - } else { - FillerParameter filler_param; - filler_param.set_type("constant"); - filler_param.set_value(1.0001); - filler.reset(GetFiller(filler_param)); - } - filler->Fill(this->blobs_[0].get()); - - if (channelwise_affine_param.has_bias_filler()) { - filler.reset(GetFiller(channelwise_affine_param.bias_filler())); - } else { - FillerParameter filler_param; - filler_param.set_type("constant"); - filler_param.set_value(0.0001); - filler.reset(GetFiller(filler_param)); - } - filler->Fill(this->blobs_[1].get()); - } - if (channel_shared_) { - CHECK_EQ(this->blobs_[0]->count(), 1) - << "Slope size is inconsistent with prototxt config"; - } else { - CHECK_EQ(this->blobs_[0]->count(), channels) - << "Slope size is inconsistent with prototxt config"; - } - - // Propagate gradients to the parameters (as directed by backward pass). 
- this->param_propagate_down_.resize(this->blobs_.size(), true); - multiplier_.Reshape(vector(1, bottom[0]->count(1))); - bias_multiplier_.Reshape(vector(1, bottom[0]->count(1))); - backward_buff_.Reshape(vector(1, bottom[0]->count(1))); - caffe_set(multiplier_.count(), Dtype(1.0), - multiplier_.mutable_cpu_data()); - caffe_set(bias_multiplier_.count(), Dtype(1.0), - bias_multiplier_.mutable_cpu_data()); -} - -template -void ChannelwiseAffineLayer::Reshape( - const vector*>& bottom, - const vector*>& top) { - CHECK_GE(bottom[0]->num_axes(), 2) - << "Number of axes of bottom blob must be >=2."; - top[0]->ReshapeLike(*bottom[0]); - if (bottom[0] == top[0]) { - // For in-place computation - bottom_memory_.ReshapeLike(*bottom[0]); - } - int height = 1; - int width = 1; - if (bottom[0]->num_axes() > 2) { - height = bottom[0]->shape(2); - width = bottom[0]->shape(3); - } - vector bias_multiplier_shape(1, height * width); - bias_multiplier_.Reshape(bias_multiplier_shape); - caffe_set(bias_multiplier_.count(), Dtype(1), - bias_multiplier_.mutable_cpu_data()); -} - -template -void ChannelwiseAffineLayer::Forward_cpu( - const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - Dtype* top_data = top[0]->mutable_cpu_data(); - const int count = bottom[0]->count(); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->channels(); - const Dtype* slope_data = this->blobs_[0]->cpu_data(); - const Dtype* bias_data = this->blobs_[1]->cpu_data(); - // For in-place computation - if (bottom[0] == top[0]) { - caffe_copy(count, bottom_data, bottom_memory_.mutable_cpu_data()); - } - // if channel_shared, channel index in the following computation becomes - // always zero. - const int div_factor = channel_shared_ ? 
channels : 1; - for (int i = 0; i < count; ++i) { - int c = (i / dim) % channels / div_factor; - top_data[i] = bottom_data[i] * slope_data[c] + bias_data[c]; - } -} - -template -void ChannelwiseAffineLayer::Backward_cpu( - const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - const Dtype* bottom_data = bottom[0]->cpu_data(); - const Dtype* slope_data = this->blobs_[0]->cpu_data(); - - const Dtype* top_diff = top[0]->cpu_diff(); - const int count = bottom[0]->count(); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->shape(1); - const int num = bottom[0]->shape(0); - int height = 1; - int width = 1; - if (bottom[0]->num_axes() > 2) { - height = bottom[0]->shape(2); - width = bottom[0]->shape(3); - } - - // For in-place computation - if (top[0] == bottom[0]) { - bottom_data = bottom_memory_.cpu_data(); - } - - // if channel_shared, channel index in the following computation becomes - // always zero. - const int div_factor = channel_shared_ ? channels : 1; - - // Propagte to param - // Since to write bottom diff will affect top diff if top and bottom blobs - // are identical (in-place computaion), we first compute param backward to - // keep top_diff unchanged. 
- - if (this->param_propagate_down_[1]) { - Dtype* bias_diff = this->blobs_[1]->mutable_cpu_diff(); - caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff); - for (int n = 0; n < num; ++n) { - caffe_cpu_gemv(CblasNoTrans, channels, height * width, 1., - top_diff + top[0]->offset(n), - bias_multiplier_.cpu_data(), 1., bias_diff); - } - } - if (this->param_propagate_down_[0]) { - Dtype* slope_diff = this->blobs_[0]->mutable_cpu_diff(); - caffe_set(this->blobs_[0]->count(), Dtype(0), slope_diff); - for (int i = 0; i < count; ++i) { - int c = (i / dim) % channels / div_factor; - slope_diff[c] += top_diff[i] * bottom_data[i]; - } - } - - // Propagate to bottom - if (propagate_down[0]) { - Dtype* bottom_diff = bottom[0]->mutable_cpu_diff(); - for (int i = 0; i < count; ++i) { - int c = (i / dim) % channels / div_factor; - bottom_diff[i] = slope_data[c] * top_diff[i]; - } - } -} - - -#ifdef CPU_ONLY -STUB_GPU(ChannelwiseAffineLayer); -#endif - -INSTANTIATE_CLASS(ChannelwiseAffineLayer); -REGISTER_LAYER_CLASS(ChannelwiseAffine); - -} // namespace caffe diff --git a/src/caffe/layers/channelwise_affine_layer.cu b/src/caffe/layers/channelwise_affine_layer.cu deleted file mode 100644 index 2066b26560b..00000000000 --- a/src/caffe/layers/channelwise_affine_layer.cu +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include - -#include "caffe/layer.hpp" -#include "caffe/layers/channelwise_affine_layer.hpp" - -namespace caffe { - -// CUDA kernel for forward -template -__global__ void ChannelwiseAffineForward(const int n, const int channels, - const int dim, const Dtype* in, Dtype* out, const Dtype* slope_data, - const Dtype* bias_data, const int div_factor) { - CUDA_KERNEL_LOOP(index, n) { - int c = (index / dim) % channels / div_factor; - out[index] = in[index] * slope_data[c] + bias_data[c]; - } -} - -// CUDA kernel for bottom backward -template -__global__ void ChannelwiseAffineBackward(const int n, - const int channels, const int dim, const Dtype* in_diff, - Dtype* out_diff, 
const Dtype* slope_data, const int div_factor) { - CUDA_KERNEL_LOOP(index, n) { - int c = (index / dim) % channels / div_factor; - out_diff[index] = slope_data[c] * in_diff[index]; - } -} - -// CUDA kernel for element-wise parameter backward -template -__global__ void ChannelwiseAffineParamSlopeBackward(const int n, - const int rows, const int rowPitch, const Dtype* in_diff, - const Dtype* in_data, Dtype* out_diff) { - CUDA_KERNEL_LOOP(index, n) { - out_diff[index] = in_diff[index] * in_data[index]; - for ( int k = 1; k < rows; k++ ) { - out_diff[index] += in_diff[index + k*rowPitch] - * in_data[index + k*rowPitch]; - } - } -} - -template -void ChannelwiseAffineLayer::Forward_gpu( - const vector*>& bottom, - const vector*>& top) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - Dtype* top_data = top[0]->mutable_gpu_data(); - const int count = bottom[0]->count(); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->channels(); - const Dtype* slope_data = this->blobs_[0]->gpu_data(); - const Dtype* bias_data = this->blobs_[1]->gpu_data(); - const int div_factor = channel_shared_ ? 
channels : 1; - - // For in-place computation - if (top[0] == bottom[0]) { - caffe_copy(count, bottom_data, bottom_memory_.mutable_gpu_data()); - } - // NOLINT_NEXT_LINE(whitespace/operators) - ChannelwiseAffineForward<<>>( - count, channels, dim, bottom_data, top_data, - slope_data, bias_data, div_factor); - CUDA_POST_KERNEL_CHECK; -} - -template -void ChannelwiseAffineLayer::Backward_gpu( - const vector*>& top, - const vector& propagate_down, - const vector*>& bottom) { - const Dtype* bottom_data = bottom[0]->gpu_data(); - const Dtype* top_diff = top[0]->gpu_diff(); - const int count = bottom[0]->count(); - const int num = bottom[0]->shape(0); - const int dim = bottom[0]->count(2); - const int channels = bottom[0]->shape(1); - int height = 1; - int width = 1; - if (bottom[0]->num_axes() > 2) { - height = bottom[0]->shape(2); - width = bottom[0]->shape(3); - } - - // For in-place computation - if (top[0] == bottom[0]) { - bottom_data = bottom_memory_.gpu_data(); - } - // Propagate to param - // Since to write bottom diff will affect top diff if top and bottom blobs - // are identical (in-place computaion), we first compute param backward to - // keep top_diff unchanged. 
- if (this->param_propagate_down_[1]) { - Dtype* bias_diff = this->blobs_[1]->mutable_gpu_diff(); - caffe_gpu_set(this->blobs_[1]->count(), Dtype(0.0), bias_diff); - // Gradient with respect to bias - for (int n = 0; n < num; ++n) { - caffe_gpu_gemv( - CblasNoTrans, channels, height * width, (Dtype)1., - top_diff + top[0]->offset(n), bias_multiplier_.gpu_data(), - (Dtype)1., bias_diff); - } - } - if (this->param_propagate_down_[0]) { - Dtype* slope_diff = this->blobs_[0]->mutable_gpu_diff(); - int cdim = channels * dim; - // compute element-wise diff - // NOLINT_NEXT_LINE(whitespace/operators) - ChannelwiseAffineParamSlopeBackward<<>>( - cdim, num, top[0]->offset(1), top_diff , - bottom_data, - backward_buff_.mutable_gpu_diff()); - CUDA_POST_KERNEL_CHECK; - if (channel_shared_) { - Dtype d = 0; - caffe_gpu_dot(cdim, backward_buff_.gpu_diff(), - multiplier_.gpu_data(), &d); - caffe_gpu_add_scalar(this->blobs_[0]->count(), Dtype(d), slope_diff); - } else { - caffe_gpu_gemv(CblasNoTrans, channels, dim, Dtype(1.), - backward_buff_.gpu_diff(), multiplier_.gpu_data(), Dtype(1.), - slope_diff); - } - } - // Propagate to bottom - if (propagate_down[0]) { - Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); - const Dtype* slope_data = this->blobs_[0]->gpu_data(); - int div_factor = channel_shared_ ? 
channels : 1; - // NOLINT_NEXT_LINE(whitespace/operators) - ChannelwiseAffineBackward<<>>( - count, channels, dim, top_diff, bottom_diff, slope_data, div_factor); - CUDA_POST_KERNEL_CHECK; - } -} - -INSTANTIATE_LAYER_GPU_FUNCS(ChannelwiseAffineLayer); - -} // namespace caffe
diff --git a/src/caffe/layers/scale_layer.cpp b/src/caffe/layers/scale_layer.cpp
new file mode 100644
index 00000000000..2af65450b51
--- /dev/null
+++ b/src/caffe/layers/scale_layer.cpp
@@ -0,0 +1,240 @@
+#include <algorithm>
+#include <vector>
+
+#include "caffe/filler.hpp"
+#include "caffe/layer_factory.hpp"
+#include "caffe/layers/scale_layer.hpp"
+#include "caffe/util/math_functions.hpp"
+
+namespace caffe {
+
+// Creates the learned scale blob (single-bottom case) and, when bias_term is
+// set, an internal Bias layer whose parameter blob is exposed as the last
+// entry of this->blobs_.
+template <typename Dtype>
+void ScaleLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const ScaleParameter& param = this->layer_param_.scale_param();
+  if (bottom.size() == 1 && this->blobs_.size() > 0) {
+    LOG(INFO) << "Skipping parameter initialization";
+  } else if (bottom.size() == 1) {
+    // scale is a learned parameter; initialize it
+    axis_ = bottom[0]->CanonicalAxisIndex(param.axis());
+    const int num_axes = param.num_axes();
+    CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
+                           << "or -1 to extend to the end of bottom[0]";
+    if (num_axes >= 0) {
+      CHECK_GE(bottom[0]->num_axes(), axis_ + num_axes)
+          << "scale blob's shape extends past bottom[0]'s shape when applied "
+          << "starting with bottom[0] axis = " << axis_;
+    }
+    this->blobs_.resize(1);
+    const vector<int>::const_iterator& shape_start =
+        bottom[0]->shape().begin() + axis_;
+    const vector<int>::const_iterator& shape_end =
+        (num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes);
+    vector<int> scale_shape(shape_start, shape_end);
+    this->blobs_[0].reset(new Blob<Dtype>(scale_shape));
+    FillerParameter filler_param(param.filler());
+    if (!param.has_filler()) {
+      // Default to unit (1) filler for identity operation.
+      filler_param.set_type("constant");
+      filler_param.set_value(1);
+    }
+    shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(filler_param));
+    filler->Fill(this->blobs_[0].get());
+  }
+  if (param.bias_term()) {
+    LayerParameter layer_param(this->layer_param_);
+    layer_param.set_type("Bias");
+    BiasParameter* bias_param = layer_param.mutable_bias_param();
+    bias_param->set_axis(param.axis());
+    if (bottom.size() > 1) {
+      bias_param->set_num_axes(bottom[1]->num_axes());
+    } else {
+      bias_param->set_num_axes(param.num_axes());
+    }
+    bias_param->mutable_filler()->CopyFrom(param.bias_filler());
+    bias_layer_ = LayerRegistry<Dtype>::CreateLayer(layer_param);
+    bias_bottom_vec_.resize(1);
+    bias_bottom_vec_[0] = bottom[0];
+    bias_layer_->SetUp(bias_bottom_vec_, top);
+    if (this->blobs_.size() + bottom.size() < 3) {
+      // No bias blob yet (blobs_.size() == 1 with one bottom, or
+      // blobs_.size() == 0 with two bottoms): adopt the Bias layer's blob.
+      bias_param_id_ = this->blobs_.size();
+      this->blobs_.resize(bias_param_id_ + 1);
+      this->blobs_[bias_param_id_] = bias_layer_->blobs()[0];
+    } else {
+      // The bias blob was already present in blobs_; share it with the Bias
+      // layer instead of appending a duplicate.
+      bias_param_id_ = this->blobs_.size() - 1;
+      bias_layer_->blobs()[0] = this->blobs_[bias_param_id_];
+    }
+    bias_propagate_down_.resize(1, false);
+  }
+  this->param_propagate_down_.resize(this->blobs_.size(), true);
+}
+
+// Validates the scale shape against bottom[0], caches the outer/scale/inner
+// extents, sizes the reduction buffers, and reshapes the Bias layer if any.
+template <typename Dtype>
+void ScaleLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
+      const vector<Blob<Dtype>*>& top) {
+  const ScaleParameter& param = this->layer_param_.scale_param();
+  Blob<Dtype>* scale = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
+  // Always set axis_ == 0 in special case where scale is an actual scalar
+  // (num_axes == 0). Mathematically equivalent for any choice of axis_, so the
+  // actual setting can be safely ignored; and computation is most efficient
+  // with axis_ == 0 and (therefore) outer_dim_ == 1. (Setting axis_ to
+  // bottom[0]->num_axes() - 1, giving inner_dim_ == 1, would be equally
+  // performant.)
+  axis_ = (scale->num_axes() == 0) ?
+      0 : bottom[0]->CanonicalAxisIndex(param.axis());
+  CHECK_GE(bottom[0]->num_axes(), axis_ + scale->num_axes())
+      << "scale blob's shape extends past bottom[0]'s shape when applied "
+      << "starting with bottom[0] axis = " << axis_;
+  for (int i = 0; i < scale->num_axes(); ++i) {
+    CHECK_EQ(bottom[0]->shape(axis_ + i), scale->shape(i))
+        << "dimension mismatch between bottom[0]->shape(" << axis_ + i
+        << ") and scale->shape(" << i << ")";
+  }
+  outer_dim_ = bottom[0]->count(0, axis_);
+  scale_dim_ = scale->count();
+  inner_dim_ = bottom[0]->count(axis_ + scale->num_axes());
+  if (bottom[0] == top[0]) {  // in-place computation
+    temp_.ReshapeLike(*bottom[0]);
+  } else {
+    top[0]->ReshapeLike(*bottom[0]);
+  }
+  sum_result_.Reshape(vector<int>(1, outer_dim_ * scale_dim_));
+  const int sum_mult_size = std::max(outer_dim_, inner_dim_);
+  sum_multiplier_.Reshape(vector<int>(1, sum_mult_size));
+  if (sum_multiplier_.cpu_data()[sum_mult_size - 1] != Dtype(1)) {
+    caffe_set(sum_mult_size, Dtype(1), sum_multiplier_.mutable_cpu_data());
+  }
+  if (bias_layer_) {
+    bias_bottom_vec_[0] = top[0];
+    bias_layer_->Reshape(bias_bottom_vec_, top);
+  }
+}
+
+// Scales every inner_dim_-long run of bottom[0] by the matching scale entry
+// (saving the input first when in-place), then runs the Bias layer if any.
+template <typename Dtype>
+void ScaleLayer<Dtype>::Forward_cpu(
+    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
+  const Dtype* bottom_data = bottom[0]->cpu_data();
+  if (bottom[0] == top[0]) {
+    // In-place computation; need to store bottom data before overwriting it.
+    // Note that this is only necessary for Backward; we could skip this if not
+    // doing Backward, but Caffe currently provides no way of knowing whether
+    // we'll need to do Backward at the time of the Forward call.
+    caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(),
+               temp_.mutable_cpu_data());
+  }
+  const Dtype* scale_data =
+      ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
+  Dtype* top_data = top[0]->mutable_cpu_data();
+  for (int n = 0; n < outer_dim_; ++n) {
+    for (int d = 0; d < scale_dim_; ++d) {
+      const Dtype factor = scale_data[d];
+      caffe_cpu_scale(inner_dim_, factor, bottom_data, top_data);
+      bottom_data += inner_dim_;
+      top_data += inner_dim_;
+    }
+  }
+  if (bias_layer_) {
+    bias_layer_->Forward(bias_bottom_vec_, top);
+  }
+}
+
+// Backpropagates through the Bias layer (bias gradient only), reduces
+// top_diff * bottom_data over the broadcast axes to get the scale gradient,
+// and scales top_diff to get the bottom[0] gradient.
+template <typename Dtype>
+void ScaleLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
+    const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
+  if (bias_layer_ &&
+      this->param_propagate_down_[this->param_propagate_down_.size() - 1]) {
+    bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_);
+  }
+  const bool scale_param = (bottom.size() == 1);
+  Blob<Dtype>* scale = scale_param ? this->blobs_[0].get() : bottom[1];
+  if ((!scale_param && propagate_down[1]) ||
+      (scale_param && this->param_propagate_down_[0])) {
+    const Dtype* top_diff = top[0]->cpu_diff();
+    const bool in_place = (bottom[0] == top[0]);
+    const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data();
+    // Hack: store big eltwise product in bottom[0] diff, except in the special
+    // case where this layer itself does the eltwise product, in which case we
+    // can store it directly in the scale diff, and we're done.
+    // If we're computing in-place (and not doing eltwise computation), this
+    // hack doesn't work and we store the product in temp_.
+    const bool is_eltwise = (bottom[0]->count() == scale->count());
+    Dtype* product = (is_eltwise ? scale->mutable_cpu_diff() :
+        (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff()));
+    caffe_mul(top[0]->count(), top_diff, bottom_data, product);
+    if (!is_eltwise) {
+      Dtype* sum_result = NULL;
+      if (inner_dim_ == 1) {
+        sum_result = product;
+      } else if (sum_result_.count() == 1) {
+        const Dtype* sum_mult = sum_multiplier_.cpu_data();
+        Dtype* scale_diff = scale->mutable_cpu_diff();
+        if (scale_param) {
+          Dtype result = caffe_cpu_dot(inner_dim_, product, sum_mult);
+          *scale_diff += result;
+        } else {
+          *scale_diff = caffe_cpu_dot(inner_dim_, product, sum_mult);
+        }
+      } else {
+        const Dtype* sum_mult = sum_multiplier_.cpu_data();
+        sum_result = (outer_dim_ == 1) ?
+            scale->mutable_cpu_diff() : sum_result_.mutable_cpu_data();
+        // NOTE(review): beta is 0, so with outer_dim_ == 1 a learned scale's
+        // diff is overwritten here while the dot branches use += -- confirm.
+        caffe_cpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_,
+                       Dtype(1), product, sum_mult, Dtype(0), sum_result);
+      }
+      if (outer_dim_ != 1) {
+        const Dtype* sum_mult = sum_multiplier_.cpu_data();
+        Dtype* scale_diff = scale->mutable_cpu_diff();
+        if (scale_dim_ == 1) {
+          if (scale_param) {
+            Dtype result = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
+            *scale_diff += result;
+          } else {
+            *scale_diff = caffe_cpu_dot(outer_dim_, sum_mult, sum_result);
+          }
+        } else {
+          caffe_cpu_gemv(CblasTrans, outer_dim_, scale_dim_,
+                         Dtype(1), sum_result, sum_mult, Dtype(scale_param),
+                         scale_diff);
+        }
+      }
+    }
+  }
+  if (propagate_down[0]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
+    const Dtype* scale_data = scale->cpu_data();
+    Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
+    for (int n = 0; n < outer_dim_; ++n) {
+      for (int d = 0; d < scale_dim_; ++d) {
+        const Dtype factor = scale_data[d];
+        caffe_cpu_scale(inner_dim_, factor, top_diff, bottom_diff);
+        bottom_diff += inner_dim_;
+        top_diff += inner_dim_;
+      }
+    }
+  }
+}
+
+#ifdef CPU_ONLY
+STUB_GPU(ScaleLayer);
+#endif
+
+INSTANTIATE_CLASS(ScaleLayer);
+REGISTER_LAYER_CLASS(Scale);
+
+}  // namespace caffe
diff --git a/src/caffe/layers/scale_layer.cu b/src/caffe/layers/scale_layer.cu new file mode 100644 index 
00000000000..fc9a8064db5 --- /dev/null +++ b/src/caffe/layers/scale_layer.cu @@ -0,0 +1,135 @@ +#include +#include + +#include "caffe/layers/scale_layer.hpp" +#include "caffe/util/math_functions.hpp" + +namespace caffe { + +template +__global__ void ScaleForward(const int n, const Dtype* in, + const Dtype* scale, const int scale_dim, const int inner_dim, + Dtype* out) { + CUDA_KERNEL_LOOP(index, n) { + const int scale_index = (index / inner_dim) % scale_dim; + out[index] = in[index] * scale[scale_index]; + } +} + +template +__global__ void ScaleBiasForward(const int n, const Dtype* in, + const Dtype* scale, const Dtype* bias, + const int scale_dim, const int inner_dim, Dtype* out) { + CUDA_KERNEL_LOOP(index, n) { + const int scale_index = (index / inner_dim) % scale_dim; + out[index] = in[index] * scale[scale_index] + bias[scale_index]; + } +} + +template +void ScaleLayer::Forward_gpu( + const vector*>& bottom, const vector*>& top) { + const int count = top[0]->count(); + const Dtype* bottom_data = bottom[0]->gpu_data(); + if (bottom[0] == top[0]) { + // in-place computation; need to store bottom data before overwriting it. + // Note that this is only necessary for Backward; we could skip this if not + // doing Backward, but Caffe currently provides no way of knowing whether + // we'll need to do Backward at the time of the Forward call. + caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(), + temp_.mutable_gpu_data()); + } + const Dtype* scale_data = + ((bottom.size() > 1) ? 
bottom[1] : this->blobs_[0].get())->gpu_data(); + Dtype* top_data = top[0]->mutable_gpu_data(); + if (bias_layer_) { + const Dtype* bias_data = this->blobs_[bias_param_id_]->gpu_data(); + ScaleBiasForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, scale_data, bias_data, scale_dim_, inner_dim_, + top_data); + } else { + ScaleForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, bottom_data, scale_data, scale_dim_, inner_dim_, top_data); + } +} + +template +void ScaleLayer::Backward_gpu(const vector*>& top, + const vector& propagate_down, const vector*>& bottom) { + if (bias_layer_ && + this->param_propagate_down_[this->param_propagate_down_.size() - 1]) { + bias_layer_->Backward(top, bias_propagate_down_, bias_bottom_vec_); + } + const bool scale_param = (bottom.size() == 1); + Blob* scale = scale_param ? this->blobs_[0].get() : bottom[1]; + if ((!scale_param && propagate_down[1]) || + (scale_param && this->param_propagate_down_[0])) { + const Dtype* top_diff = top[0]->gpu_diff(); + const bool in_place = (bottom[0] == top[0]); + const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->gpu_data(); + // Hack: store big eltwise product in bottom[0] diff, except in the special + // case where this layer itself does the eltwise product, in which case we + // can store it directly in the scale diff, and we're done. + // If we're computing in-place (and not doing eltwise computation), this + // hack doesn't work and we store the product in temp_. + const bool is_eltwise = (bottom[0]->count() == scale->count()); + Dtype* product = (is_eltwise ? scale->mutable_gpu_diff() : + (in_place ? 
temp_.mutable_gpu_data() : bottom[0]->mutable_gpu_diff())); + caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product); + if (!is_eltwise) { + Dtype* sum_result = NULL; + if (inner_dim_ == 1) { + sum_result = product; + } else if (sum_result_.count() == 1) { + const Dtype* sum_mult = sum_multiplier_.gpu_data(); + Dtype* scale_diff = scale->mutable_cpu_diff(); + if (scale_param) { + Dtype result; + caffe_gpu_dot(inner_dim_, product, sum_mult, &result); + *scale_diff += result; + } else { + caffe_gpu_dot(inner_dim_, product, sum_mult, scale_diff); + } + } else { + const Dtype* sum_mult = sum_multiplier_.gpu_data(); + sum_result = (outer_dim_ == 1) ? + scale->mutable_gpu_diff() : sum_result_.mutable_gpu_data(); + caffe_gpu_gemv(CblasNoTrans, sum_result_.count(), inner_dim_, + Dtype(1), product, sum_mult, Dtype(0), sum_result); + } + if (outer_dim_ != 1) { + const Dtype* sum_mult = sum_multiplier_.gpu_data(); + if (scale_dim_ == 1) { + Dtype* scale_diff = scale->mutable_cpu_diff(); + if (scale_param) { + Dtype result; + caffe_gpu_dot(outer_dim_, sum_mult, sum_result, &result); + *scale_diff += result; + } else { + caffe_gpu_dot(outer_dim_, sum_mult, sum_result, scale_diff); + } + } else { + Dtype* scale_diff = scale->mutable_gpu_diff(); + caffe_gpu_gemv(CblasTrans, outer_dim_, scale_dim_, + Dtype(1), sum_result, sum_mult, Dtype(scale_param), + scale_diff); + } + } + } + } + if (propagate_down[0]) { + const int count = top[0]->count(); + const Dtype* top_diff = top[0]->gpu_diff(); + const Dtype* scale_data = scale->gpu_data(); + Dtype* bottom_diff = bottom[0]->mutable_gpu_diff(); + ScaleForward // NOLINT_NEXT_LINE(whitespace/operators) + <<>>( + count, top_diff, scale_data, scale_dim_, inner_dim_, bottom_diff); + } +} + +INSTANTIATE_LAYER_GPU_FUNCS(ScaleLayer); + +} // namespace caffe diff --git a/src/caffe/proto/caffe.proto b/src/caffe/proto/caffe.proto index fe6209cf673..6493a72d778 100644 --- a/src/caffe/proto/caffe.proto +++ b/src/caffe/proto/caffe.proto @@ 
-306,7 +306,7 @@ message ParamSpec { // NOTE // Update the next available ID when you add a new LayerParameter field. // -// LayerParameter next available layer-specific ID: 142 (last added: channelwise_affine_param) +// LayerParameter next available layer-specific ID: 143 (last added: scale_param) message LayerParameter { optional string name = 1; // the layer name optional string type = 2; // the layer type @@ -356,7 +356,7 @@ message LayerParameter { optional AccuracyParameter accuracy_param = 102; optional ArgMaxParameter argmax_param = 103; optional BatchNormParameter batch_norm_param = 139; - optional ChannelwiseAffineParameter channelwise_affine_param = 141; + optional BiasParameter bias_param = 141; optional ConcatParameter concat_param = 104; optional ContrastiveLossParameter contrastive_loss_param = 105; optional ConvolutionParameter convolution_param = 106; @@ -385,6 +385,7 @@ message LayerParameter { optional ReductionParameter reduction_param = 136; optional ReLUParameter relu_param = 123; optional ReshapeParameter reshape_param = 133; + optional ScaleParameter scale_param = 142; optional SigmoidParameter sigmoid_param = 124; optional SoftmaxParameter softmax_param = 125; optional SPPParameter spp_param = 132; @@ -499,15 +500,36 @@ message BatchNormParameter { optional float eps = 3 [default = 1e-5]; } -message ChannelwiseAffineParameter { - - // Initial value of a_i. Default is a_i=1.0 for all i. - optional FillerParameter slope_filler = 1; - - optional FillerParameter bias_filler = 2; +message BiasParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). 
+ // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar bias. + optional int32 axis = 1 [default = 1]; - // Whether or not slope paramters are shared across channels. - optional bool channel_shared = 3 [default = false]; + // (num_axes is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes of the second bottom.) + // The number of axes of the input (bottom[0]) covered by the bias + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to add a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [default = 1]; + + // (filler is ignored unless just one bottom is given and the bias is + // a learned parameter of the layer.) + // The initialization for the learned bias parameter. + // Default is the zero (0) initialization, resulting in the BiasLayer + // initially performing the identity operation. + optional FillerParameter filler = 3; } message ContrastiveLossParameter { @@ -972,6 +994,43 @@ message ReshapeParameter { optional int32 num_axes = 3 [default = -1]; } +message ScaleParameter { + // The first axis of bottom[0] (the first input Blob) along which to apply + // bottom[1] (the second input Blob). May be negative to index from the end + // (e.g., -1 for the last axis). 
+ // + // For example, if bottom[0] is 4D with shape 100x3x40x60, the output + // top[0] will have the same shape, and bottom[1] may have any of the + // following shapes (for the given value of axis): + // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 + // (axis == 1 == -3) 3; 3x40; 3x40x60 + // (axis == 2 == -2) 40; 40x60 + // (axis == 3 == -1) 60 + // Furthermore, bottom[1] may have the empty shape (regardless of the value of + // "axis") -- a scalar multiplier. + optional int32 axis = 1 [default = 1]; + + // (num_axes is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer. Otherwise, num_axes is determined by the + // number of axes of the second bottom.) + // The number of axes of the input (bottom[0]) covered by the scale + // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. + // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. + optional int32 num_axes = 2 [default = 1]; + + // (filler is ignored unless just one bottom is given and the scale is + // a learned parameter of the layer.) + // The initialization for the learned scale parameter. + // Default is the unit (1) initialization, resulting in the ScaleLayer + // initially performing the identity operation. + optional FillerParameter filler = 3; + + // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but + // may be more efficient). Initialized with bias_filler (defaults to 0). 
+ optional bool bias_term = 4 [default = false]; + optional FillerParameter bias_filler = 5; +} + message SigmoidParameter { enum Engine { DEFAULT = 0; diff --git a/src/caffe/test/test_bias_layer.cpp b/src/caffe/test/test_bias_layer.cpp new file mode 100644 index 00000000000..0d23d3f453c --- /dev/null +++ b/src/caffe/test/test_bias_layer.cpp @@ -0,0 +1,461 @@ +#include +#include + +#include "gtest/gtest.h" + +#include "caffe/blob.hpp" +#include "caffe/common.hpp" +#include "caffe/filler.hpp" +#include "caffe/layers/bias_layer.hpp" + +#include "caffe/test/test_caffe_main.hpp" +#include "caffe/test/test_gradient_check_util.hpp" + +namespace caffe { + +template +class BiasLayerTest : public MultiDeviceTest { + typedef typename TypeParam::Dtype Dtype; + + protected: + BiasLayerTest() + : blob_bottom_(new Blob(2, 3, 4, 5)), + blob_bottom_eltwise_(new Blob(2, 3, 4, 5)), + blob_bottom_broadcast_0_(new Blob()), + blob_bottom_broadcast_1_(new Blob()), + blob_bottom_broadcast_2_(new Blob()), + blob_bottom_bias_(new Blob(vector())), + blob_top_(new Blob()) { + Caffe::set_random_seed(1701); + vector broadcast_shape(2); + broadcast_shape[0] = 2; broadcast_shape[1] = 3; + this->blob_bottom_broadcast_0_->Reshape(broadcast_shape); + broadcast_shape[0] = 3; broadcast_shape[1] = 4; + this->blob_bottom_broadcast_1_->Reshape(broadcast_shape); + broadcast_shape[0] = 4; broadcast_shape[1] = 5; + this->blob_bottom_broadcast_2_->Reshape(broadcast_shape); + FillerParameter filler_param; + filler_param.set_min(1); + filler_param.set_max(10); + UniformFiller filler(filler_param); + filler.Fill(this->blob_bottom_); + filler.Fill(this->blob_bottom_eltwise_); + filler.Fill(this->blob_bottom_broadcast_0_); + filler.Fill(this->blob_bottom_broadcast_1_); + filler.Fill(this->blob_bottom_broadcast_2_); + filler.Fill(this->blob_bottom_bias_); + blob_bottom_vec_.push_back(blob_bottom_); + blob_top_vec_.push_back(blob_top_); + } + virtual ~BiasLayerTest() { + delete blob_bottom_; + delete 
blob_bottom_eltwise_; + delete blob_bottom_broadcast_0_; + delete blob_bottom_broadcast_1_; + delete blob_bottom_broadcast_2_; + delete blob_bottom_bias_; + delete blob_top_; + } + Blob* const blob_bottom_; + Blob* const blob_bottom_eltwise_; + Blob* const blob_bottom_broadcast_0_; + Blob* const blob_bottom_broadcast_1_; + Blob* const blob_bottom_broadcast_2_; + Blob* const blob_bottom_bias_; + Blob* const blob_top_; + vector*> blob_bottom_vec_; + vector*> blob_top_vec_; +}; + +TYPED_TEST_CASE(BiasLayerTest, TestDtypesAndDevices); + +TYPED_TEST(BiasLayerTest, TestForwardEltwise) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data_a = this->blob_bottom_->cpu_data(); + const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardEltwiseInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_bottom_->cpu_data(); + const int count = this->blob_bottom_->count(); + const Dtype* in_data_a = orig_bottom.cpu_data(); + const Dtype* 
in_data_b = this->blob_bottom_eltwise_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestBackwardEltwiseInPlace) { + typedef typename TypeParam::Dtype Dtype; + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + shared_ptr > layer(new BiasLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_bias_diff; + orig_bias_diff.CopyFrom(*this->blob_bottom_eltwise_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. 
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) { + EXPECT_NEAR(orig_bias_diff.cpu_diff()[i], + this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardEltwiseWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(0); + bias_param->set_num_axes(-1); + bias_param->mutable_filler()->set_type("gaussian"); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data_a = this->blob_bottom_->cpu_data(); + const Dtype* in_data_b = layer->blobs()[0]->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] + in_data_b[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastBegin) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_); + LayerParameter layer_param; + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + 
for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + this->blob_bottom_broadcast_0_->data_at(n, c, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddle) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < 
this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w), + orig_bottom.data_at(n, c, h, w) + + this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestBackwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + shared_ptr > layer(new BiasLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_bias_diff; + orig_bias_diff.CopyFrom(*this->blob_bottom_broadcast_1_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. 
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) { + EXPECT_NEAR(orig_bias_diff.cpu_diff()[i], + this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastMiddleWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(1); + bias_param->set_num_axes(2); + bias_param->mutable_filler()->set_type("gaussian"); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + layer->blobs()[0]->data_at(c, h, 0, 0), 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBroadcastEnd) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), 
this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w), + this->blob_bottom_->data_at(n, c, h, w) + + this->blob_bottom_broadcast_2_->data_at(h, w, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBias) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data = this->blob_bottom_->cpu_data(); + const Dtype bias = *this->blob_bottom_bias_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data[i] + bias, 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestForwardBiasAxis2) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + shared_ptr > layer(new BiasLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape()); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_top_->cpu_data(); + const int count = this->blob_top_->count(); + const Dtype* in_data = this->blob_bottom_->cpu_data(); + const Dtype bias = *this->blob_bottom_bias_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data[i] + 
bias, 1e-5); + } +} + +TYPED_TEST(BiasLayerTest, TestGradientEltwise) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientEltwiseWithParam) { + typedef typename TypeParam::Dtype Dtype; + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(0); + bias_param->set_num_axes(-1); + bias_param->mutable_filler()->set_type("gaussian"); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastBegin) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_); + LayerParameter layer_param; + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastMiddle) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(1); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastMiddleWithParam) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + BiasParameter* bias_param = layer_param.mutable_bias_param(); + bias_param->set_axis(1); + bias_param->set_num_axes(2); + 
bias_param->mutable_filler()->set_type("gaussian"); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBroadcastEnd) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBias) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +TYPED_TEST(BiasLayerTest, TestGradientBiasAxis2) { + typedef typename TypeParam::Dtype Dtype; + this->blob_bottom_vec_.push_back(this->blob_bottom_bias_); + LayerParameter layer_param; + layer_param.mutable_bias_param()->set_axis(2); + BiasLayer layer(layer_param); + GradientChecker checker(1e-2, 1e-3); + checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, + this->blob_top_vec_); +} + +} // namespace caffe diff --git a/src/caffe/test/test_channelwise_affine_layer.cpp b/src/caffe/test/test_channelwise_affine_layer.cpp deleted file mode 100644 index a3e2544f77a..00000000000 --- a/src/caffe/test/test_channelwise_affine_layer.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include - -#include "gtest/gtest.h" - -#include "caffe/blob.hpp" -#include "caffe/common.hpp" -#include "caffe/filler.hpp" -#include "caffe/layers/channelwise_affine_layer.hpp" - -#include "caffe/test/test_caffe_main.hpp" -#include "caffe/test/test_gradient_check_util.hpp" - -namespace caffe { - -template -class ChannelwiseAffineLayerTest : 
public MultiDeviceTest { - typedef typename TypeParam::Dtype Dtype; - - protected: - ChannelwiseAffineLayerTest() - : blob_bottom_(new Blob(2, 3, 4, 5)), - blob_top_(new Blob()) { - Caffe::set_random_seed(1701); - // fill the values - FillerParameter filler_param; - GaussianFiller filler(filler_param); - filler.Fill(this->blob_bottom_); - blob_bottom_vec_.push_back(blob_bottom_); - blob_top_vec_.push_back(blob_top_); - } - virtual ~ChannelwiseAffineLayerTest() { - delete blob_bottom_; delete blob_top_; } - Blob* const blob_bottom_; - Blob* const blob_top_; - vector*> blob_bottom_vec_; - vector*> blob_top_vec_; - - void TestChannelwiseAffine(ChannelwiseAffineLayer *layer) { - layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); - // Now, check values - const Dtype* bottom_data = this->blob_bottom_->cpu_data(); - const Dtype* top_data = this->blob_top_->cpu_data(); - const Dtype* slope_data = layer->blobs()[0]->cpu_data(); - const Dtype* bias_data = layer->blobs()[1]->cpu_data(); - const Dtype kDelta = 2e-5; - int hw = this->blob_bottom_->height() * this->blob_bottom_->width(); - int channels = this->blob_bottom_->channels(); - bool channel_shared = - layer->layer_param().channelwise_affine_param().channel_shared(); - for (int i = 0; i < this->blob_bottom_->count(); ++i) { - int c = channel_shared ? 
0 : (i / hw) % channels; - EXPECT_NEAR(top_data[i], - bottom_data[i]* slope_data[c] + bias_data[c], kDelta); - } - } -}; -TYPED_TEST_CASE(ChannelwiseAffineLayerTest, TestDtypesAndDevices); - - -TYPED_TEST(ChannelwiseAffineLayerTest, TestChannelwiseAffineForward) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - FillerParameter filler_param; - GaussianFiller filler(filler_param); - filler.Fill(layer.blobs()[0].get()); - filler.Fill(layer.blobs()[1].get()); - this->TestChannelwiseAffine(&layer); -} - -TYPED_TEST(ChannelwiseAffineLayerTest, - TestChannelwiseAffineForwardChannelShared) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - layer_param.mutable_channelwise_affine_param()->set_channel_shared(true); - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - this->TestChannelwiseAffine(&layer); -} - -TYPED_TEST(ChannelwiseAffineLayerTest, TestChannelwiseAffineGradient) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - layer_param.mutable_channelwise_affine_param()->set_channel_shared(false); - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -TYPED_TEST(ChannelwiseAffineLayerTest, - TestChannelwiseAffineGradientChannelShared) { - typedef typename TypeParam::Dtype Dtype; - LayerParameter layer_param; - layer_param.mutable_channelwise_affine_param()->set_channel_shared(true); - ChannelwiseAffineLayer layer(layer_param); - layer.SetUp(this->blob_bottom_vec_, this->blob_top_vec_); - GradientChecker checker(1e-2, 1e-3, 1701, 0., 0.01); - checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_, - this->blob_top_vec_); -} - -} // 
namespace caffe
# Review notes for the test_scale_layer.cpp diff that follows. These lines sit
# between two file diffs, outside any hunk; patch tools are expected to skip
# them (TODO confirm with the tool in use).
#
# What the new file contains: gtest typed tests (TYPED_TEST_CASE over
# TestDtypesAndDevices) for caffe::ScaleLayer.
#   * Fixture ScaleLayerTest seeds the RNG with 1701 and uniform-fills (min 1,
#     max 10) a 2x3x4x5 bottom, a same-shape "eltwise" blob, three 2-axis
#     broadcast blobs (2x3, 3x4, 4x5) and blob_bottom_scale_ (built from an
#     empty shape vector). Only blob_bottom_ / blob_top_ are registered by
#     default; each test pushes the second bottom it needs.
#   * TestForward*: compare top against bottom * scale (plus bias in
#     ...WithParamAndBias) within 1e-5. The scale is read from the second
#     bottom or, in the *WithParam* cases, from layer->blobs()[0].
#   * TestForward*InPlace: same check with top aliased to bottom, against a
#     saved copy of the original bottom data.
#   * TestBackward*InPlace: run forward/backward out of place, save the bottom
#     and scale diffs, rerun in place, and require equal diffs within 1e-5.
#   * TestGradient*: GradientChecker(1e-2, 1e-3) checks; TestGradientEltwise
#     uses CheckGradientEltwise, the others CheckGradientExhaustive.
#
# Restoration: this hunk had lost its line breaks and every angle-bracket span
# (template parameter lists, <Dtype>, vector<...>, system include targets).
# They are restored below with no added or removed lines: 501 '+' lines,
# matching the hunk header. The two system headers are inferred as
# <algorithm> and <vector> -- TODO confirm against blob 3669c2e1a4f.
#
# NOTE(review): TestGradientBroadcastMiddleWithParam pushes
#   blob_bottom_broadcast_1_ as a second bottom and also configures
#   num_axes/filler. Presumably the layer then takes its scale from bottom[1]
#   and never creates the parameter blob, so the test would duplicate
#   TestGradientBroadcastMiddle -- confirm against ScaleLayer::LayerSetUp and,
#   if so, drop the push_back.
# NOTE(review): orig_bottom in TestBackwardEltwiseInPlace and
#   TestBackwardBroadcastMiddleInPlace is copied but never read.
diff --git a/src/caffe/test/test_scale_layer.cpp b/src/caffe/test/test_scale_layer.cpp
new file mode 100644
index 00000000000..3669c2e1a4f
--- /dev/null
+++ b/src/caffe/test/test_scale_layer.cpp
@@ -0,0 +1,501 @@
+#include <algorithm>
+#include <vector>
+
+#include "gtest/gtest.h"
+
+#include "caffe/blob.hpp"
+#include "caffe/common.hpp"
+#include "caffe/filler.hpp"
+#include "caffe/layers/scale_layer.hpp"
+
+#include "caffe/test/test_caffe_main.hpp"
+#include "caffe/test/test_gradient_check_util.hpp"
+
+namespace caffe {
+
+template <typename TypeParam>
+class ScaleLayerTest : public MultiDeviceTest<TypeParam> {
+  typedef typename TypeParam::Dtype Dtype;
+
+ protected:
+  ScaleLayerTest()
+      : blob_bottom_(new Blob<Dtype>(2, 3, 4, 5)),
+        blob_bottom_eltwise_(new Blob<Dtype>(2, 3, 4, 5)),
+        blob_bottom_broadcast_0_(new Blob<Dtype>()),
+        blob_bottom_broadcast_1_(new Blob<Dtype>()),
+        blob_bottom_broadcast_2_(new Blob<Dtype>()),
+        blob_bottom_scale_(new Blob<Dtype>(vector<int>())),
+        blob_top_(new Blob<Dtype>()) {
+    Caffe::set_random_seed(1701);
+    vector<int> broadcast_shape(2);
+    broadcast_shape[0] = 2; broadcast_shape[1] = 3;
+    this->blob_bottom_broadcast_0_->Reshape(broadcast_shape);
+    broadcast_shape[0] = 3; broadcast_shape[1] = 4;
+    this->blob_bottom_broadcast_1_->Reshape(broadcast_shape);
+    broadcast_shape[0] = 4; broadcast_shape[1] = 5;
+    this->blob_bottom_broadcast_2_->Reshape(broadcast_shape);
+    FillerParameter filler_param;
+    filler_param.set_min(1);
+    filler_param.set_max(10);
+    UniformFiller<Dtype> filler(filler_param);
+    filler.Fill(this->blob_bottom_);
+    filler.Fill(this->blob_bottom_eltwise_);
+    filler.Fill(this->blob_bottom_broadcast_0_);
+    filler.Fill(this->blob_bottom_broadcast_1_);
+    filler.Fill(this->blob_bottom_broadcast_2_);
+    filler.Fill(this->blob_bottom_scale_);
+    blob_bottom_vec_.push_back(blob_bottom_);
+    blob_top_vec_.push_back(blob_top_);
+  }
+  virtual ~ScaleLayerTest() {
+    delete blob_bottom_;
+    delete blob_bottom_eltwise_;
+    delete blob_bottom_broadcast_0_;
+    delete blob_bottom_broadcast_1_;
+    delete blob_bottom_broadcast_2_;
+    delete blob_bottom_scale_;
+    delete blob_top_;
+  }
+  Blob<Dtype>* const blob_bottom_;
+  Blob<Dtype>* const blob_bottom_eltwise_;
+  Blob<Dtype>* const blob_bottom_broadcast_0_;
+  Blob<Dtype>* const blob_bottom_broadcast_1_;
+  Blob<Dtype>* const blob_bottom_broadcast_2_;
+  Blob<Dtype>* const blob_bottom_scale_;
+  Blob<Dtype>* const blob_top_;
+  vector<Blob<Dtype>*> blob_bottom_vec_;
+  vector<Blob<Dtype>*> blob_top_vec_;
+};
+
+TYPED_TEST_CASE(ScaleLayerTest, TestDtypesAndDevices);
+
+TYPED_TEST(ScaleLayerTest, TestForwardEltwise) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+  LayerParameter layer_param;
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  const Dtype* data = this->blob_top_->cpu_data();
+  const int count = this->blob_top_->count();
+  const Dtype* in_data_a = this->blob_bottom_->cpu_data();
+  const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5);
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardEltwiseInPlace) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_top_vec_[0] = this->blob_bottom_;  // in-place computation
+  Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+  orig_bottom.CopyFrom(*this->blob_bottom_);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+  LayerParameter layer_param;
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  const Dtype* data = this->blob_bottom_->cpu_data();
+  const int count = this->blob_bottom_->count();
+  const Dtype* in_data_a = orig_bottom.cpu_data();
+  const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5);
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestBackwardEltwiseInPlace) {
+  typedef typename TypeParam::Dtype Dtype;
+  Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+  orig_bottom.CopyFrom(*this->blob_bottom_);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+  LayerParameter layer_param;
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  Blob<Dtype> top_diff(this->blob_bottom_->shape());
+  FillerParameter filler_param;
+  filler_param.set_type("gaussian");
+  filler_param.set_std(1);
+  GaussianFiller<Dtype> filler(filler_param);
+  filler.Fill(&top_diff);
+  vector<bool> propagate_down(2, true);
+  // Run forward + backward without in-place computation;
+  // save resulting bottom diffs.
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  caffe_copy(top_diff.count(), top_diff.cpu_data(),
+             this->blob_top_->mutable_cpu_diff());
+  layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+  const bool kReshape = true;
+  const bool kCopyDiff = true;
+  Blob<Dtype> orig_bottom_diff;
+  orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
+  Blob<Dtype> orig_scale_diff;
+  orig_scale_diff.CopyFrom(*this->blob_bottom_eltwise_,
+                            kCopyDiff, kReshape);
+  // Rerun forward + backward with in-place computation;
+  // check that resulting bottom diffs are the same.
+  this->blob_top_vec_[0] = this->blob_bottom_;  // in-place computation
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  caffe_copy(top_diff.count(), top_diff.cpu_data(),
+             this->blob_bottom_->mutable_cpu_diff());
+  layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
+                this->blob_bottom_->cpu_diff()[i], 1e-5);
+  }
+  for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) {
+    EXPECT_NEAR(orig_scale_diff.cpu_diff()[i],
+                this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5);
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardEltwiseWithParam) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  ScaleParameter* scale_param = layer_param.mutable_scale_param();
+  scale_param->set_axis(0);
+  scale_param->set_num_axes(-1);
+  scale_param->mutable_filler()->set_type("gaussian");
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  const Dtype* data = this->blob_top_->cpu_data();
+  const int count = this->blob_top_->count();
+  const Dtype* in_data_a = this->blob_bottom_->cpu_data();
+  const Dtype* in_data_b = layer->blobs()[0]->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5);
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastBegin) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_);
+  LayerParameter layer_param;
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+    for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+      for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+        for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+          EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+                      this->blob_bottom_->data_at(n, c, h, w) *
+                      this->blob_bottom_broadcast_0_->data_at(n, c, 0, 0),
+                      1e-5);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddle) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(1);
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+    for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+      for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+        for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+          EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+                      this->blob_bottom_->data_at(n, c, h, w) *
+                      this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0),
+                      1e-5);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleInPlace) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_top_vec_[0] = this->blob_bottom_;  // in-place computation
+  Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+  orig_bottom.CopyFrom(*this->blob_bottom_);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(1);
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+    for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+      for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+        for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+          EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w),
+                      orig_bottom.data_at(n, c, h, w) *
+                      this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0),
+                      1e-5);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestBackwardBroadcastMiddleInPlace) {
+  typedef typename TypeParam::Dtype Dtype;
+  Blob<Dtype> orig_bottom(this->blob_bottom_->shape());
+  orig_bottom.CopyFrom(*this->blob_bottom_);
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(1);
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  Blob<Dtype> top_diff(this->blob_bottom_->shape());
+  FillerParameter filler_param;
+  filler_param.set_type("gaussian");
+  filler_param.set_std(1);
+  GaussianFiller<Dtype> filler(filler_param);
+  filler.Fill(&top_diff);
+  vector<bool> propagate_down(2, true);
+  // Run forward + backward without in-place computation;
+  // save resulting bottom diffs.
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  caffe_copy(top_diff.count(), top_diff.cpu_data(),
+             this->blob_top_->mutable_cpu_diff());
+  layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+  const bool kReshape = true;
+  const bool kCopyDiff = true;
+  Blob<Dtype> orig_bottom_diff;
+  orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape);
+  Blob<Dtype> orig_scale_diff;
+  orig_scale_diff.CopyFrom(*this->blob_bottom_broadcast_1_,
+                            kCopyDiff, kReshape);
+  // Rerun forward + backward with in-place computation;
+  // check that resulting bottom diffs are the same.
+  this->blob_top_vec_[0] = this->blob_bottom_;  // in-place computation
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  caffe_copy(top_diff.count(), top_diff.cpu_data(),
+             this->blob_bottom_->mutable_cpu_diff());
+  layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_);
+  for (int i = 0; i < this->blob_bottom_->count(); ++i) {
+    EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i],
+                this->blob_bottom_->cpu_diff()[i], 1e-5);
+  }
+  for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) {
+    EXPECT_NEAR(orig_scale_diff.cpu_diff()[i],
+                this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5);
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleWithParam) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  ScaleParameter* scale_param = layer_param.mutable_scale_param();
+  scale_param->set_axis(1);
+  scale_param->set_num_axes(2);
+  scale_param->mutable_filler()->set_type("gaussian");
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+    for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+      for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+        for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+          EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+                      this->blob_bottom_->data_at(n, c, h, w) *
+                      layer->blobs()[0]->data_at(c, h, 0, 0), 1e-5);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastMiddleWithParamAndBias) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  ScaleParameter* scale_param = layer_param.mutable_scale_param();
+  scale_param->set_axis(1);
+  scale_param->set_num_axes(2);
+  scale_param->mutable_filler()->set_type("gaussian");
+  scale_param->set_bias_term(true);
+  scale_param->mutable_bias_filler()->set_type("gaussian");
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+    for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+      for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+        for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+          EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+                      this->blob_bottom_->data_at(n, c, h, w) *
+                      layer->blobs()[0]->data_at(c, h, 0, 0) +
+                      layer->blobs()[1]->data_at(c, h, 0, 0), 1e-5);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardBroadcastEnd) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(2);
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  for (int n = 0; n < this->blob_bottom_->num(); ++n) {
+    for (int c = 0; c < this->blob_bottom_->channels(); ++c) {
+      for (int h = 0; h < this->blob_bottom_->height(); ++h) {
+        for (int w = 0; w < this->blob_bottom_->width(); ++w) {
+          EXPECT_NEAR(this->blob_top_->data_at(n, c, h, w),
+                      this->blob_bottom_->data_at(n, c, h, w) *
+                      this->blob_bottom_broadcast_2_->data_at(h, w, 0, 0),
+                      1e-5);
+        }
+      }
+    }
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardScale) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+  LayerParameter layer_param;
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  const Dtype* data = this->blob_top_->cpu_data();
+  const int count = this->blob_top_->count();
+  const Dtype* in_data = this->blob_bottom_->cpu_data();
+  const Dtype scale = *this->blob_bottom_scale_->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_NEAR(data[i], in_data[i] * scale, 1e-5);
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestForwardScaleAxis2) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(2);
+  shared_ptr<ScaleLayer<Dtype> > layer(new ScaleLayer<Dtype>(layer_param));
+  layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_);
+  ASSERT_EQ(this->blob_bottom_->shape(), this->blob_top_->shape());
+  layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_);
+  const Dtype* data = this->blob_top_->cpu_data();
+  const int count = this->blob_top_->count();
+  const Dtype* in_data = this->blob_bottom_->cpu_data();
+  const Dtype scale = *this->blob_bottom_scale_->cpu_data();
+  for (int i = 0; i < count; ++i) {
+    EXPECT_NEAR(data[i], in_data[i] * scale, 1e-5);
+  }
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientEltwise) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_);
+  LayerParameter layer_param;
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientEltwise(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientEltwiseWithParam) {
+  typedef typename TypeParam::Dtype Dtype;
+  LayerParameter layer_param;
+  ScaleParameter* scale_param = layer_param.mutable_scale_param();
+  scale_param->set_axis(0);
+  scale_param->set_num_axes(-1);
+  scale_param->mutable_filler()->set_type("gaussian");
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastBegin) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_0_);
+  LayerParameter layer_param;
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastMiddle) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(1);
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastMiddleWithParam) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_);
+  LayerParameter layer_param;
+  ScaleParameter* scale_param = layer_param.mutable_scale_param();
+  scale_param->set_axis(1);
+  scale_param->set_num_axes(2);
+  scale_param->mutable_filler()->set_type("gaussian");
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientBroadcastEnd) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_2_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(2);
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientScale) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+  LayerParameter layer_param;
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientScaleAndBias) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+  LayerParameter layer_param;
+  ScaleParameter* scale_param = layer_param.mutable_scale_param();
+  scale_param->set_bias_term(true);
+  scale_param->mutable_bias_filler()->set_type("gaussian");
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+TYPED_TEST(ScaleLayerTest, TestGradientScaleAxis2) {
+  typedef typename TypeParam::Dtype Dtype;
+  this->blob_bottom_vec_.push_back(this->blob_bottom_scale_);
+  LayerParameter layer_param;
+  layer_param.mutable_scale_param()->set_axis(2);
+  ScaleLayer<Dtype> layer(layer_param);
+  GradientChecker<Dtype> checker(1e-2, 1e-3);
+  checker.CheckGradientExhaustive(&layer, this->blob_bottom_vec_,
+      this->blob_top_vec_);
+}
+
+}  // namespace caffe