// Reviewer commentary for this patch. These lines sit above the first
// "diff --git" header, where patch tools conventionally skip leading text.
//
// Purpose: allow ScalarLayer to run in-place, i.e. with bottom[0] == top[0].
//
//   * include/caffe/layers/scalar_layer.hpp
//       Adds the member `temp_` after sum_multiplier_ / sum_result_.
//
//   * src/caffe/layers/scalar_layer.cpp
//       Reshape: removes the CHECK_NE(bottom[0], top[0]) guard and its TODO.
//         When bottom[0] == top[0] it reshapes temp_ like bottom[0]; otherwise
//         it reshapes top[0] like bottom[0], as before.
//       Forward_cpu: when in-place, copies bottom[0]'s data into temp_ before
//         the top data pointer is obtained, so Backward can still read it.
//       Backward_cpu: reads bottom data from temp_ when in-place. The big
//         element-wise product is written to the scalar diff when is_eltwise,
//         to temp_'s data when in-place, and to bottom[0]'s diff otherwise.
//
//   * src/caffe/layers/scalar_layer.cu
//       Forward_gpu / Backward_gpu: the same changes, using the gpu_data /
//       mutable_gpu_data / mutable_gpu_diff accessors and caffe_gpu_mul.
//
//   * src/caffe/test/test_scalar_layer.cpp
//       TestForwardEltwiseInPlace, TestForwardBroadcastMiddleInPlace:
//         point blob_top_vec_[0] at blob_bottom_, run SetUp + Forward, and
//         check each output against (saved original bottom * scalar input)
//         with tolerance 1e-5.
//       TestBackwardEltwiseInPlace, TestBackwardBroadcastMiddleInPlace:
//         run Forward + Backward with a separate top blob, save the bottom and
//         scalar diffs, rerun with blob_top_vec_[0] = blob_bottom_, and check
//         that both diffs match the saved ones with tolerance 1e-5.
//
// NOTE(review): in the in-place, non-eltwise Backward path, `bottom_data` and
//   `product` both refer to temp_'s data buffer, so the multiply writes over
//   its own input and the saved bottom data is replaced by the product.
//   Presumably a second Backward without an intervening Forward would then
//   read the product instead of the bottom data -- confirm this is acceptable.
// NOTE(review): in both Backward*InPlace tests, `orig_bottom` is filled via
//   CopyFrom but is not read again in the visible test body; it looks unused.
// NOTE(review): the Backward*InPlace tests call SetUp with a separate top blob
//   and only afterwards switch to in-place before calling Forward again. This
//   presumably relies on Forward re-running Reshape so that temp_ is sized --
//   verify against Layer::Forward.
// NOTE(review): this copy of the patch appears to have lost its line breaks
//   and its template argument lists (e.g. `vector*>&`, `shared_ptr > layer`,
//   `template void`); presumably extraction damage -- TODO confirm against
//   the original commit before trying to apply it.
diff --git a/include/caffe/layers/scalar_layer.hpp b/include/caffe/layers/scalar_layer.hpp index 59882e4d5f6..f679622dde4 100644 --- a/include/caffe/layers/scalar_layer.hpp +++ b/include/caffe/layers/scalar_layer.hpp @@ -65,6 +65,7 @@ class ScalarLayer: public Layer { Blob sum_multiplier_; Blob sum_result_; + Blob temp_; int axis_; int outer_dim_, scalar_dim_, inner_dim_; }; diff --git a/src/caffe/layers/scalar_layer.cpp b/src/caffe/layers/scalar_layer.cpp index 67988fce6df..0fa489ae976 100644 --- a/src/caffe/layers/scalar_layer.cpp +++ b/src/caffe/layers/scalar_layer.cpp @@ -44,12 +44,6 @@ void ScalarLayer::LayerSetUp(const vector*>& bottom, template void ScalarLayer::Reshape(const vector*>& bottom, const vector*>& top) { - // TODO: make ScalarLayer usable in-place. - // Currently, in-place computation is broken during Backward with - // propagate_down[0] && propagate_down[1], as bottom[0]'s diff is used for - // temporary storage of an intermediate result, overwriting top[0]'s diff - // if using in-place computation. - CHECK_NE(bottom[0], top[0]) << "ScalarLayer cannot be used in-place"; const ScalarParameter& param = this->layer_param_.scalar_param(); Blob* scalar = (bottom.size() > 1) ? 
bottom[1] : this->blobs_[0].get(); // Always set axis_ == 0 in special case where scalar is an actual scalar @@ -71,7 +65,11 @@ void ScalarLayer::Reshape(const vector*>& bottom, outer_dim_ = bottom[0]->count(0, axis_); scalar_dim_ = scalar->count(); inner_dim_ = bottom[0]->count(axis_ + scalar->num_axes()); - top[0]->ReshapeLike(*bottom[0]); + if (bottom[0] == top[0]) { // in-place computation + temp_.ReshapeLike(*bottom[0]); + } else { + top[0]->ReshapeLike(*bottom[0]); + } sum_result_.Reshape(vector(1, outer_dim_ * scalar_dim_)); const int sum_mult_size = std::max(outer_dim_, inner_dim_); sum_multiplier_.Reshape(vector(1, sum_mult_size)); @@ -84,6 +82,14 @@ template void ScalarLayer::Forward_cpu( const vector*>& bottom, const vector*>& top) { const Dtype* bottom_data = bottom[0]->cpu_data(); + if (bottom[0] == top[0]) { + // In-place computation; need to store bottom data before overwriting it. + // Note that this is only necessary for Backward; we could skip this if not + // doing Backward, but Caffe currently provides no way of knowing whether + // we'll need to do Backward at the time of the Forward call. + caffe_copy(bottom[0]->count(), bottom[0]->cpu_data(), + temp_.mutable_cpu_data()); + } const Dtype* scalar_data = ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data(); Dtype* top_data = top[0]->mutable_cpu_data(); @@ -105,12 +111,16 @@ void ScalarLayer::Backward_cpu(const vector*>& top, if ((!scalar_param && propagate_down[1]) || (scalar_param && this->param_propagate_down_[0])) { const Dtype* top_diff = top[0]->cpu_diff(); - const Dtype* bottom_data = bottom[0]->cpu_data(); + const bool in_place = (bottom[0] == top[0]); + const Dtype* bottom_data = (in_place ? &temp_ : bottom[0])->cpu_data(); // Hack: store big eltwise product in bottom[0] diff, except in the special // case where this layer itself does the eltwise product, in which case we // can store it directly in the scalar diff, and we're done. 
+ // If we're computing in-place (and not doing eltwise computation), this + // hack doesn't work and we store the product in temp_. const bool is_eltwise = (bottom[0]->count() == scalar->count()); - Dtype* product = (is_eltwise ? scalar : bottom[0])->mutable_cpu_diff(); + Dtype* product = (is_eltwise ? scalar->mutable_cpu_diff() : + (in_place ? temp_.mutable_cpu_data() : bottom[0]->mutable_cpu_diff())); caffe_mul(top[0]->count(), top_diff, bottom_data, product); if (!is_eltwise) { Dtype* sum_result = NULL; diff --git a/src/caffe/layers/scalar_layer.cu b/src/caffe/layers/scalar_layer.cu index b1af488d769..9c6932723af 100644 --- a/src/caffe/layers/scalar_layer.cu +++ b/src/caffe/layers/scalar_layer.cu @@ -21,6 +21,14 @@ void ScalarLayer::Forward_gpu( const vector*>& bottom, const vector*>& top) { const int count = top[0]->count(); const Dtype* bottom_data = bottom[0]->gpu_data(); + if (bottom[0] == top[0]) { + // in-place computation; need to store bottom data before overwriting it. + // Note that this is only necessary for Backward; we could skip this if not + // doing Backward, but Caffe currently provides no way of knowing whether + // we'll need to do Backward at the time of the Forward call. + caffe_copy(bottom[0]->count(), bottom[0]->gpu_data(), + temp_.mutable_gpu_data()); + } const Dtype* scalar_data = ((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->gpu_data(); Dtype* top_data = top[0]->mutable_gpu_data(); @@ -37,12 +45,16 @@ void ScalarLayer::Backward_gpu(const vector*>& top, if ((!scalar_param && propagate_down[1]) || (scalar_param && this->param_propagate_down_[0])) { const Dtype* top_diff = top[0]->gpu_diff(); - const Dtype* bottom_data = bottom[0]->gpu_data(); + const bool in_place = (bottom[0] == top[0]); + const Dtype* bottom_data = (in_place ? 
&temp_ : bottom[0])->gpu_data(); // Hack: store big eltwise product in bottom[0] diff, except in the special // case where this layer itself does the eltwise product, in which case we // can store it directly in the scalar diff, and we're done. + // If we're computing in-place (and not doing eltwise computation), this + // hack doesn't work and we store the product in temp_. const bool is_eltwise = (bottom[0]->count() == scalar->count()); - Dtype* product = (is_eltwise ? scalar : bottom[0])->mutable_gpu_diff(); + Dtype* product = (is_eltwise ? scalar->mutable_gpu_diff() : + (in_place ? temp_.mutable_gpu_data() : bottom[0]->mutable_gpu_diff())); caffe_gpu_mul(top[0]->count(), top_diff, bottom_data, product); if (!is_eltwise) { Dtype* sum_result = NULL; diff --git a/src/caffe/test/test_scalar_layer.cpp b/src/caffe/test/test_scalar_layer.cpp index caba89a0d81..399d54a395e 100644 --- a/src/caffe/test/test_scalar_layer.cpp +++ b/src/caffe/test/test_scalar_layer.cpp @@ -86,6 +86,70 @@ TYPED_TEST(ScalarLayerTest, TestForwardEltwise) { } } +TYPED_TEST(ScalarLayerTest, TestForwardEltwiseInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + shared_ptr > layer(new ScalarLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + const Dtype* data = this->blob_bottom_->cpu_data(); + const int count = this->blob_bottom_->count(); + const Dtype* in_data_a = orig_bottom.cpu_data(); + const Dtype* in_data_b = this->blob_bottom_eltwise_->cpu_data(); + for (int i = 0; i < count; ++i) { + EXPECT_NEAR(data[i], in_data_a[i] * in_data_b[i], 1e-5); + } +} + +TYPED_TEST(ScalarLayerTest, TestBackwardEltwiseInPlace) { + typedef typename 
TypeParam::Dtype Dtype; + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_eltwise_); + LayerParameter layer_param; + shared_ptr > layer(new ScalarLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_scalar_diff; + orig_scalar_diff.CopyFrom(*this->blob_bottom_eltwise_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. 
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_eltwise_->count(); ++i) { + EXPECT_NEAR(orig_scalar_diff.cpu_diff()[i], + this->blob_bottom_eltwise_->cpu_diff()[i], 1e-5); + } +} + TYPED_TEST(ScalarLayerTest, TestForwardEltwiseWithParam) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param; @@ -151,6 +215,77 @@ TYPED_TEST(ScalarLayerTest, TestForwardBroadcastMiddle) { } } +TYPED_TEST(ScalarLayerTest, TestForwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + Blob orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_scalar_param()->set_axis(1); + shared_ptr > layer(new ScalarLayer(layer_param)); + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + for (int n = 0; n < this->blob_bottom_->num(); ++n) { + for (int c = 0; c < this->blob_bottom_->channels(); ++c) { + for (int h = 0; h < this->blob_bottom_->height(); ++h) { + for (int w = 0; w < this->blob_bottom_->width(); ++w) { + EXPECT_NEAR(this->blob_bottom_->data_at(n, c, h, w), + orig_bottom.data_at(n, c, h, w) * + this->blob_bottom_broadcast_1_->data_at(c, h, 0, 0), + 1e-5); + } + } + } + } +} + +TYPED_TEST(ScalarLayerTest, TestBackwardBroadcastMiddleInPlace) { + typedef typename TypeParam::Dtype Dtype; + Blob 
orig_bottom(this->blob_bottom_->shape()); + orig_bottom.CopyFrom(*this->blob_bottom_); + this->blob_bottom_vec_.push_back(this->blob_bottom_broadcast_1_); + LayerParameter layer_param; + layer_param.mutable_scalar_param()->set_axis(1); + shared_ptr > layer(new ScalarLayer(layer_param)); + Blob top_diff(this->blob_bottom_->shape()); + FillerParameter filler_param; + filler_param.set_type("gaussian"); + filler_param.set_std(1); + GaussianFiller filler(filler_param); + filler.Fill(&top_diff); + vector propagate_down(2, true); + // Run forward + backward without in-place computation; + // save resulting bottom diffs. + layer->SetUp(this->blob_bottom_vec_, this->blob_top_vec_); + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_top_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + const bool kReshape = true; + const bool kCopyDiff = true; + Blob orig_bottom_diff; + orig_bottom_diff.CopyFrom(*this->blob_bottom_, kCopyDiff, kReshape); + Blob orig_scalar_diff; + orig_scalar_diff.CopyFrom(*this->blob_bottom_broadcast_1_, + kCopyDiff, kReshape); + // Rerun forward + backward with in-place computation; + // check that resulting bottom diffs are the same. 
+ this->blob_top_vec_[0] = this->blob_bottom_; // in-place computation + layer->Forward(this->blob_bottom_vec_, this->blob_top_vec_); + caffe_copy(top_diff.count(), top_diff.cpu_data(), + this->blob_bottom_->mutable_cpu_diff()); + layer->Backward(this->blob_top_vec_, propagate_down, this->blob_bottom_vec_); + for (int i = 0; i < this->blob_bottom_->count(); ++i) { + EXPECT_NEAR(orig_bottom_diff.cpu_diff()[i], + this->blob_bottom_->cpu_diff()[i], 1e-5); + } + for (int i = 0; i < this->blob_bottom_broadcast_1_->count(); ++i) { + EXPECT_NEAR(orig_scalar_diff.cpu_diff()[i], + this->blob_bottom_broadcast_1_->cpu_diff()[i], 1e-5); + } +} + TYPED_TEST(ScalarLayerTest, TestForwardBroadcastMiddleWithParam) { typedef typename TypeParam::Dtype Dtype; LayerParameter layer_param;