Separation and generalization of ChannelwiseAffineLayer into BiasLayer
and ScaleLayer.  The behavior of ChannelwiseAffineLayer can be
reproduced by a ScaleLayer with `scale_param { bias_term: true }`.

BiasLayer and ScaleLayer each take 1 or 2 bottoms, with the output having
the same shape as the first.  The second input -- either another bottom or a
learned parameter -- will have its axes (virtually) broadcast and tiled to have
the same shape as the first, after which elementwise addition (Bias) or
multiplication (Scale) is performed.
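
As an illustration of the new usage (a sketch, not part of this diff; the layer and blob names and the constant fillers are placeholders), the old per-channel affine behavior corresponds to a Scale layer along these lines:

layer {
  name: "affine"              # placeholder name
  type: "Scale"
  bottom: "conv1"             # placeholder input, e.g. N x C x H x W
  top: "conv1_affine"
  scale_param {
    axis: 1                   # apply per channel
    num_axes: 1               # learned scale has shape (C)
    filler { value: 1 }       # multiplicative term initialized to 1
    bias_term: true           # also learn an additive term
    bias_filler { value: 0 }  # additive term initialized to 0
  }
}

With a single bottom and `bias_term: true`, the additive term is handled by an internal BiasLayer (see the `bias_layer_` member in scale_layer.hpp below).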
jeffdonahue committed Jan 23, 2016
1 parent ec04197 commit 8a67137
Showing 13 changed files with 1,702 additions and 551 deletions.
54 changes: 54 additions & 0 deletions include/caffe/layers/bias_layer.hpp
@@ -0,0 +1,54 @@
#ifndef CAFFE_BIAS_LAYER_HPP_
#define CAFFE_BIAS_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

namespace caffe {

/**
* @brief Computes a sum of two input Blobs, with the shape of the
* latter Blob "broadcast" to match the shape of the former.
* Equivalent to tiling the latter Blob, then computing the elementwise
* sum.
*
* The second input may be omitted, in which case it's learned as a parameter
* of the layer.
*/
template <typename Dtype>
class BiasLayer : public Layer<Dtype> {
public:
explicit BiasLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Bias"; }
virtual inline int MinBottomBlobs() const { return 1; }
virtual inline int MaxBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }

virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

private:
Blob<Dtype> bias_multiplier_;
int outer_dim_, bias_dim_, inner_dim_, dim_;
};



} // namespace caffe

#endif // CAFFE_BIAS_LAYER_HPP_
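
A usage sketch for the two-bottom case (the blob and layer names here are hypothetical, not from this commit): the second bottom is broadcast against the first starting at `bias_param.axis` and added elementwise.

layer {
  name: "add_offset"          # hypothetical name
  type: "Bias"
  bottom: "data"              # e.g. shape N x C x H x W
  bottom: "channel_offset"    # e.g. shape (C); broadcast over N, H, W
  top: "data_plus_offset"
  bias_param { axis: 1 }      # align "channel_offset" with axis 1 (C) of "data"
}

With only one bottom, the offset instead becomes a learned parameter of the layer, initialized by `bias_param.filler` (see `LayerSetUp` in src/caffe/layers/bias_layer.cpp below).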
103 changes: 0 additions & 103 deletions include/caffe/layers/channelwise_affine_layer.hpp

This file was deleted.

83 changes: 83 additions & 0 deletions include/caffe/layers/scale_layer.hpp
@@ -0,0 +1,83 @@
#ifndef CAFFE_SCALE_LAYER_HPP_
#define CAFFE_SCALE_LAYER_HPP_

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"

#include "caffe/layers/bias_layer.hpp"

namespace caffe {

/**
* @brief Computes a product of two input Blobs, with the shape of the
* latter Blob "broadcast" to match the shape of the former.
* Equivalent to tiling the latter Blob, then computing the elementwise
* product.
*
* The second input may be omitted, in which case it's learned as a parameter
* of the layer.
*/
template <typename Dtype>
class ScaleLayer: public Layer<Dtype> {
public:
explicit ScaleLayer(const LayerParameter& param)
: Layer<Dtype>(param) {}
virtual void LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);

virtual inline const char* type() const { return "Scale"; }
// Scale
virtual inline int MinBottomBlobs() const { return 1; }
virtual inline int MaxBottomBlobs() const { return 2; }
virtual inline int ExactNumTopBlobs() const { return 1; }

protected:
/**
* In the below shape specifications, @f$ i @f$ denotes the value of the
* `axis` field given by `this->layer_param_.scale_param().axis()`, after
* canonicalization (i.e., conversion from negative to positive index,
* if applicable).
*
* @param bottom input Blob vector (length 2)
* -# @f$ (d_0 \times ... \times
* d_i \times ... \times d_j \times ... \times d_n) @f$
* the first factor @f$ x @f$
* -# @f$ (d_i \times ... \times d_j) @f$
* the second factor @f$ y @f$
* @param top output Blob vector (length 1)
* -# @f$ (d_0 \times ... \times
* d_i \times ... \times d_j \times ... \times d_n) @f$
* the product @f$ z = x y @f$ computed after "broadcasting" y.
* Equivalent to tiling @f$ y @f$ to have the same shape as @f$ x @f$,
* then computing the elementwise product.
*/
virtual void Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Forward_gpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top);
virtual void Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);
virtual void Backward_gpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom);

shared_ptr<Layer<Dtype> > bias_layer_;
vector<Blob<Dtype>*> bias_bottom_vec_;
vector<bool> bias_propagate_down_;
int bias_param_id_;

Blob<Dtype> sum_multiplier_;
Blob<Dtype> sum_result_;
Blob<Dtype> temp_;
int axis_;
int outer_dim_, scale_dim_, inner_dim_;
};


} // namespace caffe

#endif // CAFFE_SCALE_LAYER_HPP_
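
To make the shape specification above concrete, here is a hypothetical two-bottom configuration (names and shapes are illustrative only, not part of this commit):

layer {
  name: "broadcast_mul"       # hypothetical name
  type: "Scale"
  bottom: "features"          # x, e.g. shape 32 x 64 x 14 x 14
  bottom: "gate"              # y, e.g. shape 64 x 14 (matches axes 1-2 of "features")
  top: "gated_features"       # z = x * y after broadcasting y
  scale_param { axis: 1 }     # y aligns with "features" starting at axis 1
}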
121 changes: 121 additions & 0 deletions src/caffe/layers/bias_layer.cpp
@@ -0,0 +1,121 @@
#include <vector>

#include "caffe/filler.hpp"
#include "caffe/layers/bias_layer.hpp"
#include "caffe/util/math_functions.hpp"

namespace caffe {

template <typename Dtype>
void BiasLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
if (bottom.size() == 1 && this->blobs_.size() > 0) {
LOG(INFO) << "Skipping parameter initialization";
} else if (bottom.size() == 1) {
// bias is a learned parameter; initialize it
const BiasParameter& param = this->layer_param_.bias_param();
const int axis = bottom[0]->CanonicalAxisIndex(param.axis());
const int num_axes = param.num_axes();
CHECK_GE(num_axes, -1) << "num_axes must be non-negative, "
<< "or -1 to extend to the end of bottom[0]";
if (num_axes >= 0) {
CHECK_GE(bottom[0]->num_axes(), axis + num_axes)
<< "bias blob's shape extends past bottom[0]'s shape when applied "
<< "starting with bottom[0] axis = " << axis;
}
this->blobs_.resize(1);
const vector<int>::const_iterator& shape_start =
bottom[0]->shape().begin() + axis;
const vector<int>::const_iterator& shape_end =
(num_axes == -1) ? bottom[0]->shape().end() : (shape_start + num_axes);
vector<int> bias_shape(shape_start, shape_end);
this->blobs_[0].reset(new Blob<Dtype>(bias_shape));
shared_ptr<Filler<Dtype> > filler(GetFiller<Dtype>(param.filler()));
filler->Fill(this->blobs_[0].get());
}
this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void BiasLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const BiasParameter& param = this->layer_param_.bias_param();
Blob<Dtype>* bias = (bottom.size() > 1) ? bottom[1] : this->blobs_[0].get();
// Always set axis == 0 in special case where bias is a scalar
// (num_axes == 0). Mathematically equivalent for any choice of axis, so the
// actual setting can be safely ignored; and computation is most efficient
// with axis == 0 and (therefore) outer_dim_ == 1.
const int axis = (bias->num_axes() == 0) ?
0 : bottom[0]->CanonicalAxisIndex(param.axis());
CHECK_GE(bottom[0]->num_axes(), axis + bias->num_axes())
<< "bias blob's shape extends past bottom[0]'s shape when applied "
<< "starting with bottom[0] axis = " << axis;
for (int i = 0; i < bias->num_axes(); ++i) {
CHECK_EQ(bottom[0]->shape(axis + i), bias->shape(i))
<< "dimension mismatch between bottom[0]->shape(" << axis + i
<< ") and bias->shape(" << i << ")";
}
outer_dim_ = bottom[0]->count(0, axis);
bias_dim_ = bias->count();
inner_dim_ = bottom[0]->count(axis + bias->num_axes());
dim_ = bias_dim_ * inner_dim_;
if (bottom[0] != top[0]) {
top[0]->ReshapeLike(*bottom[0]);
}
bias_multiplier_.Reshape(vector<int>(1, inner_dim_));
if (bias_multiplier_.cpu_data()[inner_dim_ - 1] != Dtype(1)) {
caffe_set(inner_dim_, Dtype(1), bias_multiplier_.mutable_cpu_data());
}
}

template <typename Dtype>
void BiasLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
const vector<Blob<Dtype>*>& top) {
const Dtype* bias_data =
((bottom.size() > 1) ? bottom[1] : this->blobs_[0].get())->cpu_data();
Dtype* top_data = top[0]->mutable_cpu_data();
if (bottom[0] != top[0]) {
const Dtype* bottom_data = bottom[0]->cpu_data();
caffe_copy(bottom[0]->count(), bottom_data, top_data);
}
for (int n = 0; n < outer_dim_; ++n) {
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, bias_dim_,
inner_dim_, 1, Dtype(1), bias_data,
bias_multiplier_.cpu_data(), Dtype(1), top_data);
top_data += dim_;
}
}

template <typename Dtype>
void BiasLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
if (propagate_down[0] && bottom[0] != top[0]) {
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
caffe_copy(bottom[0]->count(), top_diff, bottom_diff);
}
// in-place, we don't need to do anything with the data diff
const bool bias_param = (bottom.size() == 1);
if ((!bias_param && propagate_down[1]) ||
(bias_param && this->param_propagate_down_[0])) {
const Dtype* top_diff = top[0]->cpu_diff();
Dtype* bias_diff = (bias_param ? this->blobs_[0].get() : bottom[1])
->mutable_cpu_diff();
bool accum = bias_param;
for (int n = 0; n < outer_dim_; ++n) {
caffe_cpu_gemv(CblasNoTrans, bias_dim_, inner_dim_, Dtype(1),
top_diff, bias_multiplier_.cpu_data(), Dtype(accum), bias_diff);
top_diff += dim_;
accum = true;
}
}
}

#ifdef CPU_ONLY
STUB_GPU(BiasLayer);
#endif

INSTANTIATE_CLASS(BiasLayer);
REGISTER_LAYER_CLASS(Bias);

} // namespace caffe
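
To make the bookkeeping in `Reshape` above concrete, here is a hypothetical configuration (names, shapes, and filler values are illustrative, not from this commit) together with the counts it produces:

layer {
  name: "bias_example"        # hypothetical layer
  type: "Bias"
  bottom: "feat"              # assume shape 2 x 3 x 4 x 5
  top: "feat_biased"
  bias_param {
    axis: 1
    num_axes: 2               # learned bias gets shape 3 x 4
    filler { value: 0 }
  }
}
# For a 2 x 3 x 4 x 5 bottom and a 3 x 4 bias:
#   outer_dim_ = bottom[0]->count(0, axis)                  = 2
#   bias_dim_  = bias->count()                              = 3 * 4 = 12
#   inner_dim_ = bottom[0]->count(axis + bias->num_axes())  = 5
#   dim_       = bias_dim_ * inner_dim_                     = 60
# Forward_cpu then performs outer_dim_ = 2 rank-1 GEMM updates, each adding
# the 12-element bias across the 5-element inner dimension of the top blob.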