Unverified commit 52f7e773, authored by Aurelius84, committed by GitHub

[Cherry-Pick] Split Macros and Add modeling unittest (#31266)

* [CustomOp] Add Modeling with Custom op unittest (#31218)

* add unittest for static/dygraph/dy2stat

* add PE unittest

* remove useless code

* add unittest in CMakeList.txt

* [CustomOp] Split build op macro & polish details (#31229)

* split build op macro & polish details

* revert register api del

* fix other unittest

* [CustomOp] Support Incremental compilation and Add Version management (#31228)

* Support Incremental compilation and Add Version management

* replace hash with hashlib

* fix test_op_num unittest

* Revert "fix test_op_num unittest"

This reverts commit 2f78de976e1d7ca60915b2310717b38a32ae204a.
Co-authored-by: Chen Weihang <chenweihang@baidu.com>
Parent 536d9a3b
@@ -38,6 +38,8 @@ class PD_DLL_DECL OpMetaInfoHelper;

 using Tensor = paddle::Tensor;

+///////////////// Util Macro Define ////////////////
+
 #define PD_DISABLE_COPY_AND_ASSIGN(classname)         \
  private:                                             \
   classname(const classname&) = delete;               \
@@ -65,6 +67,12 @@ using Tensor = paddle::Tensor;
     END_HANDLE_THE_ERROR                              \
   } while (0)

+#define STATIC_ASSERT_GLOBAL_NAMESPACE(uniq_name, msg)                        \
+  struct __test_global_namespace_##uniq_name##__ {};                          \
+  static_assert(std::is_same<::__test_global_namespace_##uniq_name##__,       \
+                             __test_global_namespace_##uniq_name##__>::value, \
+                msg)
+
 ///////////////// Util Define and Function ////////////////

 inline std::string Grad(const std::string& var_name) {
@@ -288,9 +296,9 @@ class PD_DLL_DECL OpMetaInfo {
   std::vector<std::string> attrs_;
   // 2. func info
-  KernelFunc kernel_fn_;
-  InferShapeFunc infer_shape_fn_;
-  InferDtypeFunc infer_dtype_fn_;
+  KernelFunc kernel_fn_{nullptr};
+  InferShapeFunc infer_shape_fn_{nullptr};
+  InferDtypeFunc infer_dtype_fn_{nullptr};
 };

 //////////////// Op Meta Info Map /////////////////
@@ -321,20 +329,22 @@ class PD_DLL_DECL OpMetaInfoMap {
 class PD_DLL_DECL OpMetaInfoBuilder {
  public:
-  explicit OpMetaInfoBuilder(std::string&& name);
+  explicit OpMetaInfoBuilder(std::string&& name, size_t index);
   OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
   OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
   OpMetaInfoBuilder& Attrs(std::vector<std::string>&& attrs);
   OpMetaInfoBuilder& SetKernelFn(KernelFunc func);
   OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func);
   OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
-  OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);

  private:
   // Forward Op name
   std::string name_;
-  // Point to the currently constructed op meta info
+  // ref current info ptr
   OpMetaInfo* info_ptr_;
+  // The current op meta info index in vector
+  // - 0: op, 1: grad_op, 2: grad_grad_op
+  size_t index_;
 };

 /////////////////////// Op register API /////////////////////////
@@ -350,14 +360,25 @@ void LoadCustomOperatorLib(const std::string& dso_name);

 /////////////////////// Op register Macro /////////////////////////

-#define PD_BUILD_OP_WITH_COUNTER(op_name, counter)                  \
-  static ::paddle::OpMetaInfoBuilder __op_meta_info_##counter##__ = \
-      ::paddle::OpMetaInfoBuilder(op_name)
-
-#define PD_BUILD_OP_INNER(op_name, counter) \
-  PD_BUILD_OP_WITH_COUNTER(op_name, counter)
-
-#define PD_BUILD_OP(op_name) PD_BUILD_OP_INNER(op_name, __COUNTER__)
+#define PD_BUILD_OP(op_name)                                                   \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                              \
+      __reg_op__##op_name, "PD_BUILD_OP must be called in global namespace."); \
+  static ::paddle::OpMetaInfoBuilder __op_meta_info_##op_name##__ =            \
+      ::paddle::OpMetaInfoBuilder(#op_name, 0)
+
+#define PD_BUILD_GRAD_OP(op_name)                                        \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                        \
+      __reg_grad_op__##op_name,                                          \
+      "PD_BUILD_GRAD_OP must be called in global namespace.");           \
+  static ::paddle::OpMetaInfoBuilder __grad_op_meta_info_##op_name##__ = \
+      ::paddle::OpMetaInfoBuilder(#op_name, 1)
+
+#define PD_BUILD_DOUBLE_GRAD_OP(op_name)                                      \
+  STATIC_ASSERT_GLOBAL_NAMESPACE(                                             \
+      __reg_grad_grad_op__##op_name,                                          \
+      "PD_BUILD_DOUBLE_GRAD_OP must be called in global namespace.");         \
+  static ::paddle::OpMetaInfoBuilder __grad_grad_op_meta_info_##op_name##__ = \
+      ::paddle::OpMetaInfoBuilder(#op_name, 2)

 } // namespace paddle
...
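
Taken together, the three split macros replace the old `__COUNTER__`-based `PD_BUILD_OP` and the removed `SetBackwardOp` chaining. A minimal usage sketch (the op name `my_relu` and the kernels `MyForward`/`MyBackward` are hypothetical placeholders; real uses appear in the test files further down in this diff):

    // Each macro must be invoked at global scope, and in index order
    // (0: op, 1: grad_op, 2: grad_grad_op) -- the builder checks this.
    PD_BUILD_OP(my_relu)
        .Inputs({"X"})
        .Outputs({"Out"})
        .SetKernelFn(PD_KERNEL(MyForward));

    // Registers the backward op under the derived name "my_relu_grad".
    PD_BUILD_GRAD_OP(my_relu)
        .Inputs({"X", "Out", paddle::Grad("Out")})
        .Outputs({paddle::Grad("X")})
        .SetKernelFn(PD_KERNEL(MyBackward));
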
@@ -19,6 +19,7 @@ limitations under the License. */
 #include <vector>

 #include "paddle/fluid/framework/custom_operator.h"
+#include "paddle/fluid/platform/enforce.h"

 namespace paddle {

@@ -62,11 +63,38 @@ OpMetaInfoMap::GetMap() const {

 //////////////// Op Meta Info Builder /////////////////

-OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name) {
+OpMetaInfoBuilder::OpMetaInfoBuilder(std::string&& name, size_t index) {
+  // 1. member assign
   name_ = std::forward<std::string>(name);
+  index_ = index;
+
+  // 2. check and build meta info
   auto& info_vector = OpMetaInfoMap::Instance()[name_];
+  // index check
+  PADDLE_ENFORCE_EQ(
+      info_vector.size(), index_,
+      platform::errors::PreconditionNotMet(
+          "The operator %s's meta info register failed. "
+          "Please make sure you call the macros in the order `PD_BUILD_OP`, "
+          "`PD_BUILD_GRAD_OP`, `PD_BUILD_DOUBLE_GRAD_OP`.",
+          name_));
+  switch (index_) {
+    case 0:
+      break;
+    case 1:
+      name_ = name_ + "_grad";
+      break;
+    case 2:
+      name_ = name_ + "_grad_grad";
+      break;
+    default:
+      PADDLE_THROW(platform::errors::InvalidArgument(
+          "Unsupported index `%d` when constructing OpMetaInfoBuilder; "
+          "only `0, 1, 2` are supported.",
+          index_));
+  }
   auto op_meta = OpMetaInfo(name_);
   info_vector.emplace_back(std::move(op_meta));
+  // 3. get current info ptr
   info_ptr_ = &(info_vector.back());
 }
@@ -93,24 +121,27 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) {
 }

 OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) {
+  PADDLE_ENFORCE_EQ(
+      index_, 0UL,
+      platform::errors::Unimplemented(
+          "Currently, setting the InferShapeFn of a grad op is not "
+          "supported; the backward Tensor `X@GRAD` will use the shape of "
+          "the forward Tensor `X` by default."));
   info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
   return *this;
 }

 OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) {
+  PADDLE_ENFORCE_EQ(
+      index_, 0UL,
+      platform::errors::Unimplemented(
+          "Currently, setting the InferDtypeFn of a grad op is not "
+          "supported; the backward Tensor `X@GRAD` will use the dtype of "
+          "the forward Tensor `X` by default."));
   info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
   return *this;
 }

-OpMetaInfoBuilder& OpMetaInfoBuilder::SetBackwardOp(
-    const std::string& bwd_op_name) {
-  auto& info_vector = OpMetaInfoMap::Instance()[name_];
-  auto op_meta = OpMetaInfo(bwd_op_name);
-  info_vector.emplace_back(std::move(op_meta));
-  info_ptr_ = &(info_vector.back());
-  return *this;
-}
-
 /////////////////////// Op register API /////////////////////////

 void RegisterAllCustomOperator() {
...
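
Note that `SetInferShapeFn`/`SetInferDtypeFn` now enforce `index_ == 0`, so attaching an infer function to a backward op fails fast. A sketch of what is now rejected (hypothetical op name and functions):

    PD_BUILD_GRAD_OP(my_relu)
        .Inputs({paddle::Grad("Out")})
        .Outputs({paddle::Grad("X")})
        .SetKernelFn(PD_KERNEL(MyBackward))
        // Raises an `Unimplemented` error at registration time: grad ops
        // reuse the forward Tensor's shape/dtype by default.
        .SetInferShapeFn(PD_INFER_SHAPE(MyShapeFn));
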
@@ -153,12 +153,21 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
   }

   VLOG(1) << "Run ComputeFunc.";
-  auto outs = func(custom_ins, custom_attrs);
+  try {
+    auto outs = func(custom_ins, custom_attrs);

-  VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
-  for (size_t i = 0; i < outputs.size(); ++i) {
-    auto* true_out = ctx.Output<Tensor>(outputs[i]);
-    CustomTensorUtils::ShareDataTo(outs.at(i), true_out);
+    VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      auto* true_out = ctx.Output<Tensor>(outputs[i]);
+      CustomTensorUtils::ShareDataTo(outs.at(i), true_out);
+    }
+  } catch (platform::EnforceNotMet& exception) {
+    throw std::move(exception);
+  } catch (std::exception& ex) {
+    PADDLE_THROW(platform::errors::External("%s", ex.what()));
+  } catch (...) {
+    PADDLE_THROW(platform::errors::Fatal(
+        "Custom operator raises an unknown exception at runtime."));
   }
 }
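
With this change, a user kernel may simply throw; any `std::exception` escaping the kernel is rewrapped as a Paddle `External` error instead of crashing the process. A hedged sketch of a kernel relying on this behavior (the function and its check are hypothetical):

    std::vector<paddle::Tensor> MyKernel(const paddle::Tensor& x) {
      if (x.shape().size() != 2) {
        // RunKernelFunc catches this and rethrows it as
        // platform::errors::External with the same message.
        throw std::runtime_error("MyKernel expects a 2-D input");
      }
      return {x};
    }
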
@@ -475,58 +484,108 @@ void RegisterOperatorWithMetaInfo(
           op_name, info.proto_->InitializationErrorString()));

   // InferShape
-  PADDLE_ENFORCE_NOT_NULL(
-      infer_shape_func,
-      platform::errors::PreconditionNotMet(
-          "InferShapeFn is nullptr. Need to set the InferShapeFn of custom "
-          "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))"));
-  info.infer_shape_ = [op_inputs, op_outputs,
-                       infer_shape_func](InferShapeContext* ctx) {
-    std::vector<std::vector<int64_t>> input_shapes;
-
-    VLOG(1) << "Custom Operator: InferShape - get input ddim.";
-    for (auto& in_name : op_inputs) {
-      OP_INOUT_CHECK(ctx->HasInput(in_name), "Input", in_name, "Custom");
-      auto ddim = ctx->GetInputDim(in_name);
-      input_shapes.emplace_back(framework::vectorize(ddim));
-    }
-
-    VLOG(1) << "Custom Operator: InferShape - calc output ddim.";
-    auto output_shapes = infer_shape_func(input_shapes);
-
-    VLOG(1) << "Custom Operator: InferShape - set output ddim.";
-    for (size_t i = 0; i < op_outputs.size(); ++i) {
-      ctx->SetOutputDim(op_outputs[i], framework::make_ddim(output_shapes[i]));
-    }
-  };
+  if (infer_shape_func == nullptr) {
+    // use default InferShape
+    info.infer_shape_ = [op_inputs, op_outputs](InferShapeContext* ctx) {
+      PADDLE_ENFORCE_EQ(
+          op_inputs.size(), 1UL,
+          platform::errors::Unavailable(
+              "Your custom operator contains multiple inputs. "
+              "We only allow a custom operator that contains only one input "
+              "and only one output without setting the InferShapeFn. "
+              "At this time, the input shape will be directly set to "
+              "the output shape.\n"
+              "Please set the InferShapeFn of custom "
+              "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))"));
+      PADDLE_ENFORCE_EQ(
+          op_outputs.size(), 1UL,
+          platform::errors::Unavailable(
+              "Your custom operator contains multiple outputs. "
+              "We only allow a custom operator that contains only one input "
+              "and only one output without setting the InferShapeFn. "
+              "At this time, the input shape will be directly set to "
+              "the output shape.\n"
+              "Please set the InferShapeFn of custom "
+              "operator by .SetInferShapeFn(PD_INFER_SHAPE(...))"));

+      VLOG(1) << "Custom Operator: Default InferShape - share ddim.";
+      ctx->ShareDim(op_inputs[0], op_outputs[0]);
+    };
+  } else {
+    info.infer_shape_ = [op_inputs, op_outputs,
+                         infer_shape_func](InferShapeContext* ctx) {
+      std::vector<std::vector<int64_t>> input_shapes;
+
+      VLOG(1) << "Custom Operator: InferShape - get input ddim.";
+      for (auto& in_name : op_inputs) {
+        OP_INOUT_CHECK(ctx->HasInput(in_name), "Input", in_name, "Custom");
+        auto ddim = ctx->GetInputDim(in_name);
+        input_shapes.emplace_back(framework::vectorize(ddim));
+      }
+
+      VLOG(1) << "Custom Operator: InferShape - calc output ddim.";
+      auto output_shapes = infer_shape_func(input_shapes);
+
+      VLOG(1) << "Custom Operator: InferShape - set output ddim.";
+      for (size_t i = 0; i < op_outputs.size(); ++i) {
+        ctx->SetOutputDim(op_outputs[i],
+                          framework::make_ddim(output_shapes[i]));
+      }
+    };
+  }

   // Infer Dtype
-  PADDLE_ENFORCE_NOT_NULL(
-      infer_dtype_func,
-      platform::errors::PreconditionNotMet(
-          "InferDtypeFn is nullptr. Need to set the InferDtypeFn of custom "
-          "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))"));
-  info.infer_var_type_ = [op_inputs, op_outputs,
-                          infer_dtype_func](InferVarTypeContext* ctx) {
-    std::vector<DataType> input_dtypes;
-
-    VLOG(1) << "Custom Operator: InferDtype - get input dtype.";
-    for (auto& in_name : op_inputs) {
-      auto dtype = ctx->GetInputDataType(in_name);
-      input_dtypes.emplace_back(
-          CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype));
-    }
-
-    VLOG(1) << "Custom Operator: InferDtype - infer output dtype.";
-    auto output_dtypes = infer_dtype_func(input_dtypes);
-
-    VLOG(1) << "Custom Operator: InferDtype - set output dtype.";
-    for (size_t i = 0; i < op_outputs.size(); ++i) {
-      ctx->SetOutputDataType(
-          op_outputs[i],
-          CustomTensorUtils::ConvertEnumDTypeToInnerDType(output_dtypes[i]));
-    }
-  };
+  if (infer_dtype_func == nullptr) {
+    // use default InferDtype
+    info.infer_var_type_ = [op_inputs, op_outputs](InferVarTypeContext* ctx) {
+      PADDLE_ENFORCE_EQ(
+          op_inputs.size(), 1UL,
+          platform::errors::Unavailable(
+              "Your custom operator contains multiple inputs. "
+              "We only allow a custom operator that contains only one input "
+              "and only one output without setting the InferDtypeFn. "
+              "At this time, the input dtype will be directly set to "
+              "the output dtype.\n"
+              "Please set the InferDtypeFn of custom "
+              "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))"));
+      PADDLE_ENFORCE_EQ(
+          op_outputs.size(), 1UL,
+          platform::errors::Unavailable(
+              "Your custom operator contains multiple outputs. "
+              "We only allow a custom operator that contains only one input "
+              "and only one output without setting the InferDtypeFn. "
+              "At this time, the input dtype will be directly set to "
+              "the output dtype.\n"
+              "Please set the InferDtypeFn of custom "
+              "operator by .SetInferDtypeFn(PD_INFER_DTYPE(...))"));
+
+      VLOG(1) << "Custom Operator: Default InferDtype - share dtype.";
+      auto dtype = ctx->GetInputDataType(op_inputs[0]);
+      ctx->SetOutputDataType(op_outputs[0], dtype);
+    };
+  } else {
+    info.infer_var_type_ = [op_inputs, op_outputs,
+                            infer_dtype_func](InferVarTypeContext* ctx) {
+      std::vector<DataType> input_dtypes;
+
+      VLOG(1) << "Custom Operator: InferDtype - get input dtype.";
+      for (auto& in_name : op_inputs) {
+        auto dtype = ctx->GetInputDataType(in_name);
+        input_dtypes.emplace_back(
+            CustomTensorUtils::ConvertInnerDTypeToEnumDType(dtype));
+      }
+
+      VLOG(1) << "Custom Operator: InferDtype - infer output dtype.";
+      auto output_dtypes = infer_dtype_func(input_dtypes);
+
+      VLOG(1) << "Custom Operator: InferDtype - set output dtype.";
+      for (size_t i = 0; i < op_outputs.size(); ++i) {
+        ctx->SetOutputDataType(
+            op_outputs[i],
+            CustomTensorUtils::ConvertEnumDTypeToInnerDType(output_dtypes[i]));
+      }
+    };
+  }

   // Kernel func
   RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs, op_attrs);
...
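
Because of the new fallback branches, a custom operator with exactly one input and one output no longer needs explicit infer functions: the output simply shares the input's ddim and dtype. That is why the `SetInferShapeFn`/`SetInferDtypeFn` calls (and their trivial `InferShape`/`InferDType` helpers) disappear from the relu and dispatch test operators below, while multi-output operators such as `multi_out` keep their own infer functions. A minimal sketch (the `Identity` kernel is a hypothetical placeholder):

    std::vector<paddle::Tensor> Identity(const paddle::Tensor& x) { return {x}; }

    PD_BUILD_OP(identity)
        .Inputs({"X"})
        .Outputs({"Out"})
        // No SetInferShapeFn/SetInferDtypeFn: the defaults share X's shape/dtype.
        .SetKernelFn(PD_KERNEL(Identity));
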
@@ -3,10 +3,12 @@ if(WITH_GPU)
     # 'test_custom_relu_op_setup/jit' compile .cc and .cu file
     py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
     py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
+    py_test(test_custom_relu_model SRCS test_custom_relu_model.py)

     # Compiling shared library will cost some time, but running process is very fast.
     set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
     set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
+    set_tests_properties(test_custom_relu_model PROPERTIES TIMEOUT 180)
 endif()

 py_test(test_sysconfig SRCS test_sysconfig.py)
...
@@ -150,15 +150,7 @@ std::vector<paddle::Tensor> AttrTestBackward(
   return {grad_x};
 }

-std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
-  return {x_shape};
-}
-
-std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
-  return {x_dtype};
-}
-
-PD_BUILD_OP("attr_test")
+PD_BUILD_OP(attr_test)
     .Inputs({"X"})
     .Outputs({"Out"})
     .Attrs({"bool_attr: bool",
@@ -170,10 +162,9 @@ PD_BUILD_OP("attr_test")
             "float_vec_attr: std::vector<float>",
             "int64_vec_attr: std::vector<int64_t>",
             "str_vec_attr: std::vector<std::string>"})
-    .SetKernelFn(PD_KERNEL(AttrTestForward))
-    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType))
-    .SetBackwardOp("attr_test_grad")
+    .SetKernelFn(PD_KERNEL(AttrTestForward));
+
+PD_BUILD_GRAD_OP(attr_test)
     .Inputs({paddle::Grad("Out")})
     .Outputs({paddle::Grad("X")})
     .Attrs({"int_attr: int",
...
@@ -96,21 +96,12 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
   }
 }

-std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape) {
-  return {x_shape};
-}
-
-std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype) {
-  return {x_dtype};
-}
-
-PD_BUILD_OP("custom_relu")
+PD_BUILD_OP(custom_relu)
     .Inputs({"X"})
     .Outputs({"Out"})
-    .SetKernelFn(PD_KERNEL(ReluForward))
-    .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
-    .SetBackwardOp("relu2_grad")
+    .SetKernelFn(PD_KERNEL(ReluForward));
+
+PD_BUILD_GRAD_OP(custom_relu)
     .Inputs({"X", "Out", paddle::Grad("Out")})
     .Outputs({paddle::Grad("X")})
     .SetKernelFn(PD_KERNEL(ReluBackward));
@@ -25,19 +25,14 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
                                          const paddle::Tensor& out,
                                          const paddle::Tensor& grad_out);

-std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape);
-
-std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype);
-
 // Reuse the code in `custom_relu_op.cc/cu` to register another custom operator,
 // to test jointly compiling multiple operators at the same time.
-PD_BUILD_OP("custom_relu_dup")
+PD_BUILD_OP(custom_relu_dup)
     .Inputs({"X"})
     .Outputs({"Out"})
-    .SetKernelFn(PD_KERNEL(ReluForward))
-    .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
-    .SetBackwardOp("relu3_grad")
+    .SetKernelFn(PD_KERNEL(ReluForward));
+
+PD_BUILD_GRAD_OP(custom_relu_dup)
    .Inputs({"X", "Out", paddle::Grad("Out")})
    .Outputs({paddle::Grad("X")})
    .SetKernelFn(PD_KERNEL(ReluBackward));
@@ -26,14 +26,6 @@ void assign_cpu_kernel(const data_t* x_data,
   }
 }

-std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
-  return {x_shape};
-}
-
-std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
-  return {x_dtype};
-}
-
 std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) {
   auto out = paddle::Tensor(paddle::PlaceType::kCPU);
   out.reshape(x.shape());
@@ -47,12 +39,10 @@ std::vector<paddle::Tensor> DispatchTestInterger(const paddle::Tensor& x) {
   return {out};
 }

-PD_BUILD_OP("dispatch_test_integer")
+PD_BUILD_OP(dispatch_test_integer)
     .Inputs({"X"})
     .Outputs({"Out"})
-    .SetKernelFn(PD_KERNEL(DispatchTestInterger))
-    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
+    .SetKernelFn(PD_KERNEL(DispatchTestInterger));

 std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
   auto out = paddle::Tensor(paddle::PlaceType::kCPU);
@@ -67,12 +57,10 @@ std::vector<paddle::Tensor> DispatchTestComplex(const paddle::Tensor& x) {
   return {out};
 }

-PD_BUILD_OP("dispatch_test_complex")
+PD_BUILD_OP(dispatch_test_complex)
     .Inputs({"X"})
     .Outputs({"Out"})
-    .SetKernelFn(PD_KERNEL(DispatchTestComplex))
-    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
+    .SetKernelFn(PD_KERNEL(DispatchTestComplex));

 std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
     const paddle::Tensor& x) {
@@ -88,12 +76,10 @@ std::vector<paddle::Tensor> DispatchTestFloatAndInteger(
   return {out};
 }

-PD_BUILD_OP("dispatch_test_float_and_integer")
+PD_BUILD_OP(dispatch_test_float_and_integer)
     .Inputs({"X"})
     .Outputs({"Out"})
-    .SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger))
-    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
+    .SetKernelFn(PD_KERNEL(DispatchTestFloatAndInteger));

 std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
     const paddle::Tensor& x) {
@@ -109,12 +95,10 @@ std::vector<paddle::Tensor> DispatchTestFloatAndComplex(
   return {out};
 }

-PD_BUILD_OP("dispatch_test_float_and_complex")
+PD_BUILD_OP(dispatch_test_float_and_complex)
     .Inputs({"X"})
     .Outputs({"Out"})
-    .SetKernelFn(PD_KERNEL(DispatchTestFloatAndComplex))
-    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
+    .SetKernelFn(PD_KERNEL(DispatchTestFloatAndComplex));

 std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
     const paddle::Tensor& x) {
@@ -130,9 +114,7 @@ std::vector<paddle::Tensor> DispatchTestFloatAndIntegerAndComplex(
   return {out};
 }

-PD_BUILD_OP("dispatch_test_float_and_integer_and_complex")
+PD_BUILD_OP(dispatch_test_float_and_integer_and_complex)
     .Inputs({"X"})
     .Outputs({"Out"})
-    .SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex))
-    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
-    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType));
+    .SetKernelFn(PD_KERNEL(DispatchTestFloatAndIntegerAndComplex));
@@ -68,7 +68,7 @@ std::vector<paddle::DataType> InferDtype(paddle::DataType x_dtype) {
   return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32};
 }

-PD_BUILD_OP("multi_out")
+PD_BUILD_OP(multi_out)
     .Inputs({"X"})
     .Outputs({"Out", "Fake_float64", "ZFake_int32"})
     .SetKernelFn(PD_KERNEL(MultiOutCPU))
...
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np
import paddle
from paddle import nn
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_compile_args
# Because Windows doesn't use docker, the shared lib already exists in the
# cache dir; it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_for_model_jit\\custom_relu_for_model_jit.pyd'.format(
get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
cmd = 'del {}'.format(file)
run_cmd(cmd, True)
# Compile and load custom op Just-In-Time.
# custom_relu_op_dup.cc is only used for the multi-op test and does not
# define a new op; if you want to test only one op, remove this source
# file.
custom_module = load(
name='custom_relu_for_model_jit',
sources=['custom_relu_op.cc', 'custom_relu_op.cu'],
extra_include_paths=paddle_includes, # add for Coverage CI
extra_cxx_cflags=extra_compile_args, # add for Coverage CI
extra_cuda_cflags=extra_compile_args, # add for Coverage CI
verbose=True)
class Net(nn.Layer):
"""
    A simple example of a regression model.
"""
def __init__(self, in_dim, out_dim, use_custom_op=False):
super(Net, self).__init__()
self.fc1 = nn.Linear(in_dim, in_dim)
self.fc2 = nn.Linear(in_dim, out_dim)
self.relu_act = custom_module.custom_relu if use_custom_op else nn.functional.relu
def forward(self, x):
out = self.fc1(x)
out = self.relu_act(out)
out = self.fc2(out)
out = self.relu_act(out)
out = paddle.mean(out, axis=-1)
return out
class TestDygraphModel(unittest.TestCase):
def setUp(self):
self.seed = 2021
self.in_dim = 10
self.out_dim = 64
self.batch_num = 10
self.batch_size = 4
self.datas = [
np.random.uniform(
size=[self.batch_size, self.in_dim]).astype('float32')
for i in range(self.batch_num)
]
self.labels = [
np.random.uniform(size=[self.batch_size, 1]).astype('float32')
for i in range(self.batch_num)
]
self.devices = ['cpu', 'gpu']
# for saving model
        self.model_path_template = "infer_model/custom_relu_dygraph_model_{}.pdparams"
self.model_dy2stat_path = "infer_model/custom_relu_model_dy2sta"
# for dy2stat
self.x_spec = paddle.static.InputSpec(
shape=[None, self.in_dim], dtype='float32', name='x')
def test_train_eval(self):
for device in self.devices:
# set device
paddle.set_device(device)
# for train
origin_relu_train_out = self.train_model(use_custom_op=False)
custom_relu_train_out = self.train_model(use_custom_op=True)
custom_relu_dy2stat_train_out = self.train_model(
use_custom_op=True, dy2stat=True) # for to_static
self.assertTrue(
np.array_equal(origin_relu_train_out, custom_relu_train_out))
self.assertTrue(
np.array_equal(origin_relu_train_out,
custom_relu_dy2stat_train_out))
# for eval
origin_relu_eval_out = self.eval_model(use_custom_op=False)
custom_relu_eval_out = self.eval_model(use_custom_op=True)
custom_relu_dy2stat_eval_out = self.eval_model(
use_custom_op=True, dy2stat=True) # for to_static
self.assertTrue(
np.array_equal(origin_relu_eval_out, custom_relu_eval_out))
self.assertTrue(
np.array_equal(origin_relu_eval_out,
custom_relu_dy2stat_eval_out))
def train_model(self, use_custom_op=False, dy2stat=False):
# reset random seed
paddle.seed(self.seed)
np.random.seed(self.seed)
# paddle.framework.random._manual_program_seed(SEED)
net = Net(self.in_dim, self.out_dim, use_custom_op)
if dy2stat:
net = paddle.jit.to_static(net, input_spec=[self.x_spec])
mse_loss = paddle.nn.MSELoss()
sgd = paddle.optimizer.SGD(learning_rate=0.1,
parameters=net.parameters())
for batch_id in range(self.batch_num):
x = paddle.to_tensor(self.datas[batch_id])
y = paddle.to_tensor(self.labels[batch_id])
out = net(x)
loss = mse_loss(out, y)
loss.backward()
sgd.minimize(loss)
net.clear_gradients()
# save inference model
net.eval()
if dy2stat:
paddle.jit.save(net, self.model_dy2stat_path)
else:
paddle.save(net.state_dict(),
self.model_path_template.format(use_custom_op))
return out.numpy()
def eval_model(self, use_custom_op=False, dy2stat=False):
net = Net(self.in_dim, self.out_dim, use_custom_op)
if dy2stat:
net = paddle.jit.load(self.model_dy2stat_path)
else:
state_dict = paddle.load(
self.model_path_template.format(use_custom_op))
net.set_state_dict(state_dict)
sample_x = paddle.to_tensor(self.datas[0])
net.eval()
out = net(sample_x)
return out.numpy()
class TestStaticModel(unittest.TestCase):
def setUp(self):
self.seed = 2021
self.in_dim = 10
self.out_dim = 64
self.batch_num = 10
self.batch_size = 8
self.datas = [
np.random.uniform(
size=[self.batch_size, self.in_dim]).astype('float32')
for i in range(self.batch_num)
]
self.labels = [
np.random.uniform(size=[self.batch_size, 1]).astype('float32')
for i in range(self.batch_num)
]
self.devices = ['cpu', 'gpu']
# for saving model
self.model_path_template = "infer_model/custom_relu_static_model_{}_{}"
paddle.enable_static()
def tearDown(self):
paddle.disable_static()
def test_train_eval(self):
for device in self.devices:
# for train
original_relu_train_out = self.train_model(
device, use_custom_op=False)
custom_relu_train_out = self.train_model(device, use_custom_op=True)
# using PE
original_relu_train_pe_out = self.train_model(
device, use_custom_op=False, use_pe=True)
custom_relu_train_pe_out = self.train_model(
device, use_custom_op=True, use_pe=True)
print(original_relu_train_out)
print(custom_relu_train_out)
print(original_relu_train_pe_out)
print(custom_relu_train_pe_out)
self.assertTrue(
np.array_equal(original_relu_train_out, custom_relu_train_out))
self.assertTrue(
np.array_equal(original_relu_train_pe_out,
custom_relu_train_pe_out))
# for eval
original_relu_eval_out = self.eval_model(
device, use_custom_op=False)
custom_relu_eval_out = self.eval_model(device, use_custom_op=True)
# using PE
original_relu_eval_pe_out = self.eval_model(
device, use_custom_op=False, use_pe=True)
custom_relu_eval_pe_out = self.eval_model(
device, use_custom_op=True, use_pe=True)
print(original_relu_eval_out)
print(custom_relu_eval_out)
print(original_relu_eval_pe_out)
print(custom_relu_eval_pe_out)
self.assertTrue(
np.array_equal(original_relu_eval_out, custom_relu_eval_out))
self.assertTrue(
np.array_equal(original_relu_eval_pe_out,
custom_relu_eval_pe_out))
def train_model(self, device, use_custom_op=False, use_pe=False):
# reset random seed
paddle.seed(self.seed)
np.random.seed(self.seed)
# set device
paddle.set_device(device)
with paddle.static.scope_guard(paddle.static.Scope()):
with paddle.static.program_guard(paddle.static.Program()):
x = paddle.static.data(
shape=[None, self.in_dim], name='x', dtype='float32')
y = paddle.static.data(
shape=[None, 1], name='y', dtype='float32')
net = Net(self.in_dim, self.out_dim, use_custom_op)
out = net(x)
loss = nn.functional.mse_loss(out, y)
sgd = paddle.optimizer.SGD(learning_rate=0.01)
sgd.minimize(loss)
                exe = paddle.static.Executor()
exe.run(paddle.static.default_startup_program())
# For PE
if use_pe:
                    places = paddle.static.cpu_places(
                    ) if device == 'cpu' else paddle.static.cuda_places()
main_program = paddle.static.CompiledProgram(
paddle.static.default_main_program(
)).with_data_parallel(
loss_name=loss.name, places=places)
else:
main_program = paddle.static.default_main_program()
for batch_id in range(self.batch_num):
x_data = self.datas[batch_id]
y_data = self.labels[batch_id]
res = exe.run(main_program,
feed={'x': x_data,
'y': y_data},
fetch_list=[out])
# save model
paddle.static.save_inference_model(
self.model_path_template.format(use_custom_op, use_pe),
[x], [out], exe)
return res[0]
def eval_model(self, device, use_custom_op=False, use_pe=False):
paddle.set_device(device)
with paddle.static.scope_guard(paddle.static.Scope()):
with paddle.static.program_guard(paddle.static.Program()):
exe = paddle.static.Executor()
[inference_program, feed_target_names,
fetch_targets] = paddle.static.load_inference_model(
self.model_path_template.format(use_custom_op, use_pe),
exe)
x_data = self.datas[0]
results = exe.run(inference_program,
feed={feed_target_names[0]: x_data},
fetch_list=fetch_targets)
return results[0]
if __name__ == '__main__':
unittest.main()
@@ -22,11 +22,14 @@ from setuptools.command.easy_install import easy_install
 from setuptools.command.build_ext import build_ext
 from distutils.command.build import build

-from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context
-from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags, add_std_without_repeat, get_build_directory
-from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from
-from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS, OS_NAME
-from .extension_utils import use_new_custom_op_load_method, MSVC_COMPILE_FLAGS
+from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag
+from .extension_utils import is_cuda_file, prepare_unix_cudaflags, prepare_win_cudaflags
+from .extension_utils import _import_module_from_library, _write_setup_file, _jit_compile
+from .extension_utils import check_abi_compatibility, log_v, CustomOpInfo, parse_op_name_from
+from .extension_utils import use_new_custom_op_load_method, clean_object_if_change_cflags
+from .extension_utils import bootstrap_context, get_build_directory, add_std_without_repeat
+from .extension_utils import IS_WINDOWS, OS_NAME, MSVC_COMPILE_FLAGS

 # Note(zhouwei): On windows, it will export function 'PyInit_[name]' by default,
 # The solution is: 1.User add function PyInit_[name] 2. set not to export
@@ -357,6 +360,13 @@ class BuildExtension(build_ext, object):

     def build_extensions(self):
         self._check_abi()
+
+        # Note(Aurelius84): If the source has been compiled before, we should
+        # check whether the cflags have changed and delete the built shared
+        # library to re-compile the source, even though the source file
+        # content is unchanged.
+        so_name = self.get_ext_fullpath(self.extensions[0].name)
+        clean_object_if_change_cflags(
+            os.path.abspath(so_name), self.extensions[0])
+
         # Consider .cu, .cu.cc as valid source extensions.
         self.compiler.src_extensions += ['.cu', '.cu.cc']
         # Save the original _compile method for later.
...
@@ -16,7 +16,9 @@ import os
 import re
 import six
 import sys
+import json
 import glob
+import hashlib
 import logging
 import collections
 import textwrap
@@ -219,6 +221,106 @@ class CustomOpInfo:
         return next(reversed(self.op_info_map.items()))


+VersionFields = collections.namedtuple('VersionFields', [
+    'sources',
+    'extra_compile_args',
+    'extra_link_args',
+    'library_dirs',
+    'runtime_library_dirs',
+    'include_dirs',
+    'define_macros',
+    'undef_macros',
+])
+
+
+class VersionManager:
+    def __init__(self, version_field):
+        self.version_field = version_field
+        self.version = self.hasher(version_field)
+
+    def hasher(self, version_field):
+        from paddle.fluid.layers.utils import flatten
+
+        md5 = hashlib.md5()
+        for field in version_field._fields:
+            elem = getattr(version_field, field)
+            if not elem: continue
+            if isinstance(elem, (list, tuple, dict)):
+                flat_elem = flatten(elem)
+                md5 = combine_hash(md5, tuple(flat_elem))
+            else:
+                raise RuntimeError(
+                    "Only list, tuple and dict types are supported, "
+                    "but received {} with {}.".format(type(elem), elem))
+
+        return md5.hexdigest()
+
+    @property
+    def details(self):
+        return self.version_field._asdict()
+
+
+def combine_hash(md5, value):
+    """
+    Return a new hash value.
+    DO NOT use `hash()` because it doesn't generate a stable value across processes.
+    See https://stackoverflow.com/questions/27522626/hash-function-in-python-3-3-returns-different-results-between-sessions
+    """
+    md5.update(repr(value).encode())
+    return md5
+
+
+def clean_object_if_change_cflags(so_path, extension):
+    """
+    If the source has been compiled before, check whether the cflags
+    have changed and delete the built object to re-compile the source,
+    even though the source file content is unchanged.
+    """
+
+    def serialize(path, version_info):
+        assert isinstance(version_info, dict)
+        with open(path, 'w') as f:
+            f.write(json.dumps(version_info, indent=4, sort_keys=True))
+
+    def deserialize(path):
+        assert os.path.exists(path)
+        with open(path, 'r') as f:
+            content = f.read()
+        return json.loads(content)
+
+    # version file
+    VERSION_FILE = "version.txt"
+    base_dir = os.path.dirname(so_path)
+    so_name = os.path.basename(so_path)
+    version_file = os.path.join(base_dir, VERSION_FILE)
+
+    # version info
+    args = [getattr(extension, field, None) for field in VersionFields._fields]
+    version_field = VersionFields._make(args)
+    versioner = VersionManager(version_field)
+
+    if os.path.exists(so_path) and os.path.exists(version_file):
+        old_version_info = deserialize(version_file)
+        so_version = old_version_info.get(so_name, None)
+        # delete the shared library file if the version has changed, to re-compile it
+        if so_version is not None and so_version != versioner.version:
+            log_v(
+                "Re-Compiling {}, because specified cflags have been changed. "
+                "New signature {} has been saved into {}.".format(
+                    so_name, versioner.version, version_file))
+            os.remove(so_path)
+            # update new version information
+            new_version_info = versioner.details
+            new_version_info[so_name] = versioner.version
+            serialize(version_file, new_version_info)
+    else:
+        # If compiling for the first time, save compilation details for debugging.
+        if not os.path.exists(base_dir):
+            os.makedirs(base_dir)
+        details = versioner.details
+        details[so_name] = versioner.version
+        serialize(version_file, details)
+
+
 def prepare_unix_cudaflags(cflags):
     """
     Prepare all necessary compiled flags for nvcc compiling CUDA files.
...