Commit a5a63ad4 authored by M MyPandaShaoxiang

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite into fpga_patch

test=develop
......@@ -108,7 +108,7 @@ void yolobox(lite::Tensor* X,
auto anchors_data = anchors.data();
const float* X_data = X->data<float>();
float* ImgSize_data = ImgSize->mutable_data<float>();
int* ImgSize_data = ImgSize->mutable_data<int>();
float* Boxes_data = Boxes->mutable_data<float>();
......@@ -116,8 +116,8 @@ void yolobox(lite::Tensor* X,
float box[4];
for (int i = 0; i < n; i++) {
int img_height = static_cast<int>(ImgSize_data[2 * i]);
int img_width = static_cast<int>(ImgSize_data[2 * i + 1]);
int img_height = ImgSize_data[2 * i];
int img_width = ImgSize_data[2 * i + 1];
for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) {
......
......@@ -45,7 +45,8 @@ inline ActivationType GetActivationType(const std::string &type) {
} else if (type == "identity" || type == "") {
return ActivationType::kIdentity;
}
PADDLE_ENFORCE(false, "Not support type %s", type);
LOG(ERROR) << "Not support type " << type;
// PADDLE_ENFORCE(false, "Not support type %s", type);
// PADDLE_THROW("Not support type %s.", type);
}
......
......@@ -19,7 +19,7 @@ limitations under the License. */
#include "lite/core/context.h"
#include "lite/core/tensor.h"
#include "lite/fluid/eigen.h"
#include "lite/fluid/lod.h"
// #include "lite/fluid/lod.h"
#include "lite/utils/paddle_enforce.h"
namespace paddle {
......
# lite_cc_library(activation_compute_x86 SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
add_kernel(activation_compute_x86 X86 basic SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_ops)
# lite_cc_library(mean_compute_x86 SRCS mean_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_library(fill_constant_compute_x86 SRCS fill_constant_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_library(sgd_compute_x86 SRCS sgd_compute.cc DEPS ${lite_kernel_deps})
......@@ -18,13 +18,15 @@ add_kernel(reshape_compute_x86 X86 basic SRCS reshape_compute.cc DEPS ${lite_ker
# lite_cc_library(pool_compute_x86 SRCS pool_compute.cc DEPS ${lite_kernel_deps} pooling)
# lite_cc_library(batch_norm_compute_x86 SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_library(uniform_random_compute_x86 SRCS uniform_random_compute.cc DEPS ${lite_kernel_deps} )
add_kernel(gru_compute_x86 X86 basic SRCS gru_compute.cc DEPS ${lite_kernel_deps} blas math_function sequence2batch gru_compute)
#add_kernel(gru_compute_x86 X86 basic SRCS gru_compute.cc DEPS ${lite_kernel_deps})
add_kernel(sequence_expand_as_compute_x86 X86 basic SRCS sequence_expand_as_compute.cc DEPS ${lite_kernel_deps})
# lite_cc_test(test_fc_compute_x86 SRCS fc_compute_test.cc DEPS fc_compute_x86)
# lite_cc_test(test_conv2d_compute_x86 SRCS conv_compute_test.cc DEPS conv_compute_x86)
# lite_cc_test(test_pool2d_compute_x86 SRCS pool_compute_test.cc DEPS pool_compute_x86)
# lite_cc_test(test_softmax_compute_x86 SRCS softmax_compute_test.cc DEPS softmax_compute_x86)
# lite_cc_test(test_elementwise_compute_x86 SRCS elementwise_compute_test.cc DEPS elementwise_compute_x86)
# lite_cc_test(test_relu_compute_x86 SRCS relu_compute_test.cc DEPS relu_compute_x86)
# lite_cc_test(test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86)
# lite_cc_test(test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86)
# lite_cc_test(test_batch_norm_compute_x86 SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_x86)
......@@ -49,4 +51,7 @@ lite_cc_test(test_sequence_pool_compute_x86 SRCS sequence_pool_compute_test.cc D
lite_cc_test(test_shape_compute_x86 SRCS shape_compute_test.cc DEPS shape_compute_x86)
lite_cc_test(test_softmax_compute_x86 SRCS softmax_compute_test.cc DEPS softmax_compute_x86)
lite_cc_test(test_elementwise_compute_x86 SRCS elementwise_compute_test.cc DEPS elementwise_compute_x86)
lite_cc_test(test_relu_compute_x86 SRCS relu_compute_test.cc DEPS activation_compute_x86)
lite_cc_test(test_sequence_expand_as_compute_x86 SRCS sequence_expand_as_compute_test.cc DEPS sequence_expand_as_compute_x86)
lite_cc_test(test_gru_compute_x86 SRCS gru_compute_test.cc DEPS gru_compute_x86)
lite_cc_test(test_matmul_compute_x86 SRCS matmul_compute_test.cc DEPS matmul_compute_x86)
......@@ -12,94 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/activation_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
template <typename Functor>
void Activate(const platform::CPUDeviceContext& context,
const framework::LoDTensor* X,
framework::LoDTensor* Out) {
using T = typename Functor::ELEMENT_TYPE;
auto* place = context.eigen_device();
auto x =
framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(X));
auto out =
framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(Out));
Functor()(*place, x, out);
}
template <typename Functor>
void ActivateGrad(const platform::CPUDeviceContext& context,
const framework::LoDTensor* X,
const framework::LoDTensor* Out,
const framework::LoDTensor* Out_grad,
framework::LoDTensor* X_grad) {
using T = typename Functor::ELEMENT_TYPE;
auto* place = context.eigen_device();
auto x =
framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(X));
auto out =
framework::EigenVector<T>::Flatten(paddle::operators::detail::Ref(Out));
auto x_grad = framework::EigenVector<T>::Flatten(
paddle::operators::detail::Ref(X_grad));
auto out_grad = framework::EigenVector<T>::Flatten(
paddle::operators::detail::Ref(Out_grad));
Functor()(*place, x, out, out_grad, x_grad);
}
template <typename T>
class SquareCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationParam;
void Run() override {
auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::ActivationParam>();
CHECK(context.x86_device_context());
param.Out->template mutable_data<T>();
Activate<paddle::operators::SquareFunctor<T>>(*context.x86_device_context(),
&param.X->raw_tensor(),
&param.Out->raw_tensor());
}
virtual ~SquareCompute() = default;
};
template <typename T>
class SquareGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationGradParam;
void Run() override {
auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::ActivationGradParam>();
CHECK(context.x86_device_context());
param.X_grad->template mutable_data<T>();
ActivateGrad<paddle::operators::SquareGradFunctor<T>>(
*context.x86_device_context(),
&param.X->raw_tensor(),
&param.Out->raw_tensor(),
&param.Out_grad->raw_tensor(),
&param.X_grad->raw_tensor());
}
virtual ~SquareGradCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
#include "lite/kernels/x86/activation_compute.h"
// float
REGISTER_LITE_KERNEL(square,
......@@ -112,16 +25,13 @@ REGISTER_LITE_KERNEL(square,
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
REGISTER_LITE_KERNEL(square_grad,
// float
REGISTER_LITE_KERNEL(relu,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::SquareGradCompute<float>,
paddle::lite::kernels::x86::ReluCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput(paddle::framework::GradVarName("Out"),
{LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput(paddle::framework::GradVarName("X"),
{LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <utility>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
#include "lite/fluid/eigen.h"
#include "lite/operators/activation_ops.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
enum ActBwdOpFwdDeps {
kNoDeps = 0x00, // Do not need any forward input/output
kDepX = 0x01, // Only need forward input X
kDepOut = 0x02, // Only need forward output Out
// Never add kDepXOut, because Out can be always calculated
// by forward input X in backward part.
// FIXME(zjl): but in MKLDNN abs, X and Out are all needed...
// Developers should not rely on this enum value!
kDepXOut = 0x03
};
template <typename T>
struct BaseActivationFunctor {
using ELEMENT_TYPE = T;
using AttrPair = std::vector<std::pair<const char*, float*>>;
AttrPair GetAttrs() { return AttrPair(); }
/* NOTE(*): Out can reuse X's memory if X is not needed by its gradient.
For example, sigmoid's gradient does not involve x, so its output can
reuse the input memory; but abs's gradient does use x, so it cannot be
computed in place.
*/
bool Inplace() const { return false; }
};
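// Flattens X and Out into 1-D Eigen vectors and applies the element-wise
// Functor on the X86 Eigen device; returns false if X or Out is null.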
template <typename Functor>
bool Activate(const lite::Tensor* X, lite::Tensor* Out) {
using T = typename Functor::ELEMENT_TYPE;
auto place = lite::fluid::EigenDeviceType<TARGET(kX86)>();
CHECK_OR_FALSE(X)
CHECK_OR_FALSE(Out)
auto x = lite::fluid::EigenVector<T>::Flatten(*X);
auto out = lite::fluid::EigenVector<T>::Flatten(*Out);
Functor()(place, x, out);
return true;
}
// square(x) = x^2
template <typename T>
struct SquareFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.square();
}
};
template <typename T>
class SquareCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationParam;
void Run() override {
auto& param = *param_.get_mutable<operators::ActivationParam>();
param.Out->template mutable_data<T>();
Activate<SquareFunctor<T>>(param.X, param.Out);
}
virtual ~SquareCompute() = default;
};
// relu(x) = max(x, 0)
template <typename T>
struct ReluFunctor : public BaseActivationFunctor<T> {
template <typename Device, typename X, typename Out>
void operator()(Device d, X x, Out out) const {
out.device(d) = x.cwiseMax(static_cast<T>(0));
}
};
template <typename T>
class ReluCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
using param_t = operators::ActivationParam;
void Run() override {
auto& param = *param_.get_mutable<operators::ActivationParam>();
param.Out->template mutable_data<T>();
Activate<ReluFunctor<T>>(param.X, param.Out);
}
virtual ~ReluCompute() = default;
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/activation_compute.cc"
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
TEST(relu_x86, retrive_op) {
auto relu =
KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>("relu");
ASSERT_FALSE(relu.empty());
ASSERT_TRUE(relu.front());
}
TEST(relu_x86, init) {
ReluCompute<float> relu;
ASSERT_EQ(relu.precision(), PRECISION(kFloat));
ASSERT_EQ(relu.target(), TARGET(kX86));
}
TEST(relu_x86, run_test) {
lite::Tensor x, out;
constexpr int batch_size = 1;
std::vector<int64_t> x_shape{batch_size, 3, 2, 2};
x.Resize(lite::DDim(x_shape));
std::vector<int64_t> out_shape{batch_size, 3, 2, 2};
out.Resize(lite::DDim(out_shape));
auto x_data = x.mutable_data<float>();
auto out_data = out.mutable_data<float>();
for (int64_t i = 0; i < x.dims().production(); i++) {
int sign = i % 2 == 0 ? 1 : -1;
x_data[i] = static_cast<float>(i * sign);
}
ReluCompute<float> relu;
operators::ActivationParam param;
param.X = &x;
param.Out = &out;
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<X86Context>();
relu.SetContext(std::move(ctx));
relu.SetParam(param);
relu.Run();
LOG(INFO) << "output: ";
for (int i = 0; i < out.dims().production(); i++) {
LOG(INFO) << out_data[i];
}
}
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/gru_compute.h"
DEFINE_int32(paddle_num_threads,
1,
"Number of threads for each paddle instance.");
REGISTER_LITE_KERNEL(gru,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::GRUCompute<float>,
def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("H0", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("Weight", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("Bias", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Batch_gate", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Batch_reset_hidden_prev",
{LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Batch_hidden", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Hidden", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/backends/x86/math/blas.h"
#include "lite/backends/x86/math/detail/gru_cpu_kernel.h"
#include "lite/backends/x86/math/detail/gru_kernel.h"
#include "lite/backends/x86/math/gru_compute.h"
#include "lite/backends/x86/math/math_function.h"
#include "lite/backends/x86/math/sequence2batch.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
#include "lite/fluid/eigen.h"
DECLARE_int32(paddle_num_threads);
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
using Tensor = lite::Tensor;
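// Copies the rows of src into dst in the order given by index_lod; used to
// reorder the initial state h0 to match the batch layout produced by
// LoDTensor2BatchFunctor.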
template <typename T>
inline void ReorderInitState(const lite::Context<TARGET(kX86)>& context,
const Tensor& src,
const std::vector<uint64_t>& index_lod,
Tensor* dst,
bool indexed_src) {
lite::x86::math::CopyMatrixRowsFunctor<TARGET(kX86), T> row_shuffle;
dst->Resize(src.dims());
dst->mutable_data<T>();
row_shuffle(context, src, index_lod, dst, indexed_src);
}
template <typename T>
class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
void Run() override {
auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::GRUParam>();
bool origin_mode = param.origin_mode;
bool is_reverse = param.is_reverse;
auto* input = param.input;
auto* h0 = param.h0;
auto* weight = param.weight;
const T* weight_data = weight->data<T>();
auto* bias = param.bias;
auto* batch_gate = param.batch_gate;
batch_gate->mutable_data<T>();
auto* batch_reset_hidden_prev = param.batch_reset_hidden_prev;
batch_reset_hidden_prev->mutable_data<T>();
auto* batch_hidden = param.batch_hidden;
batch_hidden->mutable_data<T>();
auto* hidden = param.hidden;
hidden->mutable_data<T>();
auto hidden_dims = hidden->dims();
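// Reorder the LoD (sequence) input into batch form so that all sequences can
// be advanced one time step at a time.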
lite::x86::math::LoDTensor2BatchFunctor<TARGET(kX86), T> to_batch;
to_batch(context, *input, batch_gate, true, is_reverse);
if (bias) {
lite::x86::math::RowwiseAdd<TARGET(kX86), T> add_bias;
add_bias(context, *batch_gate, *bias, batch_gate);
}
int frame_size = hidden_dims[1];
lite::x86::math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
gru_value.state_weight =
const_cast<T*>(weight_data + 2 * frame_size * frame_size);
Tensor ordered_h0;
std::vector<size_t> order(batch_gate->lod()[2]);
if (h0) {
// Since the batch computation for GRU reorders the input sequences
// according to their length, the initial state h0 also needs to be
// reordered accordingly.
ReorderInitState<T>(context, *h0, order, &ordered_h0, true);
gru_value.prev_out_value = ordered_h0.mutable_data<T>();
} else {
gru_value.prev_out_value = nullptr;
}
auto batch_starts = batch_gate->lod()[0];
size_t seq_len = batch_starts.size() - 1;
auto active_node =
lite::x86::math::detail::GetActivationType(param.activation);
auto active_gate =
lite::x86::math::detail::GetActivationType(param.gate_activation);
#ifdef PADDLE_WITH_MKLML
// use MKL packed to speedup GEMM
if (FLAGS_paddle_num_threads >= 4) {
auto blas = lite::x86::math::GetBlas<TARGET(kX86), T>(context);
T* packed_gate = blas.GEMM_ALLOC(CblasBMatrix,
1 /*height of C*/,
frame_size * 2 /*width of weight*/,
frame_size /*height of height*/);
CHECK(packed_gate);
blas.GEMM_PACK(CblasBMatrix,
CblasNoTrans,
1 /*cur bs?*/,
frame_size * 2,
frame_size,
T(1.0),
gru_value.gate_weight,
frame_size * 2,
packed_gate);
T* packed_state = blas.GEMM_ALLOC(CblasBMatrix,
1 /*height of C*/,
frame_size /*width of weight*/,
frame_size /*height of height*/);
CHECK(packed_state);
blas.GEMM_PACK(CblasBMatrix,
CblasNoTrans,
1 /*cur bs?*/,
frame_size,
frame_size,
T(1.0),
gru_value.state_weight,
frame_size,
packed_state);
for (size_t n = 0; n < seq_len; n++) {
int64_t bstart = static_cast<int64_t>(batch_starts[n]);
int64_t bend = static_cast<int64_t>(batch_starts[n + 1]);
int64_t cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice<T>(bstart, bend);
Tensor reset_hidden_prev_t =
batch_reset_hidden_prev->Slice<T>(bstart, bend);
Tensor hidden_t = batch_hidden->Slice<T>(bstart, bend);
gru_value.output_value = hidden_t.mutable_data<T>();
gru_value.gate_value = gate_t.mutable_data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.mutable_data<T>();
if (gru_value.prev_out_value) {
blas.GEMM_COMPUTE(CblasNoTrans,
CblasPacked,
cur_batch_size,
frame_size * 2,
frame_size,
gru_value.prev_out_value,
frame_size,
packed_gate,
frame_size * 2,
T(1),
gru_value.gate_value,
frame_size * 3);
}
lite::x86::math::detail::forward_final_output(
lite::x86::math::detail::forward::gru_finalOutput<T>(),
gru_value,
frame_size,
cur_batch_size,
active_node,
origin_mode);
gru_value.prev_out_value = gru_value.output_value;
}
blas.GEMM_FREE(packed_gate);
blas.GEMM_FREE(packed_state);
} else {
#endif
for (size_t n = 0; n < seq_len; n++) {
int64_t bstart = static_cast<int64_t>(batch_starts[n]);
int64_t bend = static_cast<int64_t>(batch_starts[n + 1]);
int64_t cur_batch_size = bend - bstart;
Tensor gate_t = batch_gate->Slice<T>(bstart, bend);
Tensor reset_hidden_prev_t =
batch_reset_hidden_prev->Slice<T>(bstart, bend);
Tensor hidden_t = batch_hidden->Slice<T>(bstart, bend);
gru_value.output_value = hidden_t.mutable_data<T>();
gru_value.gate_value = gate_t.mutable_data<T>();
gru_value.reset_output_value = reset_hidden_prev_t.mutable_data<T>();
lite::x86::math::GRUUnitFunctor<TARGET(kX86), T>::compute(
context,
gru_value,
frame_size,
cur_batch_size,
active_node,
active_gate,
origin_mode);
gru_value.prev_out_value = gru_value.output_value;
}
#ifdef PADDLE_WITH_MKLML
}
#endif
lite::x86::math::Batch2LoDTensorFunctor<TARGET(kX86), T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(context, *batch_hidden, hidden);
}
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/gru_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
TEST(gru_x86, retrive_op) {
auto gru =
KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>("gru");
ASSERT_FALSE(gru.empty());
ASSERT_TRUE(gru.front());
}
TEST(gru_x86, init) {
GRUCompute<float> gru;
ASSERT_EQ(gru.precision(), PRECISION(kFloat));
ASSERT_EQ(gru.target(), TARGET(kX86));
}
TEST(gru_x86, run_test) {
lite::Tensor input, h0, weight, bias;
lite::Tensor batch_gate, batch_reset_hidden_prev, batch_hidden, hidden;
constexpr int batch_size = 9;
std::vector<int64_t> input_shape{batch_size, 15};
input.Resize(lite::DDim(input_shape));
std::vector<int64_t> weight_shape{5, 15};
weight.Resize(lite::DDim(weight_shape));
std::vector<int64_t> h0_shape{3, 5};
h0.Resize(lite::DDim(h0_shape));
std::vector<int64_t> bias_shape{1, 15};
bias.Resize(lite::DDim(bias_shape));
std::vector<int64_t> batch_gate_shape{batch_size, 15};
batch_gate.Resize(lite::DDim(batch_gate_shape));
std::vector<int64_t> batch_reset_hidden_prev_shape{batch_size, 5};
batch_reset_hidden_prev.Resize(lite::DDim(batch_reset_hidden_prev_shape));
std::vector<int64_t> batch_hidden_shape{batch_size, 5};
batch_hidden.Resize(lite::DDim(batch_hidden_shape));
std::vector<int64_t> hidden_shape{batch_size, 5};
hidden.Resize(lite::DDim(hidden_shape));
std::vector<std::vector<uint64_t>> lod{{0, 2, 6, 9}};
input.set_lod(lod);
auto input_data = input.mutable_data<float>();
auto weight_data = weight.mutable_data<float>();
auto h0_data = h0.mutable_data<float>();
auto bias_data = bias.mutable_data<float>();
for (int64_t i = 0; i < input.dims().production(); i++) {
input_data[i] = static_cast<float>(0);
}
for (int64_t i = 0; i < weight.dims().production(); i++) {
weight_data[i] = static_cast<float>(0);
}
for (int64_t i = 0; i < h0.dims().production(); i++) {
h0_data[i] = static_cast<float>(0);
}
for (int64_t i = 0; i < bias.dims().production(); i++) {
bias_data[i] = static_cast<float>(0);
}
// ReluCompute relu;
GRUCompute<float> gru;
operators::GRUParam param;
param.input = &input;
param.h0 = &h0;
param.weight = &weight;
param.bias = &bias;
param.batch_gate = &batch_gate;
param.batch_reset_hidden_prev = &batch_reset_hidden_prev;
param.batch_hidden = &batch_hidden;
param.hidden = &hidden;
param.gate_activation = "sigmoid";
param.activation = "tanh";
param.is_reverse = false;
param.origin_mode = false;
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<X86Context>();
gru.SetContext(std::move(ctx));
gru.SetParam(param);
gru.Run();
auto batch_gate_data = batch_gate.mutable_data<float>();
auto batch_reset_hidden_prev_data =
batch_reset_hidden_prev.mutable_data<float>();
auto batch_hidden_data = batch_hidden.mutable_data<float>();
auto hidden_data = hidden.mutable_data<float>();
std::vector<float> batch_gate_out{
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0,
0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0, 0, 0, 0};
std::vector<float> batch_reset_hidden_prev_out{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::vector<float> batch_hidden_out{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
std::vector<float> hidden_out{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
LOG(INFO) << "output: ";
for (int i = 0; i < batch_gate.dims().production(); i++) {
LOG(INFO) << batch_gate_data[i];
EXPECT_NEAR(batch_gate_data[i], batch_gate_out[i], 1e-3);
}
for (int i = 0; i < batch_reset_hidden_prev.dims().production(); i++) {
LOG(INFO) << batch_reset_hidden_prev_data[i];
EXPECT_NEAR(
batch_reset_hidden_prev_data[i], batch_reset_hidden_prev_out[i], 1e-3);
}
for (int i = 0; i < batch_hidden.dims().production(); i++) {
LOG(INFO) << batch_hidden_data[i];
EXPECT_NEAR(batch_hidden_data[i], batch_hidden_out[i], 1e-3);
}
for (int i = 0; i < hidden.dims().production(); i++) {
LOG(INFO) << hidden_data[i];
EXPECT_NEAR(hidden_data[i], hidden_out[i], 1e-3);
}
}
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(gru, kX86, kFloat, kNCHW, def);
......@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/relu_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/kernels/x86/activation_compute.h"
namespace paddle {
namespace lite {
......@@ -64,6 +64,8 @@ TEST(relu_x86, run_test) {
LOG(INFO) << "output: ";
for (int i = 0; i < out.dims().production(); i++) {
LOG(INFO) << out_data[i];
int sign = i % 2 == 0 ? 1 : 0;
ASSERT_EQ(out_data[i], i * sign);
}
}
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/sequence_expand_as_compute.h"
REGISTER_LITE_KERNEL(sequence_expand_as,
kX86,
kFloat,
kNCHW,
paddle::lite::kernels::x86::SequenceExpandAsCompute<float>,
def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/types.h"
#include "lite/fluid/eigen.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
using Tensor = lite::Tensor;
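// Expands each row of x according to ref_lod: row i is copied
// (ref_lod[i + 1] - ref_lod[i]) times into the corresponding rows of out.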
template <typename T>
struct SequenceExpandFunctor {
void operator()(const Tensor &x,
const std::vector<size_t> &ref_lod, /*expand referenced lod*/
Tensor *out) {
int64_t height = x.dims()[0];
int64_t width = x.data_size() / height;
const T *in_data = x.data<T>();
T *out_data = out->mutable_data<T, T>();
for (int h_id = 0; h_id < height; ++h_id) {
size_t span = ref_lod[h_id + 1] - ref_lod[h_id];
if (span == 0) continue;
const T *src = in_data + h_id * width;
for (int64_t w_id = 0; w_id < width; ++w_id) {
T ele = src[w_id];
size_t offset = ref_lod[h_id] * width;
for (size_t k = 0; k < span; ++k) {
out_data[offset + k * width + w_id] = ele;
}
}
}
}
};
template <typename T>
class SequenceExpandAsCompute
: public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
public:
void Run() override {
auto &param = *param_.get_mutable<operators::SequenceExpandAsParam>();
auto *x = param.x;
auto *y = param.y;
auto *out = param.out;
auto &y_lod = y->lod();
CHECK_EQ(y_lod.size(), 1);
CHECK_GT(y_lod[0].size(), 1);
out->mutable_data<T, T>();
SequenceExpandFunctor<T> seq_expand_functor;
seq_expand_functor(*x, y_lod[0], out);
}
};
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/x86/sequence_expand_as_compute.h"
#include <gtest/gtest.h>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace x86 {
TEST(sequence_expand_as_x86, retrive_op) {
auto sequence_expand_as =
KernelRegistry::Global().Create<TARGET(kX86), PRECISION(kFloat)>(
"sequence_expand_as");
ASSERT_FALSE(sequence_expand_as.empty());
ASSERT_TRUE(sequence_expand_as.front());
}
TEST(sequence_expand_as_x86, init) {
SequenceExpandAsCompute<float> sequence_expand_as;
ASSERT_EQ(sequence_expand_as.precision(), PRECISION(kFloat));
ASSERT_EQ(sequence_expand_as.target(), TARGET(kX86));
}
TEST(sequence_expand_as_x86, run_test) {
lite::Tensor x, y, out;
std::vector<int64_t> x_shape{4, 1};
x.Resize(lite::DDim(x_shape));
std::vector<int64_t> y_shape{1, 5};
y.Resize(lite::DDim(y_shape));
std::vector<int64_t> out_shape{8, 1};
out.Resize(lite::DDim(out_shape));
auto x_data = x.mutable_data<float>();
auto y_data = y.mutable_data<float>();
for (int64_t i = 0; i < x.dims().production(); i++) {
x_data[i] = static_cast<float>(i);
}
for (int64_t i = 0; i < y.dims().production(); i++) {
y_data[i] = static_cast<float>(i);
}
std::vector<std::vector<uint64_t>> lod{{0, 3, 6, 7, 8}};
y.set_lod(lod);
// MulCompute mul;
SequenceExpandAsCompute<float> sequence_expand_as;
operators::SequenceExpandAsParam param;
param.x = &x;
param.y = &y;
param.out = &out;
std::unique_ptr<KernelContext> ctx(new KernelContext);
ctx->As<X86Context>();
sequence_expand_as.SetContext(std::move(ctx));
sequence_expand_as.SetParam(param);
sequence_expand_as.Run();
auto out_data = out.mutable_data<float>();
int index = 1;
int lod_sum = lod[0][index];
LOG(INFO) << "output: ";
for (int i = 0; i < out.dims().production(); i++) {
LOG(INFO) << out_data[i];
if (i >= lod_sum) {
index++;
lod_sum = lod[0][index];
}
ASSERT_EQ(out_data[i], x_data[index - 1]);
}
}
} // namespace x86
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(sequence_expand_as, kX86, kFloat, kNCHW, def);
......@@ -71,6 +71,7 @@ add_operator(roi_align_op basic SRCS roi_align_op.cc DEPS ${op_DEPS})
add_operator(box_clip_op basic SRCS box_clip_op.cc DEPS ${op_DEPS})
add_operator(flatten_op basic SRCS flatten_op.cc DEPS ${op_DEPS})
add_operator(fake_quantize_range_abs_max_op basic SRCS fake_quantize_range_abs_max.cc DEPS ${op_DEPS})
add_operator(sequence_expand_as_op_lite basic SRCS sequence_expand_as_op.cc DEPS ${op_DEPS})
add_operator(range_op basic SRCS range_op.cc DEPS ${op_DEPS})
add_operator(assign_value_op basic SRCS assign_value_op.cc DEPS ${op_DEPS})
......
......@@ -682,6 +682,12 @@ struct SequenceExpandParam {
int ref_level{-1};
};
struct SequenceExpandAsParam {
const lite::Tensor* x{nullptr};
const lite::Tensor* y{nullptr};
lite::Tensor* out{nullptr};
};
struct ReduceMaxParam {
const lite::Tensor* X{};
lite::Tensor* Out{};
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/operators/sequence_expand_as_op.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace operators {
bool SequenceExpandAsOpLite::CheckShape() const {
CHECK_OR_FALSE(param_.x)
CHECK_OR_FALSE(param_.y)
CHECK_OR_FALSE(param_.out)
auto x_dims = param_.x->dims();
CHECK_EQ_OR_FALSE(x_dims.size(), 2)
auto y_lod = param_.y->lod();
CHECK_EQ_OR_FALSE(y_lod.size(), 1)
CHECK_EQ_OR_FALSE(static_cast<size_t>(x_dims[0]), y_lod[0].size() - 1)
return true;
}
bool SequenceExpandAsOpLite::InferShape() const {
auto x_dims = param_.x->dims();
auto y_lod = param_.y->lod();
auto out_dims = x_dims;
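// When y has a non-trivial level-0 LoD, the output's first dimension is the
// sum of the span lengths in that LoD; otherwise it falls back to x's first
// dimension.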
int64_t out_first_dim = 0;
if (y_lod[0].size() <= 1) {
out_first_dim = x_dims[0];
} else {
for (size_t i = 1; i < y_lod[0].size(); ++i) {
out_first_dim += (y_lod[0][i] - y_lod[0][i - 1]);
}
}
out_dims[0] = out_first_dim;
param_.out->Resize(out_dims);
param_.out->set_lod(y_lod);
return true;
}
bool SequenceExpandAsOpLite::AttachImpl(const cpp::OpDesc &op_desc,
lite::Scope *scope) {
auto x = op_desc.Input("X").front();
auto y = op_desc.Input("Y").front();
auto out = op_desc.Output("Out").front();
param_.x = scope->FindVar(x)->GetMutable<lite::Tensor>();
param_.y = scope->FindVar(y)->GetMutable<lite::Tensor>();
param_.out = scope->FindVar(out)->GetMutable<lite::Tensor>();
return true;
}
} // namespace operators
} // namespace lite
} // namespace paddle
REGISTER_LITE_OP(sequence_expand_as,
paddle::lite::operators::SequenceExpandAsOpLite)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <string>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/core/scope.h"
#include "lite/utils/all.h"
namespace paddle {
namespace lite {
namespace operators {
class SequenceExpandAsOpLite : public OpLite {
public:
SequenceExpandAsOpLite() {}
explicit SequenceExpandAsOpLite(const std::string &op_type)
: OpLite(op_type) {}
bool CheckShape() const override;
bool InferShape() const override;
bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override;
void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
std::string DebugString() const override { return "sequence_expand_as"; }
private:
mutable SequenceExpandAsParam param_;
};
} // namespace operators
} // namespace lite
} // namespace paddle
......@@ -31,6 +31,19 @@ bool YoloBoxOp::CheckShape() const {
CHECK_OR_FALSE(ImgSize);
CHECK_OR_FALSE(Boxes);
CHECK_OR_FALSE(Scores);
auto dim_x = X->dims();
auto dim_imgsize = ImgSize->dims();
std::vector<int> anchors = param_.anchors;
int anchor_num = anchors.size() / 2;
auto class_num = param_.class_num;
CHECK_OR_FALSE(dim_x.size() == 4);
CHECK_OR_FALSE(dim_x[1] == anchor_num * (5 + class_num));
CHECK_OR_FALSE(dim_imgsize[0] == dim_x[0]);
CHECK_OR_FALSE(dim_imgsize[1] == 2);
CHECK_OR_FALSE(anchors.size() > 0 && anchors.size() % 2 == 0);
CHECK_OR_FALSE(class_num > 0);
return true;
}
bool YoloBoxOp::InferShape() const {
......
......@@ -101,7 +101,7 @@ class YoloBoxComputeTester : public arena::TestCase {
float conf_thresh_ = 0.f;
int downsample_ratio_ = 0;
DDim _dims0_{{1, 2, 2, 1}};
DDim _dims0_{{1, 255, 13, 13}};
DDim _dims1_{{1, 2}};
public:
......@@ -115,7 +115,10 @@ class YoloBoxComputeTester : public arena::TestCase {
anchors_(anchors),
class_num_(class_num),
conf_thresh_(conf_thresh),
downsample_ratio_(downsample_ratio) {}
downsample_ratio_(downsample_ratio) {
int anchor_num = anchors_.size() / 2;
_dims0_[1] = anchor_num * (5 + class_num);
}
void RunBaseline(Scope* scope) override {
const lite::Tensor* X = scope->FindTensor(input0_);
......@@ -149,14 +152,14 @@ class YoloBoxComputeTester : public arena::TestCase {
auto anchors_data = anchors.data();
const float* in_data = in->data<float>();
const float* imgsize_data = imgsize->data<float>();
const int* imgsize_data = imgsize->data<int>();
float* boxes_data = boxes->mutable_data<float>();
float* scores_data = scores->mutable_data<float>();
float box[4];
for (int i = 0; i < n; i++) {
int img_height = static_cast<int>(imgsize_data[2 * i]);
int img_width = static_cast<int>(imgsize_data[2 * i + 1]);
int img_height = imgsize_data[2 * i];
int img_width = imgsize_data[2 * i + 1];
for (int j = 0; j < an_num; j++) {
for (int k = 0; k < h; k++) {
for (int l = 0; l < w; l++) {
......@@ -218,7 +221,7 @@ class YoloBoxComputeTester : public arena::TestCase {
}
std::vector<int> data1(_dims1_.production());
for (int i = 0; i < _dims1_.production(); i++) {
data1[i] = i + 8;
data1[i] = 608;
}
SetCommonTensor(input0_, _dims0_, data0.data());
SetCommonTensor(input1_, _dims1_, data1.data());
......@@ -227,10 +230,9 @@ class YoloBoxComputeTester : public arena::TestCase {
void test_yolobox(Place place) {
for (int class_num : {1, 2, 3, 4}) {
for (float conf_thresh : {0.5, 0.2, 0.7}) {
for (int downsample_ratio : {1, 2, 3}) {
std::vector<int> anchor({1, 2, 3, 4});
for (float conf_thresh : {0.01, 0.2, 0.7}) {
for (int downsample_ratio : {16, 32}) {
std::vector<int> anchor({10, 13, 16, 30});
std::unique_ptr<arena::TestCase> tester(new YoloBoxComputeTester(
place, "def", anchor, class_num, conf_thresh, downsample_ratio));
arena::Arena arena(std::move(tester), place, 2e-5);
......
......@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "common/type_define.h"
#include "cstring"
#include "io/paddle_inference_api.h"
......
......@@ -18,6 +18,7 @@
#include <utility>
#include <vector>
#include "common/enforce.h"
#include "common/type_define.h"
#include "framework/tensor.h"
#ifdef PADDLE_MOBILE_FPGA
#include <fpga/common/fpga_common.h>
......@@ -35,7 +36,9 @@ PaddleMobilePredictor<Device, T>::PaddleMobilePredictor(
template <typename Device, typename T>
bool PaddleMobilePredictor<Device, T>::Init(const PaddleMobileConfig &config) {
paddle_mobile_.reset(new PaddleMobile<Device, T>());
PaddleMobileConfigInternal configInternal;
configInternal.load_when_predict = config.load_when_predict;
paddle_mobile_.reset(new PaddleMobile<Device, T>(configInternal));
#ifdef PADDLE_MOBILE_CL
paddle_mobile_->SetCLPath(config.cl_path);
#endif
......@@ -135,14 +138,14 @@ bool PaddleMobilePredictor<Device, T>::Run(
void ConvertPaddleTensors(const PaddleTensor &src, framework::Tensor *des) {
des->Resize(framework::make_ddim(src.shape));
des->external_data = src.data.data();
des->set_type(src.dtypeid);
des->set_type(static_cast<kTypeId_t>(static_cast<int>(src.dtypeid)));
des->layout =
src.layout == LAYOUT_HWC ? framework::LAYOUT_HWC : framework::LAYOUT_CHW;
}
void ConvertTensors(const framework::Tensor &src, PaddleTensor *des) {
des->shape = framework::vectorize2int(src.dims());
des->dtypeid = src.type();
des->dtypeid = static_cast<PaddlekTypeId_t>(static_cast<int>(src.type()));
des->layout = src.layout == framework::LAYOUT_HWC ? LAYOUT_HWC : LAYOUT_CHW;
auto num = src.numel();
......@@ -164,7 +167,8 @@ void PaddleMobilePredictor<Device, T>::FeedPaddleTensors(
auto num = inputs.size();
std::vector<framework::Tensor> tensors(num, framework::Tensor());
for (int i = 0; i < num; i++) {
if (inputs[i].dtypeid == type_id<int8_t>().hash_code()) {
if (static_cast<kTypeId_t>(static_cast<int>(inputs[i].dtypeid)) ==
type_id<int8_t>().hash_code()) {
tensors[i].init(type_id<int8_t>().hash_code());
} else {
tensors[i].init(type_id<float>().hash_code());
......
......@@ -25,7 +25,6 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "common/type_define.h"
namespace paddle_mobile {
......@@ -86,6 +85,56 @@ class PaddleBuf {
bool memory_owned_{true};
};
typedef enum {
paddle_void = 0,
paddle_float,
paddle_int,
paddle_uint16_t,
paddle_double,
paddle_int64_t,
paddle_size_t,
paddle_int16_t,
paddle_int8_t,
paddle_uint8_t,
paddle_bool,
paddle_string,
paddle_floats = 100,
paddle_ints,
paddle_int64_ts,
paddle_size_ts,
paddle_bools,
paddle_strings,
paddle_const_float = 200,
paddle_const_int,
paddle_block = 300,
paddle_tensor,
paddle_lod_tensor,
paddle_blocks,
paddle_tensors,
paddle_lod_tensors,
paddle_p_block = 400,
paddle_p_tensor,
paddle_p_lod_tensor,
paddle_p_blocks,
paddle_p_tensors,
paddle_p_lod_tensors,
paddle_scopes = 500,
paddle_selected_rows,
paddle_dim0 = 600,
paddle_dim1,
paddle_dim2,
paddle_dim3,
paddle_dim4,
paddle_dim5,
paddle_dim6,
paddle_dim7,
paddle_dim8,
paddle_dim9,
#ifdef PADDLE_MOBILE_CL
paddle_cl_image,
#endif
} PaddlekTypeId_t;
struct PaddleTensor {
PaddleTensor() = default;
std::string name; // variable name.
......@@ -93,7 +142,7 @@ struct PaddleTensor {
std::vector<int> lod;
PaddleBuf data; // blob of data.
PaddleDType dtype;
kTypeId_t dtypeid;
PaddlekTypeId_t dtypeid;
LayoutType layout;
};
......@@ -166,6 +215,7 @@ struct PaddleMobileConfig : public PaddlePredictor::Config {
bool quantification = false;
bool lod_mode = false;
int thread_num = 1;
bool load_when_predict = false;
std::string cl_path;
struct PaddleModelMemoryPack memory_pack;
};
......
......@@ -61,7 +61,7 @@ build_for_android() {
elif [ "${PLATFORM}" = "arm-v8a" ]; then
ABI="arm64-v8a"
ARM_PLATFORM="V8"
CXX_FLAGS="-march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog"
CXX_FLAGS="-march=armv8-a -pie -fPIE -w -Wno-error=format-security -llog -fuse-ld=gold"
else
echo "unknown platform!"
exit -1
......