diff --git a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
index 0455a7740d3473f6d46780585b6483b86a706594..cf77aeb07582fa808ca314670353df8ce4eb9eab 100644
--- a/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
+++ b/tensorflow/compiler/mlir/tensorflow/ir/tf_generated_ops.td
@@ -509,7 +509,7 @@ array([b'3.14', b'2.72'], dtype=object)
   }];
 
   let arguments = (ins
-    TensorOf<[TF_Bool, TF_Complex128, TF_Complex64, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8]>:$input,
+    TensorOf<[TF_Bool, TF_Complex128, TF_Complex64, TF_Float32, TF_Float64, TF_Int16, TF_Int32, TF_Int64, TF_Int8, TF_Variant]>:$input,
 
     DefaultValuedAttr<I64Attr, "-1">:$precision,
     DefaultValuedAttr<BoolAttr, "false">:$scientific,
@@ -15226,4 +15226,4 @@ execution the transfer corresponds to.}]>:$dynamic_key,
   let results = (outs);
 
   TF_DerivedOperandTypeListAttr Tinputs = TF_DerivedOperandTypeListAttr<0>;
-}
\ No newline at end of file
+}
diff --git a/tensorflow/core/kernels/aggregate_ops.h b/tensorflow/core/kernels/aggregate_ops.h
index 5023d0dc8e7347c06896f6125e6ab357063666ae..f4351f390f4d32b67210a4efe4b6a96faffd57b6 100644
--- a/tensorflow/core/kernels/aggregate_ops.h
+++ b/tensorflow/core/kernels/aggregate_ops.h
@@ -370,24 +370,77 @@ class AddNOp<Device, Variant, OpKernelT, OpKernelConstructionT,
               i, " has shape: ", ctx->input(i).shape().DebugString(), "."));
     }
 
-    // Step 2: attempt to add using
+    // Step 2: Sum input variants in a tree-like structure using
     // BinaryOpVariants(ADD_VARIANT_BINARY_OP, ...)
     // For the output create a default-constructed variant object.
-    // TODO(ebrevdo): Perform summation in a tree-structure.
-    Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({}));
-    Variant* v_out = &(out.scalar<Variant>()());
-    OP_REQUIRES_OK(ctx, BinaryOpVariants<Device>(
-                            ctx, ADD_VARIANT_BINARY_OP,
-                            ctx->input(0).template scalar<Variant>()(),
-                            ctx->input(1).template scalar<Variant>()(), v_out));
-    for (int i = 2; i < num; ++i) {
-      const Variant tmp = std::move(*v_out);
-      const Variant& inp = ctx->input(i).template scalar<Variant>()();
-      OP_REQUIRES_OK(ctx, BinaryOpVariants<Device>(ctx, ADD_VARIANT_BINARY_OP,
-                                                   inp, tmp, v_out));
+    //
+    // Pairwise summation provides better numerical precision by
+    // reducing round-off error:
+    //
+    //   https://en.wikipedia.org/wiki/Pairwise_summation
+    //
+    // These two vectors are used to store and mark intermediate sums.
+    gtl::InlinedVector<bool, 4> temp_filled(num, false);
+    gtl::InlinedVector<Variant, 4> temp(num);
+
+    // Tree-based summation.
+    int skip = 1;
+    int n = num;
+    while (skip < n) {
+      int i = skip;
+      while (i < n) {
+        // TODO(ebrevdo, rmlarsen): Parallelize the pairwise summations in the
+        // inner loop if the variants are "large".
+
+        // x[i - skip] += x[i]
+        OP_REQUIRES_OK(ctx,
+                       AddVariantTo(ctx, i - skip, i, &temp, &temp_filled));
+        // We won't use this index again, recover its memory.
+        temp[i].clear();
+        i += 2 * skip;
+      }
+      if (i == n) {
+        // x[0] += x[i - skip]
+        OP_REQUIRES_OK(ctx,
+                       AddVariantTo(ctx, 0, i - skip, &temp, &temp_filled));
+        // We won't use this index again, recover its memory.
+        temp[i - skip].clear();
+        n -= skip;
+      }
+      skip *= 2;
     }
+
+    Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({}));
+    out.scalar<Variant>()() = std::move(temp[0]);
     ctx->set_output(0, out);
   }
+
+ private:
+  // AddVariantTo efficiently performs:
+  //    temp[lhs_ix] <- array(lhs_ix) + array(rhs_ix)
+  // where array(ix) := (temp_filled[ix]
+  //                     ? temp[ix]
+  //                     : ctx->input(ix).scalar<Variant>()())
+  // This reduces (possibly expensive) copying of Variants from
+  // the inputs into temp at the lowest levels of the summation tree.
+  static inline Status AddVariantTo(OpKernelContextT* ctx, const int lhs_ix,
+                                    const int rhs_ix,
+                                    gtl::InlinedVector<Variant, 4>* temp,
+                                    gtl::InlinedVector<bool, 4>* temp_filled) {
+    Variant tmp;
+    if (temp_filled->at(lhs_ix)) tmp = std::move(temp->at(lhs_ix));
+    const Variant& a = temp_filled->at(lhs_ix)
+                           ? tmp
+                           : ctx->input(lhs_ix).template scalar<Variant>()();
+    const Variant& b = temp_filled->at(rhs_ix)
+                           ? temp->at(rhs_ix)
+                           : ctx->input(rhs_ix).template scalar<Variant>()();
+    Variant* c = &temp->at(lhs_ix);
+    TF_RETURN_IF_ERROR(
+        BinaryOpVariants<Device>(ctx, ADD_VARIANT_BINARY_OP, a, b, c));
+    temp_filled->at(lhs_ix) = true;
+    return Status::OK();
+  }
 };
 
 }  // namespace tensorflow
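Note: the skip-doubling loop above is an in-place pairwise (tree) reduction. The sketch below is not part of the patch; it replays the same index scheme on plain Python floats, with the hypothetical `pairwise_sum` standing in for the kernel's Compute loop and plain `+=` standing in for AddVariantTo, to make the traversal order easier to follow.

```python
# Illustrative sketch (not part of the patch): the same skip-doubling
# pairwise reduction as the kernel above, applied to plain floats.
def pairwise_sum(xs):
    xs = list(xs)  # work on a copy; xs[0] accumulates the total
    n = len(xs)
    if n == 0:
        raise ValueError("need at least one element")
    skip = 1
    while skip < n:
        i = skip
        while i < n:
            # x[i - skip] += x[i]
            xs[i - skip] += xs[i]
            i += 2 * skip
        if i == n:
            # A leftover element at the tail: fold it into x[0].
            xs[0] += xs[i - skip]
            n -= skip
        skip *= 2
    return xs[0]

assert pairwise_sum([1.0, 2.0, 3.0, 4.0, 5.0]) == 15.0
```

Each pass adds element `i` into element `i - skip` and then doubles `skip`, so every input participates in O(log n) additions rather than being folded into one long running sum, which is where the round-off benefit of pairwise summation comes from.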
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/framework/variant.h" +#include "tensorflow/core/framework/variant_encode_decode.h" +#include "tensorflow/core/framework/variant_tensor_data.h" #include "tensorflow/core/kernels/ops_testutil.h" #include "tensorflow/core/kernels/ops_util.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -148,6 +151,25 @@ TEST_F(AsStringGraphTest, Bool) { test::ExpectTensorEqual(expected, *GetOutput(0)); } +TEST_F(AsStringGraphTest, Variant) { + TF_ASSERT_OK(Init(DT_VARIANT)); + + AddInput(DT_VARIANT, TensorShape({4})); + auto inputs = mutable_input(0)->flat(); + inputs(0) = 2; + inputs(1) = 3; + inputs(2) = true; + inputs(3) = Tensor("hi"); + TF_ASSERT_OK(RunOpKernel()); + Tensor expected(allocator(), DT_STRING, TensorShape({4})); + test::FillValues( + &expected, {"Variant", "Variant", + "Variant", + ("Variant>")}); + test::ExpectTensorEqual(expected, *GetOutput(0)); +} + TEST_F(AsStringGraphTest, String) { Status s = Init(DT_STRING); ASSERT_EQ(error::INVALID_ARGUMENT, s.code()); diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index d0247e0cb94c05fd5cc875ddbca4c70a2b4ab3f6..6623524c7173b30bc8bdc35ea1cbf70b32e25b7f 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -116,7 +116,7 @@ REGISTER_OP("AsString") .Output("output: string") .Attr( "T: {int8, int16, int32, int64, complex64, complex128, float, double, " - "bool}") + "bool, variant}") .Attr("precision: int = -1") .Attr("scientific: bool = false") .Attr("shortest: bool = false") diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index a8d40b8438c0354f0d3be4edb318e27ebd61c08b..b6f3b6b8c82d49a4e920eb32ab10e5799bac6820 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1606,6 +1606,7 @@ cuda_py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:math_ops", + "//tensorflow/python:string_ops", "//third_party/py/numpy", ], ) diff --git a/tensorflow/python/kernel_tests/aggregate_ops_test.py b/tensorflow/python/kernel_tests/aggregate_ops_test.py index d9787cc3bf6b6bdbdc917c9d40b8ebdfed9eb3bb..adb4f3a0f2f10c5f223bfecee5835988747e5050 100644 --- a/tensorflow/python/kernel_tests/aggregate_ops_test.py +++ b/tensorflow/python/kernel_tests/aggregate_ops_test.py @@ -26,8 +26,8 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops -from tensorflow.python.ops import logging_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import string_ops from tensorflow.python.platform import test @@ -100,24 +100,28 @@ class AddNTest(test.TestCase): # TODO(ebrevdo): Re-enable use_gpu=True once non-DMA Variant # copying between CPU and GPU is supported. with self.session(use_gpu=False): - variant_const_3 = create_constant_variant(3) - variant_const_4 = create_constant_variant(4) - variant_const_5 = create_constant_variant(5) - # 3 + 3 + 5 + 4 = 15. 
diff --git a/tensorflow/python/kernel_tests/aggregate_ops_test.py b/tensorflow/python/kernel_tests/aggregate_ops_test.py
index d9787cc3bf6b6bdbdc917c9d40b8ebdfed9eb3bb..adb4f3a0f2f10c5f223bfecee5835988747e5050 100644
--- a/tensorflow/python/kernel_tests/aggregate_ops_test.py
+++ b/tensorflow/python/kernel_tests/aggregate_ops_test.py
@@ -26,8 +26,8 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import logging_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
 from tensorflow.python.platform import test
 
 
@@ -100,24 +100,28 @@ class AddNTest(test.TestCase):
     # TODO(ebrevdo): Re-enable use_gpu=True once non-DMA Variant
     # copying between CPU and GPU is supported.
     with self.session(use_gpu=False):
-      variant_const_3 = create_constant_variant(3)
-      variant_const_4 = create_constant_variant(4)
-      variant_const_5 = create_constant_variant(5)
-      # 3 + 3 + 5 + 4 = 15.
-      result = math_ops.add_n((variant_const_3, variant_const_3,
-                               variant_const_5, variant_const_4))
+      num_tests = 127
+      values = list(range(100))
+      variant_consts = [create_constant_variant(x) for x in values]
+      sum_count_indices = np.random.randint(1, 29, size=num_tests)
+      sum_indices = [
+          np.random.randint(100, size=count) for count in sum_count_indices]
+      expected_sums = [np.sum(x) for x in sum_indices]
+      variant_sums = [math_ops.add_n([variant_consts[i] for i in x])
+                      for x in sum_indices]
 
-      # Smoke test -- ensure this executes without trouble.
+      # We use as_string() to get the Variant DebugString for the
+      # variant_sums; we know its value so we can check via string equality
+      # here.
+      #
       # Right now, non-numpy-compatible objects cannot be returned from a
       # session.run call; similarly, objects that can't be converted to
       # native numpy types cannot be passed to ops.convert_to_tensor.
-      # For now, run the test and examine the output to see that the result is
-      # equal to 15.
-      result_op = logging_ops.Print(
-          result, [variant_const_3, variant_const_4, variant_const_5, result],
-          message=("Variants stored an int: c(3), c(4), c(5), "
-                   "add_n(c(3), c(3), c(5), c(4)): ")).op
-      result_op.run()
+      variant_sums_string = string_ops.as_string(variant_sums)
+      self.assertAllEqual(
+          variant_sums_string,
+          ["Variant<type: int value: {}>".format(s).encode("utf-8")
+           for s in expected_sums])
 
 
 if __name__ == "__main__":
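Note: end-to-end, the patch means a DT_VARIANT tensor can now be passed to tf.strings.as_string, which returns Variant::DebugString() for each element, and tf.add_n over variant scalars is summed pairwise. A minimal, hypothetical usage sketch follows; a tf.data dataset variant is used only because it is easy to obtain from the public API, and the exact DebugString text is not guaranteed.

```python
# Hypothetical sketch: inspect an otherwise opaque variant via as_string.
# Assumes a TensorFlow build that includes this patch.
import tensorflow as tf

dataset = tf.data.Dataset.range(5)
variant = tf.data.experimental.to_variant(dataset)  # scalar DT_VARIANT tensor

# Before this patch, as_string rejected DT_VARIANT; now it returns the
# variant's DebugString, e.g. something like b'Variant<type: ... value: ...>'.
print(tf.strings.as_string(variant).numpy())
```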