This repository was archived by the owner on Nov 17, 2023. It is now read-only.

Commit f9140bb

support mixed-precision binary operations
1 parent 0c5677e commit f9140bb

File tree

9 files changed: +632 −21 lines changed

src/common/utils.h

Lines changed: 3 additions & 3 deletions
@@ -842,7 +842,7 @@ inline bool is_float(const int dtype) {
   return dtype == mshadow::kFloat32 || dtype == mshadow::kFloat64 || dtype == mshadow::kFloat16;
 }
 
-inline int more_precise_type(const int type1, const int type2) {
+inline int get_more_precise_type(const int type1, const int type2) {
   if (type1 == type2) return type1;
   if (is_float(type1) && is_float(type2)) {
     if (type1 == mshadow::kFloat64 || type2 == mshadow::kFloat64) {
@@ -870,12 +870,12 @@ inline int more_precise_type(const int type1, const int type2) {
   return mshadow::kInt8;
 }
 
-inline int np_binary_out_type(const int type1, const int type2) {
+inline int np_binary_out_infer_type(const int type1, const int type2) {
   if ((type1 == mshadow::kUint8 && type2 == mshadow::kInt8) ||
       (type1 == mshadow::kInt8 && type2 == mshadow::kUint8)) {
     return mshadow::kInt32;
   }
-  return more_precise_type(type1, type2);
+  return get_more_precise_type(type1, type2);
 }
 
 }  // namespace common
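The renamed np_binary_out_infer_type is the promotion rule that the new FInferType hook registered later in np_elemwise_broadcast_op.cc relies on. Since only fragments of the two function bodies are visible in this hunk, the following standalone sketch shows the intended behaviour with an illustrative dtype enum standing in for mshadow's type flags; the branches are assumptions for illustration, not a copy of the real implementation.

#include <cassert>

// Illustrative stand-ins for mshadow's dtype flags; the values are arbitrary.
enum DTypeSketch { kFloat32, kFloat64, kFloat16, kUint8, kInt32, kInt8, kInt64 };

inline bool is_float_sketch(int t) {
  return t == kFloat32 || t == kFloat64 || t == kFloat16;
}

// Assumed promotion rule: same type wins, wider float beats narrower float,
// any float beats any integer, wider integer beats narrower integer.
inline int get_more_precise_type_sketch(int t1, int t2) {
  if (t1 == t2) return t1;
  if (is_float_sketch(t1) && is_float_sketch(t2)) {
    if (t1 == kFloat64 || t2 == kFloat64) return kFloat64;
    return kFloat32;
  }
  if (is_float_sketch(t1) || is_float_sketch(t2)) {
    return is_float_sketch(t1) ? t1 : t2;
  }
  if (t1 == kInt64 || t2 == kInt64) return kInt64;
  if (t1 == kInt32 || t2 == kInt32) return kInt32;
  return kInt8;
}

inline int np_binary_out_infer_type_sketch(int t1, int t2) {
  // Mixing signed and unsigned 8-bit integers widens to a signed 32-bit result.
  if ((t1 == kUint8 && t2 == kInt8) || (t1 == kInt8 && t2 == kUint8)) return kInt32;
  return get_more_precise_type_sketch(t1, t2);
}

int main() {
  assert(np_binary_out_infer_type_sketch(kInt32, kFloat16) == kFloat16);    // int + half -> half
  assert(np_binary_out_infer_type_sketch(kUint8, kInt8) == kInt32);         // uint8 + int8 -> int32
  assert(np_binary_out_infer_type_sketch(kFloat32, kFloat64) == kFloat64);  // float + double -> double
  return 0;
}

Compiled on its own, the asserts document the three cases that matter for this commit: an integer operand paired with a half-precision operand promotes to half, mixing int8 with uint8 widens to int32, and mixing two floats keeps the wider one.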

src/operator/mshadow_op.h

Lines changed: 94 additions & 0 deletions
@@ -194,6 +194,100 @@ MXNET_BINARY_MATH_OP_NC(right, b);
 
 MXNET_BINARY_MATH_OP_NC(mul, a * b);
 
+#ifndef _WIN32
+struct mixed_plus {
+  template<typename DType,
+           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return static_cast<mshadow::half::half_t>(a) + b;
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return static_cast<float>(a) + b;
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_same<DType, float>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return static_cast<double>(a) + b;
+  }
+};
+
+struct mixed_minus {
+  template<typename DType,
+           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return static_cast<mshadow::half::half_t>(a) - b;
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return static_cast<float>(a) - b;
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_same<DType, float>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return static_cast<double>(a) - b;
+  }
+};
+
+struct mixed_rminus {
+  template<typename DType,
+           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return b - static_cast<mshadow::half::half_t>(a);
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return b - static_cast<float>(a);
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_same<DType, float>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return b - static_cast<double>(a);
+  }
+};
+
+struct mixed_mul {
+  template<typename DType,
+           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static mshadow::half::half_t Map(DType a, mshadow::half::half_t b) {
+    return static_cast<mshadow::half::half_t>(a) * b;
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static float Map(DType a, float b) {
+    return static_cast<float>(a) * b;
+  }
+
+  template<typename DType,
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_same<DType, float>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
+  MSHADOW_XINLINE static double Map(DType a, double b) {
+    return static_cast<double>(a) * b;
+  }
+};
+#endif
+
 MXNET_BINARY_MATH_OP_NC(div, a / b);
 
 MXNET_BINARY_MATH_OP_NC(plus, a + b);
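The mixed_* functors are compiled only outside Windows (note the #ifndef _WIN32 guard); the Windows registrations later in this commit fall back to the ordinary homogeneous functors. Each functor carries three Map overloads constrained with std::enable_if so that the wider operand always decides the return type: integral with half gives half, half or integral with float gives float, and anything narrower with double gives double. Below is a minimal self-contained sketch of that dispatch pattern, using a plain half placeholder instead of mshadow::half::half_t and dropping MSHADOW_XINLINE so the example compiles on its own; it illustrates the overload-resolution trick and is not the real functor.

#include <iostream>
#include <type_traits>

// Placeholder for mshadow::half::half_t, only here so the sketch is self-contained.
struct half {
  float v;
  operator float() const { return v; }  // enables static_cast<float>/<double> below
};

struct mixed_plus_sketch {
  // integral + half -> half
  template<typename DType,
           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
  static half Map(DType a, half b) {
    return half{static_cast<float>(a) + static_cast<float>(b)};
  }

  // (half or integral) + float -> float
  template<typename DType,
           typename std::enable_if<std::is_same<DType, half>::value ||
                                   std::is_integral<DType>::value, int>::type = 0>
  static float Map(DType a, float b) {
    return static_cast<float>(a) + b;
  }

  // (half, float, or integral) + double -> double
  template<typename DType,
           typename std::enable_if<std::is_same<DType, half>::value ||
                                   std::is_same<DType, float>::value ||
                                   std::is_integral<DType>::value, int>::type = 0>
  static double Map(DType a, double b) {
    return static_cast<double>(a) + b;
  }
};

int main() {
  half h{1.5f};
  std::cout << static_cast<float>(mixed_plus_sketch::Map(2, h)) << "\n";  // 3.5, half result
  std::cout << mixed_plus_sketch::Map(h, 2.0f) << "\n";                   // 3.5, float result
  std::cout << mixed_plus_sketch::Map(3, 2.0) << "\n";                    // 5, double result
  return 0;
}

The same pattern repeats for mixed_minus, mixed_rminus, and mixed_mul; only the arithmetic in the return statements changes.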

src/operator/mxnet_op.h

Lines changed: 10 additions & 4 deletions
@@ -859,14 +859,17 @@ struct op_with_req {
 
   /*! \brief inputs are two tensors with a float output tensor */
   template<typename DType,
-           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
   MSHADOW_XINLINE static void Map(index_t i, float *out, const DType *lhs, const float *rhs) {
     KERNEL_ASSIGN(out[i], req, OP::Map(lhs[i], rhs[i]));
   }
 
   /*! \brief inputs are two tensors with a double output tensor */
   template<typename DType,
-           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_same<DType, float>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
   MSHADOW_XINLINE static void Map(index_t i, double *out, const DType *lhs, const double *rhs) {
     KERNEL_ASSIGN(out[i], req, OP::Map(lhs[i], rhs[i]));
   }
@@ -883,14 +886,17 @@ struct op_with_req {
 
   /*! \brief inputs are two tensors with a float output tensor */
   template<typename DType,
-           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
  MSHADOW_XINLINE static void Map(index_t i, float *out, const DType *lhs, const float value) {
     KERNEL_ASSIGN(out[i], req, OP::Map(lhs[i], value));
   }
 
   /*! \brief inputs are two tensors with a double output tensor */
   template<typename DType,
-           typename std::enable_if<std::is_integral<DType>::value, int>::type = 0>
+           typename std::enable_if<std::is_same<DType, mshadow::half::half_t>::value ||
+                                   std::is_same<DType, float>::value ||
+                                   std::is_integral<DType>::value, int>::type = 0>
   MSHADOW_XINLINE static void Map(index_t i, double *out, const DType *lhs, const double value) {
     KERNEL_ASSIGN(out[i], req, OP::Map(lhs[i], value));
   }
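Both overload families of op_with_req::Map shown above take a narrow input array and write into a wider float or double output buffer; the change simply widens their std::enable_if guards so half_t inputs (and float inputs for the double variants) are accepted as well. The real call path goes through Kernel<op_with_req<OP, req>, xpu>::Launch and KERNEL_ASSIGN; the plain loop below is only an illustrative stand-in showing the calling shape, with plus_sketch as a hypothetical operator.

#include <cstdint>
#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical elementwise op: narrow lhs element, wide rhs element, wide result.
struct plus_sketch {
  template<typename L, typename R>
  static R Map(L a, R b) { return static_cast<R>(a) + b; }
};

// Plain-loop stand-in for the kernel launch: narrow lhs array, float rhs array, float out.
template<typename OP, typename DType>
void map_all(std::size_t n, float* out, const DType* lhs, const float* rhs) {
  for (std::size_t i = 0; i < n; ++i) {
    out[i] = OP::Map(lhs[i], rhs[i]);  // e.g. int32 (or half) lhs element + float rhs element
  }
}

int main() {
  std::vector<int32_t> lhs = {1, 2, 3};
  std::vector<float> rhs = {0.5f, 0.25f, 0.125f};
  std::vector<float> out(lhs.size());
  map_all<plus_sketch>(out.size(), out.data(), lhs.data(), rhs.data());
  for (float v : out) std::cout << v << " ";  // 1.5 2.25 3.125
  std::cout << "\n";
  return 0;
}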

src/operator/numpy/np_elemwise_broadcast_op.cc

Lines changed: 92 additions & 8 deletions
@@ -23,8 +23,7 @@
  * \brief CPU Implementation of basic functions for elementwise numpy binary broadcast operator.
  */
 
-#include "../tensor/elemwise_binary_broadcast_op.h"
-#include "../tensor/elemwise_binary_scalar_op.h"
+#include "./np_elemwise_broadcast_op.h"
 
 namespace mxnet {
 namespace op {
@@ -55,17 +54,102 @@ bool NumpyBinaryScalarType(const nnvm::NodeAttrs& attrs,
   .add_argument("data", "NDArray-or-Symbol", "source input")                   \
   .add_argument("scalar", "float", "scalar input")
 
+bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
+                                   std::vector<int>* in_attrs,
+                                   std::vector<int>* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 2U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  const int ltype = in_attrs->at(0);
+  const int rtype = in_attrs->at(1);
+  if (ltype != -1 && rtype != -1 && (ltype != rtype)) {
+    // Only when both input types are known and not the same, we enter the mixed-precision mode
+    TYPE_ASSIGN_CHECK(*out_attrs, 0, common::np_binary_out_infer_type(ltype, rtype));
+  } else {
+    return ElemwiseType<2, 1>(attrs, in_attrs, out_attrs);
+  }
+  return true;
+}
 
-MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_add)
-.set_attr<FCompute>("FCompute<cpu>", BinaryBroadcastCompute<cpu, op::mshadow_op::plus>)
+#ifdef _WIN32
+#define MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(name)                \
+  NNVM_REGISTER_OP(name)                                                       \
+  .set_num_inputs(2)                                                           \
+  .set_num_outputs(1)                                                          \
+  .set_attr<nnvm::FListInputNames>("FListInputNames",                          \
+    [](const NodeAttrs& attrs) {                                               \
+      return std::vector<std::string>{"lhs", "rhs"};                           \
+    })                                                                         \
+  .set_attr<mxnet::FInferShape>("FInferShape", BinaryBroadcastShape)           \
+  .set_attr<nnvm::FInferType>("FInferType", NumpyBinaryMixedPrecisionType)     \
+  .set_attr<nnvm::FInplaceOption>("FInplaceOption",                            \
+    [](const NodeAttrs& attrs){                                                \
+      return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}};                \
+    })                                                                         \
+  .set_attr<FResourceRequest>("FResourceRequest",                              \
+    [](const NodeAttrs& attrs) {                                               \
+      return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};        \
+    })                                                                         \
+  .add_argument("lhs", "NDArray-or-Symbol", "First input to the function")     \
+  .add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
+#else
+#define MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(name)                \
+  NNVM_REGISTER_OP(name)                                                       \
+  .set_num_inputs(2)                                                           \
+  .set_num_outputs(1)                                                          \
+  .set_attr<nnvm::FListInputNames>("FListInputNames",                          \
+    [](const NodeAttrs& attrs) {                                               \
+      return std::vector<std::string>{"lhs", "rhs"};                           \
+    })                                                                         \
+  .set_attr<mxnet::FInferShape>("FInferShape", BinaryBroadcastShape)           \
+  .set_attr<nnvm::FInferType>("FInferType", NumpyBinaryMixedPrecisionType)     \
+  .set_attr<nnvm::FInplaceOption>("FInplaceOption",                            \
+    [](const NodeAttrs& attrs){                                                \
+      return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}};                \
+    })                                                                         \
+  .add_argument("lhs", "NDArray-or-Symbol", "First input to the function")     \
+  .add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
+#endif
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_add)
+#ifndef _WIN32
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  MixedBinaryBroadcastCompute<cpu, op::mshadow_op::plus, op::mshadow_op::mixed_plus,
+                              op::mshadow_op::mixed_plus>)
+#else
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  MixedBinaryBroadcastCompute<cpu, op::mshadow_op::plus, op::mshadow_op::plus,
+                              op::mshadow_op::plus>)
+#endif
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_broadcast_add"});
 
-MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_subtract)
-.set_attr<FCompute>("FCompute<cpu>", BinaryBroadcastCompute<cpu, op::mshadow_op::minus>)
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_subtract)
+#ifndef _WIN32
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  MixedBinaryBroadcastCompute<cpu, op::mshadow_op::minus, op::mshadow_op::mixed_minus,
+                              op::mshadow_op::mixed_rminus>)
+#else
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  MixedBinaryBroadcastCompute<cpu, op::mshadow_op::minus, op::mshadow_op::minus,
+                              op::mshadow_op::minus>)
+#endif
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_broadcast_sub"});
 
-MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_multiply)
-.set_attr<FCompute>("FCompute<cpu>", BinaryBroadcastCompute<cpu, op::mshadow_op::mul>)
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply)
+#ifndef _WIN32
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  MixedBinaryBroadcastCompute<cpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
+                              op::mshadow_op::mixed_mul>)
+#else
+.set_attr<FCompute>(
+  "FCompute<cpu>",
  MixedBinaryBroadcastCompute<cpu, op::mshadow_op::mul, op::mshadow_op::mul,
                              op::mshadow_op::mul>)
+#endif
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_broadcast_mul"});
 
 MXNET_OPERATOR_REGISTER_BINARY_BROADCAST(_npi_mod)
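MixedBinaryBroadcastCompute takes the homogeneous functor plus a left and a right mixed-precision functor; its definition lives in the new np_elemwise_broadcast_op.h header included above and is not part of this hunk. Subtraction needs both mixed_minus and mixed_rminus because the mixed functors always receive the narrower operand as their first argument, so a non-commutative operation needs a reversed variant for the case where the right-hand side is the narrow one. The sketch below illustrates that dispatch idea under that assumption; it is not the actual implementation.

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical per-element functors mirroring mshadow_op::mixed_minus / mixed_rminus
// for one concrete narrow/wide pair (int and float).
struct minus_sketch  { static float Map(int a, float b) { return static_cast<float>(a) - b; } };
struct rminus_sketch { static float Map(int a, float b) { return b - static_cast<float>(a); } };

// Sketch of the dispatch idea: the narrow operand is always the first argument of the
// mixed functor, so LOP handles "lhs is narrow" and ROP handles "rhs is narrow".
template<typename LOP, typename ROP>
void mixed_sub(const std::vector<int>& narrow, const std::vector<float>& wide,
               bool narrow_is_lhs, std::vector<float>* out) {
  out->resize(narrow.size());
  for (std::size_t i = 0; i < narrow.size(); ++i) {
    (*out)[i] = narrow_is_lhs ? LOP::Map(narrow[i], wide[i])   // lhs - rhs, lhs narrow
                              : ROP::Map(narrow[i], wide[i]);  // lhs - rhs, rhs narrow
  }
}

int main() {
  std::vector<int> i32 = {1, 2, 3};
  std::vector<float> f32 = {0.5f, 0.5f, 0.5f};
  std::vector<float> out;
  mixed_sub<minus_sketch, rminus_sketch>(i32, f32, true, &out);   // int lhs - float rhs
  std::cout << out[0] << "\n";                                    // 0.5
  mixed_sub<minus_sketch, rminus_sketch>(i32, f32, false, &out);  // float lhs - int rhs
  std::cout << out[0] << "\n";                                    // -0.5
  return 0;
}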

src/operator/numpy/np_elemwise_broadcast_op.cu

Lines changed: 35 additions & 5 deletions
@@ -22,20 +22,50 @@
  * \file np_elemwise_broadcast_op.cu
  * \brief GPU Implementation of basic functions for elementwise binary broadcast operator.
  */
-#include "../tensor/elemwise_binary_broadcast_op.h"
-#include "../tensor/elemwise_binary_scalar_op.h"
+
+#include "./np_elemwise_broadcast_op.h"
 
 namespace mxnet {
 namespace op {
 
 NNVM_REGISTER_OP(_npi_add)
-.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastCompute<gpu, op::mshadow_op::plus>);
+#ifndef _WIN32
+.set_attr<FCompute>(
+  "FCompute<gpu>",
+  MixedBinaryBroadcastCompute<gpu, op::mshadow_op::plus, op::mshadow_op::mixed_plus,
+                              op::mshadow_op::mixed_plus>);
+#else
+.set_attr<FCompute>(
+  "FCompute<gpu>",
+  MixedBinaryBroadcastCompute<gpu, op::mshadow_op::plus, op::mshadow_op::plus,
+                              op::mshadow_op::plus>);
+#endif
 
 NNVM_REGISTER_OP(_npi_subtract)
-.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastCompute<gpu, op::mshadow_op::minus>);
+#ifndef _WIN32
+.set_attr<FCompute>(
+  "FCompute<gpu>",
+  MixedBinaryBroadcastCompute<gpu, op::mshadow_op::minus, op::mshadow_op::mixed_minus,
+                              op::mshadow_op::mixed_rminus>);
+#else
+.set_attr<FCompute>(
+  "FCompute<gpu>",
+  MixedBinaryBroadcastCompute<gpu, op::mshadow_op::minus, op::mshadow_op::minus,
+                              op::mshadow_op::minus>);
+#endif
 
 NNVM_REGISTER_OP(_npi_multiply)
-.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastCompute<gpu, op::mshadow_op::mul>);
+#ifndef _WIN32
+.set_attr<FCompute>(
+  "FCompute<gpu>",
+  MixedBinaryBroadcastCompute<gpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
+                              op::mshadow_op::mixed_mul>);
+#else
+.set_attr<FCompute>(
+  "FCompute<gpu>",
+  MixedBinaryBroadcastCompute<gpu, op::mshadow_op::mul, op::mshadow_op::mul,
+                              op::mshadow_op::mul>);
+#endif
 
 NNVM_REGISTER_OP(_npi_mod)
 .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastCompute<gpu, mshadow_op::mod>);
