[SPARK-52264][PS][TESTS] Test divide-by-zero behavior with more numeric data types

xinrong-meng · xinrong-meng · commit 940e9058fe2f · 2025-05-27T13:30:24.000-07:00
### What changes were proposed in this pull request? Test divide-by-zero behavior with more numeric data types. ### Why are the changes needed? To ensure that divide-by-zero operations in Pandas on Spark behave consistently with native Pandas, and to serve as a safeguard when ANSI mode is enabled. Part of https://issues.apache.org/jira/browse/SPARK-52169. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test change only. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50988 from xinrong-meng/test_divide_0_more. Authored-by: Xinrong Meng <xinrong@apache.org> Signed-off-by: Xinrong Meng <xinrong@apache.org>
diff --git a/python/pyspark/pandas/tests/computation/test_binary_ops.py b/python/pyspark/pandas/tests/computation/test_binary_ops.py
@@ -113,19 +113,69 @@ def test_binary_operator_sub(self):
 
     @unittest.skipIf(is_ansi_mode_test, ansi_mode_not_supported_message)
     def test_divide_by_zero_behavior(self):
+        # float / float
+        for dtype in [np.float32, np.float64]:
+            pdf = pd.DataFrame(
+                {
+                    "a": [1.0, -1.0, 0.0, np.nan],
+                    "b": [0.0, 0.0, 0.0, 0.0],
+                },
+                dtype=dtype,
+            )
+            psdf = ps.from_pandas(pdf)
+
+            self.assert_eq(psdf["a"] / psdf["b"], pdf["a"] / pdf["b"])
+
+        # int / int
+        for dtype in [np.int32, np.int64]:
+            pdf = pd.DataFrame(
+                {
+                    "a": [1, -1, 0],
+                    "b": [0, 0, 0],
+                },
+                dtype=dtype,
+            )
+            psdf = ps.from_pandas(pdf)
+            self.assert_eq(psdf["a"] / psdf["b"], pdf["a"] / pdf["b"])
+
+        # float / int
         pdf = pd.DataFrame(
             {
-                "a": [1.0, -1.0, 0.0, np.nan],
-                "b": [0.0, 0.0, 0.0, 0.0],
+                "a": pd.Series([1.0, -1.0, 0.0, np.nan]),
+                "b": pd.Series([0, 0, 0, 0]),
             }
         )
         psdf = ps.from_pandas(pdf)
+        self.assert_eq(psdf["a"] / psdf["b"], pdf["a"] / pdf["b"])
+
+        # int / float
+        pdf = pd.DataFrame(
+            {
+                "a": pd.Series([1, -1, 0]),
+                "b": pd.Series([0.0, 0.0, 0.0]),
+            }
+        )
+        psdf = ps.from_pandas(pdf)
+        self.assert_eq(psdf["a"] / psdf["b"], pdf["a"] / pdf["b"])
 
-        # a / b: .. divide by zero
+        # bool
+        pdf = pd.DataFrame(
+            {
+                "a": pd.Series([True, False]),
+                "b": pd.Series([0, 0]),
+            }
+        )
+        psdf = ps.from_pandas(pdf)
         self.assert_eq(psdf["a"] / psdf["b"], pdf["a"] / pdf["b"])
 
-        # b / a: 0 divided by ..
-        self.assert_eq(psdf["b"] / psdf["a"], pdf["b"] / pdf["a"])
+        pdf = pd.DataFrame(
+            {
+                "a": pd.Series([True, False]),
+                "b": pd.Series([0.0, 0.0]),
+            }
+        )
+        psdf = ps.from_pandas(pdf)
+        self.assert_eq(psdf["a"] / psdf["b"], pdf["a"] / pdf["b"])
 
     def test_binary_operator_truediv(self):
         # Positive