Skip to content

Commit 940e905

Browse files
committed
[SPARK-52264][PS][TESTS] Test divide-by-zero behavior with more numeric data types
### What changes were proposed in this pull request?
Test divide-by-zero behavior with more numeric data types.

### Why are the changes needed?
To ensure that divide-by-zero operations in Pandas on Spark behave consistently with native Pandas, and as a safeguard when ANSI mode is enabled.
Part of https://issues.apache.org/jira/browse/SPARK-52169.

### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Test change only.

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes #50988 from xinrong-meng/test_divide_0_more.

Authored-by: Xinrong Meng <[email protected]>
Signed-off-by: Xinrong Meng <[email protected]>
1 parent cc7bf98 commit 940e905

File tree

1 file changed

+55
-5
lines changed

1 file changed

+55
-5
lines changed

python/pyspark/pandas/tests/computation/test_binary_ops.py

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -113,19 +113,69 @@ def test_binary_operator_sub(self):
113113

114114
@unittest.skipIf(is_ansi_mode_test, ansi_mode_not_supported_message)
def test_divide_by_zero_behavior(self):
    """Compare ``a / b`` between pandas-on-Spark and pandas when ``b`` is all zeros.

    Each case builds a pandas DataFrame with a numerator column ``"a"`` and an
    all-zero denominator column ``"b"``, converts it with ``ps.from_pandas``,
    and asserts that the pandas-on-Spark quotient equals the pandas quotient.
    Cases covered: float / float, int / int, float / int, int / float,
    bool / int and bool / float.

    The test is skipped when ``is_ansi_mode_test`` is truthy.
    """

    def check_a_over_b(expected_frame):
        # Convert first, then compare the two quotients; pandas is the reference.
        actual_frame = ps.from_pandas(expected_frame)
        self.assert_eq(
            actual_frame["a"] / actual_frame["b"],
            expected_frame["a"] / expected_frame["b"],
        )

    # float / float
    for float_type in (np.float32, np.float64):
        check_a_over_b(
            pd.DataFrame(
                {
                    "a": [1.0, -1.0, 0.0, np.nan],
                    "b": [0.0, 0.0, 0.0, 0.0],
                },
                dtype=float_type,
            )
        )

    # int / int
    for int_type in (np.int32, np.int64):
        check_a_over_b(
            pd.DataFrame(
                {
                    "a": [1, -1, 0],
                    "b": [0, 0, 0],
                },
                dtype=int_type,
            )
        )

    # float / int
    float_over_int = pd.DataFrame(
        {
            "a": pd.Series([1.0, -1.0, 0.0, np.nan]),
            "b": pd.Series([0, 0, 0, 0]),
        }
    )
    check_a_over_b(float_over_int)

    # int / float
    int_over_float = pd.DataFrame(
        {
            "a": pd.Series([1, -1, 0]),
            "b": pd.Series([0.0, 0.0, 0.0]),
        }
    )
    check_a_over_b(int_over_float)

    # bool / int
    bool_over_int = pd.DataFrame(
        {
            "a": pd.Series([True, False]),
            "b": pd.Series([0, 0]),
        }
    )
    check_a_over_b(bool_over_int)

    # bool / float
    bool_over_float = pd.DataFrame(
        {
            "a": pd.Series([True, False]),
            "b": pd.Series([0.0, 0.0]),
        }
    )
    check_a_over_b(bool_over_float)
129179

130180
def test_binary_operator_truediv(self):
131181
# Positive

0 commit comments

Comments
 (0)