Skip to content

Commit b032702

Browse files
Support JSON lines with empty struct (#7162)
* Test cast_array_to_features with empty struct
* Support cast_array_to_feature with empty struct
* Rename test to align with function name
1 parent: 2eb4edb; commit: b032702

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

src/datasets/table.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2001,8 +2001,6 @@ def cast_array_to_feature(
20012001
feature = sequence_kwargs.pop("feature")
20022002
feature = {name: Sequence(subfeature, **sequence_kwargs) for name, subfeature in feature.items()}
20032003
if isinstance(feature, dict) and (array_fields := {field.name for field in array.type}) <= set(feature):
2004-
if array.type.num_fields == 0:
2005-
return array
20062004
null_array = pa.array([None] * len(array))
20072005
arrays = [
20082006
_c(array.field(name) if name in array_fields else null_array, subfeature)

tests/test_table.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1142,12 +1142,19 @@ def test_cast_decimal_array_to_features():
11421142
cast_array_to_feature(arr, Sequence(Value("string")), allow_decimal_to_str=False)
11431143

11441144

1145-
def test_cast_array_to_features_with_struct_with_missing_fields():
1146-
arr = pa.array([{"age": 25}, {"age": 63}])
1145+
@pytest.mark.parametrize(
1146+
"array_list, expected_list",
1147+
[
1148+
([{"age": 25}, {"age": 63}], [{"age": 25, "name": None}, {"age": 63, "name": None}]),
1149+
([{}, {}], [{"age": None, "name": None}, {"age": None, "name": None}]), # completely empty struct
1150+
],
1151+
)
1152+
def test_cast_array_to_feature_with_struct_with_missing_fields(array_list, expected_list):
1153+
arr = pa.array(array_list)
11471154
feature = {"age": Value("int32"), "name": Value("string")}
11481155
cast_array = cast_array_to_feature(arr, feature)
11491156
assert cast_array.type == pa.struct({"age": pa.int32(), "name": pa.string()})
1150-
assert cast_array.to_pylist() == [{"age": 25, "name": None}, {"age": 63, "name": None}]
1157+
assert cast_array.to_pylist() == expected_list
11511158

11521159

11531160
def test_cast_array_to_features_nested():

0 commit comments

Comments
 (0)