Skip to content

Commit 4c81add

Browse files
Merge remote-tracking branch 'upstream/main' into apachegh-43683-pandas-string-dtype
2 parents 42ecbe8 + 6252e9c commit 4c81add

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+836
-152
lines changed

.github/workflows/dev_pr/helpers.js

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ const https = require('https');
2020
/**
2121
* Given the title of a PullRequest return the Issue
2222
*
23-
* @param {String} title
23+
* @param {String} title
2424
* @returns {Issue} or null if no issue detected.
2525
*
2626
* @typedef {Object} Issue
@@ -62,6 +62,5 @@ function detectIssue(title) {
6262

6363
module.exports = {
6464
detectIssue,
65-
getJiraInfo,
6665
getGitHubInfo
6766
};

.github/workflows/dev_pr/issue_check.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ module.exports = async ({github, context}) => {
7878
const pullRequestNumber = context.payload.number;
7979
const title = context.payload.pull_request.title;
8080
const issue = helpers.detectIssue(title)
81-
if (issue){
81+
if (issue && issue.kind === "github") {
8282
await verifyGitHubIssue(github, context, pullRequestNumber, issue.id);
8383
}
8484
};

.github/workflows/dev_pr/link.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ module.exports = async ({github, context}) => {
7777
const pullRequestNumber = context.payload.number;
7878
const title = context.payload.pull_request.title;
7979
const issue = helpers.detectIssue(title);
80-
if (issue){
80+
if (issue && issue.kind === "github") {
8181
await commentGitHubURL(github, context, pullRequestNumber, issue.id);
8282
}
8383
};

ci/docker/debian-12-cpp.dockerfile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,11 @@ RUN /arrow/ci/scripts/install_azurite.sh
108108
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
109109
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
110110

111+
# Prioritize system packages and local installation.
112+
#
113+
# The following dependencies will be downloaded due to missing/invalid packages
114+
# provided by the distribution:
115+
# - opentelemetry-cpp-dev is not packaged
111116
ENV ARROW_ACERO=ON \
112117
ARROW_AZURE=ON \
113118
ARROW_BUILD_TESTS=ON \
@@ -134,6 +139,7 @@ ENV ARROW_ACERO=ON \
134139
AWSSDK_SOURCE=BUNDLED \
135140
Azure_SOURCE=BUNDLED \
136141
google_cloud_cpp_storage_SOURCE=BUNDLED \
142+
opentelemetry_cpp_SOURCE=BUNDLED \
137143
ORC_SOURCE=BUNDLED \
138144
PATH=/usr/lib/ccache/:$PATH \
139145
PYTHON=python3 \

ci/docker/fedora-39-cpp.dockerfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ ENV ARROW_ACERO=ON \
103103
CC=gcc \
104104
CXX=g++ \
105105
google_cloud_cpp_storage_SOURCE=BUNDLED \
106+
opentelemetry_cpp_SOURCE=BUNDLED \
106107
PARQUET_BUILD_EXAMPLES=ON \
107108
PARQUET_BUILD_EXECUTABLES=ON \
108109
PATH=/usr/lib/ccache/:$PATH \

ci/docker/ubuntu-20.04-cpp.dockerfile

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,16 +135,19 @@ RUN /arrow/ci/scripts/install_ceph.sh
135135
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
136136
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
137137

138-
# Prioritize system packages and local installation
138+
# Prioritize system packages and local installation.
139+
#
139140
# The following dependencies will be downloaded due to missing/invalid packages
140141
# provided by the distribution:
141142
# - Abseil is not packaged
142143
# - libc-ares-dev does not install CMake config files
143144
# - flatbuffer is not packaged
144145
# - libgtest-dev only provides sources
145146
# - libprotobuf-dev only provides sources
147+
# - opentelemetry-cpp-dev is not packaged
148+
#
146149
# ARROW-17051: this build uses static Protobuf, so we must also use
147-
# static Arrow to run Flight/Flight SQL tests
150+
# static Arrow to run Flight/Flight SQL tests.
148151
ENV absl_SOURCE=BUNDLED \
149152
ARROW_ACERO=ON \
150153
ARROW_AZURE=OFF \
@@ -179,6 +182,7 @@ ENV absl_SOURCE=BUNDLED \
179182
google_cloud_cpp_storage_SOURCE=BUNDLED \
180183
gRPC_SOURCE=BUNDLED \
181184
GTest_SOURCE=BUNDLED \
185+
opentelemetry_cpp_SOURCE=BUNDLED \
182186
ORC_SOURCE=BUNDLED \
183187
PARQUET_BUILD_EXAMPLES=ON \
184188
PARQUET_BUILD_EXECUTABLES=ON \

ci/docker/ubuntu-22.04-cpp.dockerfile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,13 @@ RUN /arrow/ci/scripts/install_azurite.sh
184184
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
185185
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
186186

187-
# Prioritize system packages and local installation
187+
# Prioritize system packages and local installation.
188+
#
188189
# The following dependencies will be downloaded due to missing/invalid packages
189190
# provided by the distribution:
190191
# - Abseil is old
191192
# - libc-ares-dev does not install CMake config files
193+
# - opentelemetry-cpp-dev is not packaged
192194
ENV absl_SOURCE=BUNDLED \
193195
ARROW_ACERO=ON \
194196
ARROW_AZURE=ON \
@@ -222,6 +224,7 @@ ENV absl_SOURCE=BUNDLED \
222224
AWSSDK_SOURCE=BUNDLED \
223225
Azure_SOURCE=BUNDLED \
224226
google_cloud_cpp_storage_SOURCE=BUNDLED \
227+
opentelemetry_cpp_SOURCE=BUNDLED \
225228
ORC_SOURCE=BUNDLED \
226229
PARQUET_BUILD_EXAMPLES=ON \
227230
PARQUET_BUILD_EXECUTABLES=ON \

ci/docker/ubuntu-24.04-cpp.dockerfile

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,11 @@ RUN /arrow/ci/scripts/install_azurite.sh
172172
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
173173
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin
174174

175-
# Prioritize system packages and local installation
175+
# Prioritize system packages and local installation.
176+
#
177+
# The following dependencies will be downloaded due to missing/invalid packages
178+
# provided by the distribution:
179+
# - opentelemetry-cpp-dev is not packaged
176180
ENV ARROW_ACERO=ON \
177181
ARROW_AZURE=ON \
178182
ARROW_BUILD_STATIC=ON \
@@ -205,6 +209,7 @@ ENV ARROW_ACERO=ON \
205209
AWSSDK_SOURCE=BUNDLED \
206210
Azure_SOURCE=BUNDLED \
207211
google_cloud_cpp_storage_SOURCE=BUNDLED \
212+
opentelemetry_cpp_SOURCE=BUNDLED \
208213
ORC_SOURCE=BUNDLED \
209214
PARQUET_BUILD_EXAMPLES=ON \
210215
PARQUET_BUILD_EXECUTABLES=ON \

ci/scripts/cpp_build.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ else
225225
-DgRPC_SOURCE=${gRPC_SOURCE:-} \
226226
-DGTest_SOURCE=${GTest_SOURCE:-} \
227227
-Dlz4_SOURCE=${lz4_SOURCE:-} \
228+
-Dopentelemetry-cpp_SOURCE=${opentelemetry_cpp_SOURCE:-} \
228229
-DORC_SOURCE=${ORC_SOURCE:-} \
229230
-DPARQUET_BUILD_EXAMPLES=${PARQUET_BUILD_EXAMPLES:-OFF} \
230231
-DPARQUET_BUILD_EXECUTABLES=${PARQUET_BUILD_EXECUTABLES:-OFF} \

ci/scripts/integration_spark.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,6 @@ if [ "${SPARK_VERSION:1:2}" == "2." ]; then
3232
export ARROW_PRE_0_15_IPC_FORMAT=1
3333
fi
3434

35-
# Get Arrow Java version
36-
pushd ${source_dir}/java
37-
arrow_version=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'`
38-
popd
39-
4035
export MAVEN_OPTS="-Xss256m -Xmx2g -XX:ReservedCodeCacheSize=1g -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
4136
export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
4237

@@ -47,8 +42,13 @@ pushd ${spark_dir}
4742
build/mvn -B -DskipTests package
4843

4944
# Run pyarrow related Python tests only
45+
# "pyspark.sql.tests.arrow.test_arrow_grouped_map" and
46+
# "pyspark.sql.tests.arrow.test_arrow_cogrouped_map" currently fail.
47+
# See: https://github.com/apache/arrow/issues/44986
5048
spark_python_tests=(
51-
"pyspark.sql.tests.test_arrow")
49+
"pyspark.sql.tests.arrow.test_arrow"
50+
"pyspark.sql.tests.arrow.test_arrow_map"
51+
"pyspark.sql.tests.arrow.test_arrow_python_udf")
5252

5353
case "${SPARK_VERSION}" in
5454
v1.*|v2.*|v3.0.*|v3.1.*|v3.2.*|v3.3.*)

cpp/CMakePresets.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
"name": "features-minimal",
7979
"hidden": true,
8080
"cacheVariables": {
81+
"ARROW_MIMALLOC": "OFF",
8182
"ARROW_WITH_RE2": "OFF",
8283
"ARROW_WITH_UTF8PROC": "OFF"
8384
}
@@ -91,7 +92,8 @@
9192
"ARROW_CSV": "ON",
9293
"ARROW_DATASET": "ON",
9394
"ARROW_FILESYSTEM": "ON",
94-
"ARROW_JSON": "ON"
95+
"ARROW_JSON": "ON",
96+
"ARROW_MIMALLOC": "ON"
9597
}
9698
},
9799
{

cpp/cmake_modules/DefineOptions.cmake

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -362,29 +362,11 @@ takes precedence over ccache if a storage backend is configured" ON)
362362

363363
define_option(ARROW_IPC "Build the Arrow IPC extensions" ON)
364364

365-
set(ARROW_JEMALLOC_DESCRIPTION "Build the Arrow jemalloc-based allocator")
366-
if(WIN32
367-
OR CMAKE_SYSTEM_NAME STREQUAL "FreeBSD"
368-
OR CMAKE_SYSTEM_PROCESSOR MATCHES "aarch|ARM|arm"
369-
OR NOT ARROW_ENABLE_THREADING)
370-
# jemalloc is not supported on Windows.
371-
#
372-
# jemalloc is the default malloc implementation on FreeBSD and can't
373-
# be built with --disable-libdl on FreeBSD. Because lazy-lock feature
374-
# is required on FreeBSD. Lazy-lock feature requires libdl.
375-
#
376-
# jemalloc may have a problem on ARM.
377-
# See also: https://github.com/apache/arrow/issues/44342
378-
#
379-
# jemalloc requires thread.
380-
define_option(ARROW_JEMALLOC ${ARROW_JEMALLOC_DESCRIPTION} OFF)
381-
else()
382-
define_option(ARROW_JEMALLOC ${ARROW_JEMALLOC_DESCRIPTION} ON)
383-
endif()
365+
define_option(ARROW_JEMALLOC "Build the Arrow jemalloc-based allocator" OFF)
384366

385367
define_option(ARROW_JSON "Build Arrow with JSON support (requires RapidJSON)" OFF)
386368

387-
define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" OFF)
369+
define_option(ARROW_MIMALLOC "Build the Arrow mimalloc-based allocator" ON)
388370

389371
define_option(ARROW_PARQUET
390372
"Build the Parquet libraries"

cpp/cmake_modules/ThirdpartyToolchain.cmake

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -738,9 +738,8 @@ if(DEFINED ENV{ARROW_ORC_URL})
738738
set(ORC_SOURCE_URL "$ENV{ARROW_ORC_URL}")
739739
else()
740740
set_urls(ORC_SOURCE_URL
741-
"https://www.apache.org/dyn/closer.cgi?action=download&filename=/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
742-
"https://downloads.apache.org/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
743-
"https://github.com/apache/orc/archive/rel/release-${ARROW_ORC_BUILD_VERSION}.tar.gz"
741+
"https://www.apache.org/dyn/closer.lua/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz?action=download"
742+
"https://dlcdn.apache.org/orc/orc-${ARROW_ORC_BUILD_VERSION}/orc-${ARROW_ORC_BUILD_VERSION}.tar.gz"
744743
)
745744
endif()
746745

@@ -817,6 +816,7 @@ if(DEFINED ENV{ARROW_THRIFT_URL})
817816
set(THRIFT_SOURCE_URL "$ENV{ARROW_THRIFT_URL}")
818817
else()
819818
set(THRIFT_SOURCE_URL
819+
"https://www.apache.org/dyn/closer.lua/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz?action=download"
820820
"https://dlcdn.apache.org/thrift/${ARROW_THRIFT_BUILD_VERSION}/thrift-${ARROW_THRIFT_BUILD_VERSION}.tar.gz"
821821
)
822822
endif()
@@ -2061,10 +2061,14 @@ macro(build_substrait)
20612061

20622062
# Missing dll-interface:
20632063
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "/wd4251")
2064-
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
2065-
"Clang")
2066-
# Protobuf generated files trigger some errors on CLANG TSAN builds
2067-
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32")
2064+
else()
2065+
# GH-44954: silence [[deprecated]] declarations in protobuf-generated code
2066+
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-deprecated")
2067+
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID STREQUAL
2068+
"Clang")
2069+
# Protobuf generated files trigger some errors on CLANG TSAN builds
2070+
list(APPEND SUBSTRAIT_SUPPRESSED_FLAGS "-Wno-error=shorten-64-to-32")
2071+
endif()
20682072
endif()
20692073

20702074
set(SUBSTRAIT_SOURCES)
@@ -2116,6 +2120,7 @@ macro(build_substrait)
21162120

21172121
add_library(substrait STATIC ${SUBSTRAIT_SOURCES})
21182122
set_target_properties(substrait PROPERTIES POSITION_INDEPENDENT_CODE ON)
2123+
target_compile_options(substrait PRIVATE "${SUBSTRAIT_SUPPRESSED_FLAGS}")
21192124
target_include_directories(substrait PUBLIC ${SUBSTRAIT_INCLUDES})
21202125
target_link_libraries(substrait PUBLIC ${ARROW_PROTOBUF_LIBPROTOBUF})
21212126
add_dependencies(substrait substrait_gen)
@@ -4966,7 +4971,6 @@ if(ARROW_WITH_OPENTELEMETRY)
49664971
# cURL is required whether we build from source or use an existing installation
49674972
# (OTel's cmake files do not call find_curl for you)
49684973
find_curl()
4969-
set(opentelemetry-cpp_SOURCE "AUTO")
49704974
resolve_dependency(opentelemetry-cpp)
49714975
set(ARROW_OPENTELEMETRY_LIBS
49724976
opentelemetry-cpp::trace

cpp/src/arrow/CMakeLists.txt

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,7 @@ if(WIN32)
166166
list(APPEND ARROW_SYSTEM_LINK_LIBS "ws2_32")
167167
endif()
168168

169-
if(NOT WIN32 AND NOT APPLE)
170-
# Pass -lrt on Linux only
169+
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
171170
list(APPEND ARROW_SYSTEM_LINK_LIBS rt)
172171
endif()
173172

@@ -674,6 +673,7 @@ set(ARROW_TESTING_SRCS
674673
testing/fixed_width_test_util.cc
675674
testing/generator.cc
676675
testing/gtest_util.cc
676+
testing/math.cc
677677
testing/process.cc
678678
testing/random.cc
679679
testing/util.cc)
@@ -726,9 +726,6 @@ set(ARROW_COMPUTE_SRCS
726726
compute/function.cc
727727
compute/function_internal.cc
728728
compute/kernel.cc
729-
compute/key_hash_internal.cc
730-
compute/key_map_internal.cc
731-
compute/light_array_internal.cc
732729
compute/ordering.cc
733730
compute/registry.cc
734731
compute/kernels/chunked_internal.cc
@@ -747,20 +744,7 @@ set(ARROW_COMPUTE_SRCS
747744
compute/kernels/vector_selection.cc
748745
compute/kernels/vector_selection_filter_internal.cc
749746
compute/kernels/vector_selection_internal.cc
750-
compute/kernels/vector_selection_take_internal.cc
751-
compute/row/encode_internal.cc
752-
compute/row/compare_internal.cc
753-
compute/row/grouper.cc
754-
compute/row/row_encoder_internal.cc
755-
compute/row/row_internal.cc
756-
compute/util.cc
757-
compute/util_internal.cc)
758-
759-
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
760-
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc)
761-
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc)
762-
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
763-
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
747+
compute/kernels/vector_selection_take_internal.cc)
764748

765749
if(ARROW_COMPUTE)
766750
# Include the remaining kernels
@@ -793,10 +777,25 @@ if(ARROW_COMPUTE)
793777
compute/kernels/vector_replace.cc
794778
compute/kernels/vector_run_end_encode.cc
795779
compute/kernels/vector_select_k.cc
796-
compute/kernels/vector_sort.cc)
780+
compute/kernels/vector_sort.cc
781+
compute/key_hash_internal.cc
782+
compute/key_map_internal.cc
783+
compute/light_array_internal.cc
784+
compute/row/encode_internal.cc
785+
compute/row/compare_internal.cc
786+
compute/row/grouper.cc
787+
compute/row/row_encoder_internal.cc
788+
compute/row/row_internal.cc
789+
compute/util.cc
790+
compute/util_internal.cc)
797791

798792
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx2.cc)
799793
append_runtime_avx512_src(ARROW_COMPUTE_SRCS compute/kernels/aggregate_basic_avx512.cc)
794+
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc)
795+
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc)
796+
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/compare_internal_avx2.cc)
797+
append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/row/encode_internal_avx2.cc)
798+
append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/util_avx2.cc)
800799
endif()
801800

802801
arrow_add_object_library(ARROW_COMPUTE ${ARROW_COMPUTE_SRCS})

cpp/src/arrow/compute/CMakeLists.txt

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,18 @@ add_arrow_test(internals_test
8787
function_test.cc
8888
exec_test.cc
8989
kernel_test.cc
90-
light_array_test.cc
91-
registry_test.cc
92-
key_hash_test.cc
93-
row/compare_test.cc
94-
row/grouper_test.cc
95-
row/row_encoder_internal_test.cc
96-
row/row_test.cc
97-
util_internal_test.cc)
90+
registry_test.cc)
9891

9992
add_arrow_compute_test(expression_test SOURCES expression_test.cc)
93+
add_arrow_compute_test(row_test
94+
SOURCES
95+
key_hash_test.cc
96+
light_array_test.cc
97+
row/compare_test.cc
98+
row/grouper_test.cc
99+
row/row_encoder_internal_test.cc
100+
row/row_test.cc
101+
util_internal_test.cc)
100102

101103
add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")
102104

0 commit comments

Comments
 (0)