Skip to content

Commit aa8e329

Browse files
committed
Merge branch 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu
Pull percpu updates from Dennis Zhou: "Percpu had a cleanup come in that makes use of the cpu bitmask helpers instead of the current iterative approach. This clean up then had an adverse interaction when clang's inlining sensitivity is changed such that not all sites are inlined resulting in modpost being upset with section mismatch due to percpu setup being marked __init. That was fixed by introducing __flatten to compiler_attributes.h" * 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu: percpu: fix clang modpost section mismatch percpu: reduce the number of cpu distance comparisons
2 parents 5cf0fd5 + 258e081 commit aa8e329

File tree

2 files changed

+27
-15
lines changed

2 files changed

+27
-15
lines changed

include/linux/compiler_attributes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,12 @@
 # define fallthrough do {} while (0) /* fallthrough */
 #endif
 
+/*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#flatten
+ */
+# define __flatten __attribute__((flatten))
+
 /*
  * Note the missing underscores.
  *

mm/percpu.c

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bitmap.h>
+#include <linux/cpumask.h>
 #include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/lcm.h>
@@ -2662,13 +2663,14 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * On success, pointer to the new allocation_info is returned.  On
  * failure, ERR_PTR value is returned.
  */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
 				size_t reserved_size, size_t dyn_size,
 				size_t atom_size,
 				pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
 {
 	static int group_map[NR_CPUS] __initdata;
 	static int group_cnt[NR_CPUS] __initdata;
+	static struct cpumask mask __initdata;
 	const size_t static_size = __per_cpu_end - __per_cpu_start;
 	int nr_groups = 1, nr_units = 0;
 	size_t size_sum, min_unit_size, alloc_size;
@@ -2681,6 +2683,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 	/* this function may be called multiple times */
 	memset(group_map, 0, sizeof(group_map));
 	memset(group_cnt, 0, sizeof(group_cnt));
+	cpumask_clear(&mask);
 
 	/* calculate size_sum and ensure dyn_size is enough for early alloc */
 	size_sum = PFN_ALIGN(static_size + reserved_size +
@@ -2702,24 +2705,27 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
 		upa--;
 	max_upa = upa;
 
+	cpumask_copy(&mask, cpu_possible_mask);
+
 	/* group cpus according to their proximity */
-	for_each_possible_cpu(cpu) {
-		group = 0;
-	next_group:
-		for_each_possible_cpu(tcpu) {
-			if (cpu == tcpu)
-				break;
-			if (group_map[tcpu] == group && cpu_distance_fn &&
-			    (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-			     cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-				group++;
-				nr_groups = max(nr_groups, group + 1);
-				goto next_group;
-			}
-		}
+	for (group = 0; !cpumask_empty(&mask); group++) {
+		/* pop the group's first cpu */
+		cpu = cpumask_first(&mask);
 		group_map[cpu] = group;
 		group_cnt[group]++;
+		cpumask_clear_cpu(cpu, &mask);
+
+		for_each_cpu(tcpu, &mask) {
+			if (!cpu_distance_fn ||
+			    (cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE &&
+			     cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) {
+				group_map[tcpu] = group;
+				group_cnt[group]++;
+				cpumask_clear_cpu(tcpu, &mask);
+			}
+		}
 	}
+	nr_groups = group;
 
 	/*
 	 * Wasted space is caused by a ratio imbalance of upa to group_cnt.

0 commit comments

Comments
 (0)