sst-linux/include/linux/percpu.h
Eric Dumazet 63cc8c7515 percpu: introduce DEFINE_PER_CPU_PAGE_ALIGNED() macro
While examining holes in the percpu section, I found this:

c05f5000 D per_cpu__current_task
c05f5000 D __per_cpu_start
c05f5004 D per_cpu__cpu_number
c05f5008 D per_cpu__irq_regs
c05f500c d per_cpu__cpu_devices
c05f5040 D per_cpu__cyc2ns

<Big Hole of about 4000 bytes>

c05f6000 d per_cpu__cpuid4_info
c05f6004 d per_cpu__cache_kobject
c05f6008 d per_cpu__index_kobject

<Big Hole of about 4000 bytes>

c05f7000 D per_cpu__gdt_page

This is because gdt_page is a percpu variable defined with page
alignment, and the linker is doing its job, twice, because of the .o
nesting in the build process.

I introduced a new macro, DEFINE_PER_CPU_PAGE_ALIGNED(), to avoid
wasting this space. All page-aligned variables (only one at this time)
are put in a separate subsection, .data.percpu.page_aligned, at the
very beginning of the percpu zone.
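
As an illustration (a sketch only; the initializer is omitted and the
struct comes from arch/x86), the gdt_page definition mentioned above can
then switch from the generic macro to the page-aligned one:

	DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { ... };

so the linker places it in .data.percpu.page_aligned instead of padding
.data.percpu around it.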

Before the patch, on an x86_32 machine:

.data.percpu                30232   3227471872

After the patch:

.data.percpu                22168   3227471872

That's 8064 bytes saved for each CPU.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2008-05-25 07:03:46 +02:00


#ifndef __LINUX_PERCPU_H
#define __LINUX_PERCPU_H
#include <linux/preempt.h>
#include <linux/slab.h> /* For kmalloc() */
#include <linux/smp.h>
#include <linux/cpumask.h>
#include <asm/percpu.h>
#ifdef CONFIG_SMP

#define DEFINE_PER_CPU(type, name) \
	__attribute__((__section__(".data.percpu"))) \
	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name

#ifdef MODULE
#define SHARED_ALIGNED_SECTION ".data.percpu"
#else
#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned"
#endif

#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
	__attribute__((__section__(SHARED_ALIGNED_SECTION))) \
	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
	____cacheline_aligned_in_smp

#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
	__attribute__((__section__(".data.percpu.page_aligned"))) \
	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name

#else

#define DEFINE_PER_CPU(type, name) \
	PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name

#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
	DEFINE_PER_CPU(type, name)

#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
	DEFINE_PER_CPU(type, name)

#endif
#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
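/*
 * Usage sketch (illustrative only; "my_counter" is a hypothetical name):
 * a .c file defines a per-cpu variable and exports it so that modules
 * may also reference it:
 *
 *	DEFINE_PER_CPU(int, my_counter);
 *	EXPORT_PER_CPU_SYMBOL(my_counter);
 */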
/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */
#ifndef PERCPU_ENOUGH_ROOM
#ifdef CONFIG_MODULES
#define PERCPU_MODULE_RESERVE 8192
#else
#define PERCPU_MODULE_RESERVE 0
#endif
#define PERCPU_ENOUGH_ROOM \
	(__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE)
#endif /* PERCPU_ENOUGH_ROOM */
/*
* Must be an lvalue. Since @var must be a simple identifier,
* we force a syntax error here if it isn't.
*/
#define get_cpu_var(var) (*({ \
	extern int simple_identifier_##var(void); \
	preempt_disable(); \
	&__get_cpu_var(var); }))
#define put_cpu_var(var) preempt_enable()
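/*
 * Usage sketch (illustrative only; assumes a hypothetical
 * DEFINE_PER_CPU(int, my_counter) elsewhere): get_cpu_var() disables
 * preemption around the access to this CPU's copy, put_cpu_var()
 * re-enables it:
 *
 *	get_cpu_var(my_counter)++;
 *	put_cpu_var(my_counter);
 */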
#ifdef CONFIG_SMP

struct percpu_data {
	void *ptrs[1];
};

#define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata)

/*
 * Use this to get to a cpu's version of the per-cpu object dynamically
 * allocated. Non-atomic access to the current CPU's version should
 * probably be combined with get_cpu()/put_cpu().
 */
#define percpu_ptr(ptr, cpu) \
({ \
	struct percpu_data *__p = __percpu_disguise(ptr); \
	(__typeof__(ptr))__p->ptrs[(cpu)]; \
})
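/*
 * Usage sketch (illustrative only; "stats" and its "packets" member are
 * hypothetical, with "stats" obtained from alloc_percpu() below): pin
 * the task to a CPU around the non-atomic access, as the comment above
 * suggests:
 *
 *	int cpu = get_cpu();
 *	percpu_ptr(stats, cpu)->packets++;
 *	put_cpu();
 */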
extern void *percpu_populate(void *__pdata, size_t size, gfp_t gfp, int cpu);
extern void percpu_depopulate(void *__pdata, int cpu);
extern int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
				  cpumask_t *mask);
extern void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask);
extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask);
extern void percpu_free(void *__pdata);
#else /* CONFIG_SMP */
#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
static inline void percpu_depopulate(void *__pdata, int cpu)
{
}

static inline void __percpu_depopulate_mask(void *__pdata, cpumask_t *mask)
{
}

static inline void *percpu_populate(void *__pdata, size_t size, gfp_t gfp,
				    int cpu)
{
	return percpu_ptr(__pdata, cpu);
}

static inline int __percpu_populate_mask(void *__pdata, size_t size, gfp_t gfp,
					 cpumask_t *mask)
{
	return 0;
}

static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask)
{
	return kzalloc(size, gfp);
}

static inline void percpu_free(void *__pdata)
{
	kfree(__pdata);
}
#endif /* CONFIG_SMP */
#define percpu_populate_mask(__pdata, size, gfp, mask) \
	__percpu_populate_mask((__pdata), (size), (gfp), &(mask))
#define percpu_depopulate_mask(__pdata, mask) \
	__percpu_depopulate_mask((__pdata), &(mask))
#define percpu_alloc_mask(size, gfp, mask) \
	__percpu_alloc_mask((size), (gfp), &(mask))

#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map)

/* (legacy) interface for use without CPU hotplug handling */
#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \
					       cpu_possible_map)
#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type))
#define free_percpu(ptr) percpu_free((ptr))

#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu))
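/*
 * Usage sketch (illustrative only; "struct my_stats" and its "packets"
 * member are hypothetical): allocate one zeroed copy per possible CPU,
 * sum the copies with per_cpu_ptr(), then free the whole set:
 *
 *	struct my_stats *p = alloc_percpu(struct my_stats);
 *	unsigned long total = 0;
 *	int cpu;
 *
 *	if (!p)
 *		return -ENOMEM;
 *	for_each_possible_cpu(cpu)
 *		total += per_cpu_ptr(p, cpu)->packets;
 *	free_percpu(p);
 */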
#endif /* __LINUX_PERCPU_H */