
Commit 5fbd036b55 ("sched: Cleanup cpu_active madness"), which was supposed to finally sort the cpu_active mess, instead uncovered more. Since CPU_STARTING is run before setting the cpu online, there's a (small) window where the cpu is active,!online. If during this time there's a wakeup of a task that used to reside on that cpu, select_task_rq() will use select_fallback_rq() to compute an alternative cpu to run on since we find !online. select_fallback_rq() however will compute the new cpu against cpu_active, this means that it can return the same cpu it started out with, the !online one, since that cpu is in fact marked active. This results in us trying to schedule a task on an offline cpu and triggering a WARN in the IPI code. The solution proposed by Chuansheng Liu of setting cpu_active in set_cpu_online() is buggy, firstly not all archs actually use set_cpu_online(), secondly, not all archs call set_cpu_online() with IRQs disabled, this means we would introduce either the same race or the race from fd8a7de17 ("x86: cpu-hotplug: Prevent softirq wakeup on wrong CPU") -- albeit much narrower. [ By setting online first and active later we have a window of online,!active, fresh and bound kthreads have task_cpu() of 0 and since cpu0 isn't in tsk_cpus_allowed() we end up in select_fallback_rq() which excludes !active, resulting in a reset of ->cpus_allowed and the thread running all over the place. ] The solution is to re-work select_fallback_rq() to require active _and_ online. This makes the active,!online case work as expected, OTOH archs running CPU_STARTING after setting online are now vulnerable to the issue from fd8a7de17 -- these are alpha and blackfin. Reported-by: Chuansheng Liu <chuansheng.liu@intel.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Mike Frysinger <vapier@gentoo.org> Cc: linux-alpha@vger.kernel.org Link: http://lkml.kernel.org/n/tip-hubqk1i10o4dpvlm06gq7v6j@git.kernel.org Signed-off-by: Ingo Molnar <mingo@kernel.org>
246 lines
5.6 KiB
C
246 lines
5.6 KiB
C
#ifndef _LINUX_CPUSET_H
|
|
#define _LINUX_CPUSET_H
|
|
/*
|
|
* cpuset interface
|
|
*
|
|
* Copyright (C) 2003 BULL SA
|
|
* Copyright (C) 2004-2006 Silicon Graphics, Inc.
|
|
*
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/nodemask.h>
|
|
#include <linux/cgroup.h>
|
|
#include <linux/mm.h>
|
|
|
|
#ifdef CONFIG_CPUSETS
|
|
|
|
extern int number_of_cpusets; /* How many cpusets are defined in system? */
|
|
|
|
extern int cpuset_init(void);
|
|
extern void cpuset_init_smp(void);
|
|
extern void cpuset_update_active_cpus(void);
|
|
extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
|
|
extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
|
|
extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
|
|
#define cpuset_current_mems_allowed (current->mems_allowed)
|
|
void cpuset_init_current_mems_allowed(void);
|
|
int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask);
|
|
|
|
extern int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask);
|
|
extern int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask);
|
|
|
|
/*
 * Softwall node check with a fast path.
 *
 * @node:     node id passed through to __cpuset_node_allowed_softwall()
 * @gfp_mask: gfp flags passed through unchanged
 *
 * Returns 1 without calling out when number_of_cpusets <= 1.  Otherwise
 * returns 1 if __cpuset_node_allowed_softwall() returns non-zero, else 0.
 */
static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
{
	return number_of_cpusets <= 1 ||
		__cpuset_node_allowed_softwall(node, gfp_mask);
}
|
|
|
|
/*
 * Hardwall node check with a fast path.
 *
 * @node:     node id passed through to __cpuset_node_allowed_hardwall()
 * @gfp_mask: gfp flags passed through unchanged
 *
 * Returns 1 without calling out when number_of_cpusets <= 1.  Otherwise
 * returns 1 if __cpuset_node_allowed_hardwall() returns non-zero, else 0.
 */
static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
	return number_of_cpusets <= 1 ||
		__cpuset_node_allowed_hardwall(node, gfp_mask);
}
|
|
|
|
/*
 * Zone flavour of the softwall check: maps @z to its node id with
 * zone_to_nid() and returns cpuset_node_allowed_softwall() for that node
 * and @gfp_mask.
 */
static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
{
	return cpuset_node_allowed_softwall(zone_to_nid(z), gfp_mask);
}
|
|
|
|
/*
 * Zone flavour of the hardwall check: maps @z to its node id with
 * zone_to_nid() and returns cpuset_node_allowed_hardwall() for that node
 * and @gfp_mask.
 */
static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
{
	return cpuset_node_allowed_hardwall(zone_to_nid(z), gfp_mask);
}
|
|
|
|
/* Implemented outside this header; takes two tasks, neither is modified. */
extern int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
					  const struct task_struct *tsk2);
|
|
|
|
/*
 * Call __cpuset_memory_pressure_bump() only when the
 * cpuset_memory_pressure_enabled flag is non-zero.  Wrapped in
 * do { } while (0) so it behaves as a single statement at the call site.
 */
#define cpuset_memory_pressure_bump()				\
	do {							\
		if (cpuset_memory_pressure_enabled)		\
			__cpuset_memory_pressure_bump();	\
	} while (0)
extern int cpuset_memory_pressure_enabled;
extern void __cpuset_memory_pressure_bump(void);
|
|
|
|
extern const struct file_operations proc_cpuset_operations;

/* Forward declaration: only a pointer to seq_file is needed here. */
struct seq_file;
extern void cpuset_task_status_allowed(struct seq_file *m,
				       struct task_struct *task);

extern int cpuset_mem_spread_node(void);
extern int cpuset_slab_spread_node(void);
|
|
|
|
static inline int cpuset_do_page_mem_spread(void)
|
|
{
|
|
return current->flags & PF_SPREAD_PAGE;
|
|
}
|
|
|
|
static inline int cpuset_do_slab_mem_spread(void)
|
|
{
|
|
return current->flags & PF_SPREAD_SLAB;
|
|
}
|
|
|
|
/* Implemented outside this header (CONFIG_CPUSETS build). */
extern int current_cpuset_is_being_rebound(void);

extern void rebuild_sched_domains(void);

extern void cpuset_print_task_mems_allowed(struct task_struct *p);
|
|
|
|
/*
|
|
* reading current mems_allowed and mempolicy in the fastpath must protected
|
|
* by get_mems_allowed()
|
|
*/
|
|
static inline void get_mems_allowed(void)
|
|
{
|
|
current->mems_allowed_change_disable++;
|
|
|
|
/*
|
|
* ensure that reading mems_allowed and mempolicy happens after the
|
|
* update of ->mems_allowed_change_disable.
|
|
*
|
|
* the write-side task finds ->mems_allowed_change_disable is not 0,
|
|
* and knows the read-side task is reading mems_allowed or mempolicy,
|
|
* so it will clear old bits lazily.
|
|
*/
|
|
smp_mb();
|
|
}
|
|
|
|
static inline void put_mems_allowed(void)
|
|
{
|
|
/*
|
|
* ensure that reading mems_allowed and mempolicy before reducing
|
|
* mems_allowed_change_disable.
|
|
*
|
|
* the write-side task will know that the read-side task is still
|
|
* reading mems_allowed or mempolicy, don't clears old bits in the
|
|
* nodemask.
|
|
*/
|
|
smp_mb();
|
|
--ACCESS_ONCE(current->mems_allowed_change_disable);
|
|
}
|
|
|
|
/*
 * Replace current->mems_allowed with @nodemask.  The assignment is done
 * between task_lock(current) and task_unlock(current).
 */
static inline void set_mems_allowed(nodemask_t nodemask)
{
	task_lock(current);
	current->mems_allowed = nodemask;
	task_unlock(current);
}
|
|
|
|
#else /* !CONFIG_CPUSETS */
|
|
|
|
/* !CONFIG_CPUSETS stub: does nothing and always returns 0. */
static inline int cpuset_init(void)
{
	return 0;
}
|
|
/* !CONFIG_CPUSETS stub: intentionally empty. */
static inline void cpuset_init_smp(void)
{
}
|
|
|
|
static inline void cpuset_update_active_cpus(void)
|
|
{
|
|
partition_sched_domains(1, NULL, NULL);
|
|
}
|
|
|
|
static inline void cpuset_cpus_allowed(struct task_struct *p,
|
|
struct cpumask *mask)
|
|
{
|
|
cpumask_copy(mask, cpu_possible_mask);
|
|
}
|
|
|
|
/* !CONFIG_CPUSETS stub: intentionally empty, @p is not touched. */
static inline void cpuset_cpus_allowed_fallback(struct task_struct *p)
{
}
|
|
|
|
static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
|
|
{
|
|
return node_possible_map;
|
|
}
|
|
|
|
/*
 * Without CONFIG_CPUSETS this expands to node_states[N_HIGH_MEMORY] rather
 * than a field of the current task.
 */
#define cpuset_current_mems_allowed (node_states[N_HIGH_MEMORY])
|
|
/* !CONFIG_CPUSETS stub: intentionally empty. */
static inline void cpuset_init_current_mems_allowed(void)
{
}
|
|
|
|
/* !CONFIG_CPUSETS stub: @nodemask is not inspected; always returns 1. */
static inline int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
{
	return 1;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: ignores @node and @gfp_mask; always returns 1. */
static inline int cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
{
	return 1;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: ignores @node and @gfp_mask; always returns 1. */
static inline int cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
{
	return 1;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: ignores @z and @gfp_mask; always returns 1. */
static inline int cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
{
	return 1;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: ignores @z and @gfp_mask; always returns 1. */
static inline int cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
{
	return 1;
}
|
|
|
|
/*
 * !CONFIG_CPUSETS stub: neither task is inspected; always returns 1.
 */
static inline int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
						 const struct task_struct *tsk2)
{
	return 1;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: intentionally empty. */
static inline void cpuset_memory_pressure_bump(void)
{
}
|
|
|
|
/* !CONFIG_CPUSETS stub: intentionally empty; @m and @task are untouched. */
static inline void cpuset_task_status_allowed(struct seq_file *m,
					      struct task_struct *task)
{
}
|
|
|
|
/* !CONFIG_CPUSETS stub: always returns 0. */
static inline int cpuset_mem_spread_node(void)
{
	return 0;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: always returns 0. */
static inline int cpuset_slab_spread_node(void)
{
	return 0;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: always returns 0. */
static inline int cpuset_do_page_mem_spread(void)
{
	return 0;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: always returns 0. */
static inline int cpuset_do_slab_mem_spread(void)
{
	return 0;
}
|
|
|
|
/* !CONFIG_CPUSETS stub: always returns 0. */
static inline int current_cpuset_is_being_rebound(void)
{
	return 0;
}
|
|
|
|
static inline void rebuild_sched_domains(void)
|
|
{
|
|
partition_sched_domains(1, NULL, NULL);
|
|
}
|
|
|
|
/* !CONFIG_CPUSETS stub: intentionally empty, @p is not touched. */
static inline void cpuset_print_task_mems_allowed(struct task_struct *p)
{
}
|
|
|
|
/* !CONFIG_CPUSETS stub: intentionally empty, @nodemask is discarded. */
static inline void set_mems_allowed(nodemask_t nodemask)
{
}
|
|
|
|
/* !CONFIG_CPUSETS stub: intentionally empty (no counter, no barrier). */
static inline void get_mems_allowed(void)
{
}
|
|
|
|
/* !CONFIG_CPUSETS stub: intentionally empty (no counter, no barrier). */
static inline void put_mems_allowed(void)
{
}
|
|
|
|
#endif /* !CONFIG_CPUSETS */
|
|
|
|
#endif /* _LINUX_CPUSET_H */
|