Идея в том, чтобы выделять память для потоков ядра, привязанных к CPU, на той NUMA-ноде, к которой относится этот CPU. Неправильно брать стек и прочие данные потока с соседней ноды.
На практике это работает только с аллокатором SLAB. Похоже, меньшим количеством правок этого добиться не получится.
Идея не моя, реализация моя. NUMA-машины у меня дома нет.
Возможно, я где-то сильно ошибаюсь. Посмотрите, пожалуйста, у кого найдётся время.
diff -uprN -X linux-2.6.37.3/Documentation/dontdiff linux-2.6.37.3/include/linux/kthread.h linux-2.6.37.3.my/include/linux/kthread.h
--- linux-2.6.37.3/include/linux/kthread.h 2011-02-25 02:09:00.000000000 +0300
+++ linux-2.6.37.3.my/include/linux/kthread.h 2011-03-12 02:53:47.953108058 +0300
@@ -4,10 +4,18 @@
#include <linux/err.h>
#include <linux/sched.h>
-struct task_struct *kthread_create(int (*threadfn)(void *data),
- void *data,
- const char namefmt[], ...)
- __attribute__((format(printf, 3, 4)));
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+ void *data, unsigned int cpu,
+ const char namefmt[], ...)
+ __attribute__((format(printf, 4, 5)));
+
+/*
+ * kthread_create - create a kthread without a CPU/node preference.
+ *
+ * Forwards every argument to kthread_create_on_cpu(), passing NR_CPUS as
+ * the cpu argument, i.e. the value that stands for "no associated
+ * processor".  Evaluates to the task_struct pointer returned by that call.
+ */
+#define kthread_create(threadfn, data, namefmt, ...)			\
+	kthread_create_on_cpu(threadfn, data, NR_CPUS,			\
+			      namefmt, ## __VA_ARGS__)
/**
* kthread_run - create and wake a thread.
diff -uprN -X linux-2.6.37.3/Documentation/dontdiff linux-2.6.37.3/include/linux/mempolicy.h linux-2.6.37.3.my/include/linux/mempolicy.h
--- linux-2.6.37.3/include/linux/mempolicy.h 2011-02-25 02:09:00.000000000 +0300
+++ linux-2.6.37.3.my/include/linux/mempolicy.h 2011-03-13 01:04:02.949794632 +0300
@@ -201,6 +201,8 @@ struct mempolicy *mpol_shared_policy_loo
extern void numa_default_policy(void);
extern void numa_policy_init(void);
+extern void numa_get_mempolicy(int *policy, nodemask_t *nmask);
+extern void numa_set_mempolicy(unsigned short policy, nodemask_t *nmask);
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
enum mpol_rebind_step step);
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
@@ -317,6 +319,14 @@ static inline void numa_default_policy(v
{
}
+/*
+ * Empty inline variant of numa_get_mempolicy(): does nothing and leaves
+ * *policy and *nmask unmodified, so callers must not rely on either
+ * output being filled in.
+ * NOTE(review): presumably the stub used when NUMA support is compiled
+ * out -- confirm against the surrounding #ifdef in mempolicy.h.
+ */
+static inline void numa_get_mempolicy(int *policy, nodemask_t *nmask)
+{
+}
+
+/*
+ * Empty inline variant of numa_set_mempolicy(): ignores both arguments.
+ * The policy parameter is unsigned short so that this definition carries
+ * the same signature as the extern declaration of numa_set_mempolicy()
+ * in this header.
+ * NOTE(review): presumably the stub used when NUMA support is compiled
+ * out -- confirm against the surrounding #ifdef in mempolicy.h.
+ */
+static inline void numa_set_mempolicy(unsigned short policy,
+				      nodemask_t *nmask)
+{
+}
+
static inline void mpol_rebind_task(struct task_struct *tsk,
const nodemask_t *new,
enum mpol_rebind_step step)
diff -uprN -X linux-2.6.37.3/Documentation/dontdiff linux-2.6.37.3/kernel/kthread.c linux-2.6.37.3.my/kernel/kthread.c
--- linux-2.6.37.3/kernel/kthread.c 2011-02-25 02:09:00.000000000 +0300
+++ linux-2.6.37.3.my/kernel/kthread.c 2011-03-12 04:32:35.621389722 +0300
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/mempolicy.h>
#include <linux/freezer.h>
#include <trace/events/sched.h>
@@ -32,6 +33,9 @@ struct kthread_create_info
struct task_struct *result;
struct completion done;
+ /* Processor associated with a kernel thread or NR_CPUS */
+ unsigned int cpu;
+
struct list_head list;
};
@@ -101,6 +105,22 @@ static int kthread(void *_create)
static void create_kthread(struct kthread_create_info *create)
{
int pid;
+#ifdef CONFIG_NUMA
+ nodemask_t orig_mask, temp_mask;
+ int policy, node;
+
+ if (create->cpu < NR_CPUS)
+ node = cpu_to_node(create->cpu);
+ else
+ node = -1;
+
+ /* Set preferred node */
+ if (node != -1) {
+ numa_get_mempolicy(&policy, &orig_mask);
+ temp_mask = nodemask_of_node(node);
+ numa_set_mempolicy(MPOL_PREFERRED, &temp_mask);
+ }
+#endif
/* We want our own signal handler (we take no signals by default). */
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
@@ -108,12 +128,19 @@ static void create_kthread(struct kthrea
create->result = ERR_PTR(pid);
complete(&create->done);
}
+
+#ifdef CONFIG_NUMA
+ /* Restore mempolicy */
+ if (node != -1)
+ numa_set_mempolicy(policy, &orig_mask);
+#endif
}
/**
- * kthread_create - create a kthread.
+ * kthread_create_on_cpu - create a kthread.
* @threadfn: the function to run until signal_pending(current).
* @data: data ptr for @threadfn.
+ * @cpu: CPU whose NUMA node is preferred when allocating the thread's stack and related memory; pass NR_CPUS for no preference.
* @namefmt: printf-style name for the thread.
*
* Description: This helper function creates and names a kernel
@@ -129,15 +156,17 @@ static void create_kthread(struct kthrea
*
* Returns a task_struct or ERR_PTR(-ENOMEM).
*/
-struct task_struct *kthread_create(int (*threadfn)(void *data),
- void *data,
- const char namefmt[],
- ...)
+struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
+ void *data,
+ unsigned int cpu,
+ const char namefmt[],
+ ...)
{
struct kthread_create_info create;
create.threadfn = threadfn;
create.data = data;
+ create.cpu = cpu;
init_completion(&create.done);
spin_lock(&kthread_create_lock);
@@ -164,7 +193,7 @@ struct task_struct *kthread_create(int (
}
return create.result;
}
-EXPORT_SYMBOL(kthread_create);
+EXPORT_SYMBOL(kthread_create_on_cpu);
/**
* kthread_bind - bind a just-created kthread to a cpu.
diff -uprN -X linux-2.6.37.3/Documentation/dontdiff linux-2.6.37.3/kernel/softirq.c linux-2.6.37.3.my/kernel/softirq.c
--- linux-2.6.37.3/kernel/softirq.c 2011-02-25 02:09:00.000000000 +0300
+++ linux-2.6.37.3.my/kernel/softirq.c 2011-03-11 02:27:07.848430139 +0300
@@ -831,7 +831,8 @@ static int __cpuinit cpu_callback(struct
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- p = kthread_create(run_ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);
+ p = kthread_create_on_cpu(run_ksoftirqd, hcpu, hotcpu,
+ "ksoftirqd/%d", hotcpu);
if (IS_ERR(p)) {
printk("ksoftirqd for %i failed\n", hotcpu);
return notifier_from_errno(PTR_ERR(p));
diff -uprN -X linux-2.6.37.3/Documentation/dontdiff linux-2.6.37.3/kernel/stop_machine.c linux-2.6.37.3.my/kernel/stop_machine.c
--- linux-2.6.37.3/kernel/stop_machine.c 2011-02-25 02:09:00.000000000 +0300
+++ linux-2.6.37.3.my/kernel/stop_machine.c 2011-03-11 02:28:11.064030639 +0300
@@ -301,8 +301,8 @@ static int __cpuinit cpu_stop_cpu_callba
case CPU_UP_PREPARE:
BUG_ON(stopper->thread || stopper->enabled ||
!list_empty(&stopper->works));
- p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
- cpu);
+ p = kthread_create_on_cpu(cpu_stopper_thread, stopper, cpu,
+ "migration/%d", cpu);
if (IS_ERR(p))
return notifier_from_errno(PTR_ERR(p));
get_task_struct(p);
diff -uprN -X linux-2.6.37.3/Documentation/dontdiff linux-2.6.37.3/mm/mempolicy.c linux-2.6.37.3.my/mm/mempolicy.c
--- linux-2.6.37.3/mm/mempolicy.c 2011-02-25 02:09:00.000000000 +0300
+++ linux-2.6.37.3.my/mm/mempolicy.c 2011-03-13 01:03:28.789651219 +0300
@@ -2281,6 +2281,16 @@ void numa_default_policy(void)
do_set_mempolicy(MPOL_DEFAULT, 0, NULL);
}
+/*
+ * numa_get_mempolicy - fetch a memory policy and its node mask.
+ * @policy: receives the policy value reported by do_get_mempolicy().
+ * @nmask: receives the node mask reported by do_get_mempolicy().
+ *
+ * Thin wrapper that calls do_get_mempolicy() with its last two arguments
+ * set to 0.  The return value of do_get_mempolicy() is discarded.
+ * NOTE(review): with those arguments at 0 this presumably reports the
+ * calling task's policy, and the value may have mode flags OR-ed into
+ * it -- confirm against do_get_mempolicy().
+ */
+void numa_get_mempolicy(int *policy, nodemask_t *nmask)
+{
+ do_get_mempolicy(policy, nmask, 0, 0);
+}
+
+/*
+ * numa_set_mempolicy - install a memory policy via do_set_mempolicy().
+ * @policy: policy mode, optionally with MPOL_MODE_FLAGS bits OR-ed in.
+ * @nmask: node mask to apply.
+ *
+ * do_set_mempolicy() takes the mode and the mode flags as separate
+ * arguments, so the two are split here rather than passing the combined
+ * value as the mode with the flags forced to 0.  That keeps a value that
+ * carries mode flags from being interpreted as an out-of-range mode.
+ * Any error returned by do_set_mempolicy() is discarded because this
+ * helper returns void.
+ */
+void numa_set_mempolicy(unsigned short policy, nodemask_t *nmask)
+{
+	unsigned short mode = policy & ~MPOL_MODE_FLAGS;
+	unsigned short flags = policy & MPOL_MODE_FLAGS;
+
+	do_set_mempolicy(mode, flags, nmask);
+}
+
/*
* Parse and format mempolicy from/to strings
*/
Плюс есть ещё несколько мест, где поток привязывается к CPU.
Последовательные вызовы numa_get_mempolicy() и numa_set_mempolicy() можно заменить одной функцией, чтобы не захватывать спин-блокировку дважды, но, мне кажется, это несущественно.