blob: 8bab784a3ee5b56256a4fd1a3e5eee461152f4b1 [file] [log] [blame]
/*
 * NUMA parameter parsing routines
 *
 * Copyright (c) 2014 Fujitsu Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
25#include "sysemu/sysemu.h"
26#include "exec/cpu-common.h"
27#include "qemu/bitmap.h"
28#include "qom/cpu.h"
Wanlong Gao2b631ec2014-05-14 17:43:06 +080029#include "qemu/error-report.h"
30#include "include/exec/cpu-common.h" /* for RAM_ADDR_FMT */
Wanlong Gao00421092014-05-14 17:43:08 +080031#include "qapi-visit.h"
32#include "qapi/opts-visitor.h"
33#include "qapi/dealloc-visitor.h"
34#include "qapi/qmp/qerror.h"
Paolo Bonzinidfabb8b2014-05-14 17:43:15 +080035#include "hw/boards.h"
Wanlong Gao96d0e262014-05-14 17:43:05 +080036
Wanlong Gao00421092014-05-14 17:43:08 +080037QemuOptsList qemu_numa_opts = {
38 .name = "numa",
39 .implied_opt_name = "type",
40 .head = QTAILQ_HEAD_INITIALIZER(qemu_numa_opts.head),
41 .desc = { { 0 } } /* validated with OptsVisitor */
42};
43
44static void numa_node_parse(NumaNodeOptions *node, QemuOpts *opts, Error **errp)
Wanlong Gao96d0e262014-05-14 17:43:05 +080045{
Wanlong Gao00421092014-05-14 17:43:08 +080046 uint16_t nodenr;
47 uint16List *cpus = NULL;
Wanlong Gao96d0e262014-05-14 17:43:05 +080048
Wanlong Gao00421092014-05-14 17:43:08 +080049 if (node->has_nodeid) {
50 nodenr = node->nodeid;
51 } else {
52 nodenr = nb_numa_nodes;
53 }
54
55 if (nodenr >= MAX_NODES) {
56 error_setg(errp, "Max number of NUMA nodes reached: %"
57 PRIu16 "\n", nodenr);
Wanlong Gao96d0e262014-05-14 17:43:05 +080058 return;
59 }
60
Wanlong Gao00421092014-05-14 17:43:08 +080061 for (cpus = node->cpus; cpus; cpus = cpus->next) {
62 if (cpus->value > MAX_CPUMASK_BITS) {
63 error_setg(errp, "CPU number %" PRIu16 " is bigger than %d",
64 cpus->value, MAX_CPUMASK_BITS);
65 return;
Wanlong Gao96d0e262014-05-14 17:43:05 +080066 }
Wanlong Gao00421092014-05-14 17:43:08 +080067 bitmap_set(numa_info[nodenr].node_cpu, cpus->value, 1);
Wanlong Gao96d0e262014-05-14 17:43:05 +080068 }
69
Wanlong Gao00421092014-05-14 17:43:08 +080070 if (node->has_mem) {
71 uint64_t mem_size = node->mem;
72 const char *mem_str = qemu_opt_get(opts, "mem");
73 /* Fix up legacy suffix-less format */
74 if (g_ascii_isdigit(mem_str[strlen(mem_str) - 1])) {
75 mem_size <<= 20;
76 }
77 numa_info[nodenr].node_mem = mem_size;
Wanlong Gao96d0e262014-05-14 17:43:05 +080078 }
Wanlong Gao96d0e262014-05-14 17:43:05 +080079}
80
Wanlong Gao00421092014-05-14 17:43:08 +080081int numa_init_func(QemuOpts *opts, void *opaque)
Wanlong Gao96d0e262014-05-14 17:43:05 +080082{
Wanlong Gao00421092014-05-14 17:43:08 +080083 NumaOptions *object = NULL;
84 Error *err = NULL;
Wanlong Gao96d0e262014-05-14 17:43:05 +080085
Wanlong Gao00421092014-05-14 17:43:08 +080086 {
87 OptsVisitor *ov = opts_visitor_new(opts);
88 visit_type_NumaOptions(opts_get_visitor(ov), &object, NULL, &err);
89 opts_visitor_cleanup(ov);
Wanlong Gao96d0e262014-05-14 17:43:05 +080090 }
Wanlong Gao96d0e262014-05-14 17:43:05 +080091
Wanlong Gao00421092014-05-14 17:43:08 +080092 if (err) {
93 goto error;
94 }
Wanlong Gao96d0e262014-05-14 17:43:05 +080095
Wanlong Gao00421092014-05-14 17:43:08 +080096 switch (object->kind) {
97 case NUMA_OPTIONS_KIND_NODE:
98 numa_node_parse(object->node, opts, &err);
99 if (err) {
100 goto error;
Wanlong Gao96d0e262014-05-14 17:43:05 +0800101 }
102 nb_numa_nodes++;
Wanlong Gao00421092014-05-14 17:43:08 +0800103 break;
104 default:
105 abort();
Wanlong Gao96d0e262014-05-14 17:43:05 +0800106 }
Wanlong Gao00421092014-05-14 17:43:08 +0800107
108 return 0;
109
110error:
111 qerror_report_err(err);
112 error_free(err);
113
114 if (object) {
115 QapiDeallocVisitor *dv = qapi_dealloc_visitor_new();
116 visit_type_NumaOptions(qapi_dealloc_get_visitor(dv),
117 &object, NULL, NULL);
118 qapi_dealloc_visitor_cleanup(dv);
119 }
120
121 return -1;
Wanlong Gao96d0e262014-05-14 17:43:05 +0800122}
123
124void set_numa_nodes(void)
125{
126 if (nb_numa_nodes > 0) {
Wanlong Gao2b631ec2014-05-14 17:43:06 +0800127 uint64_t numa_total;
Wanlong Gao96d0e262014-05-14 17:43:05 +0800128 int i;
129
130 if (nb_numa_nodes > MAX_NODES) {
131 nb_numa_nodes = MAX_NODES;
132 }
133
134 /* If no memory size if given for any node, assume the default case
135 * and distribute the available memory equally across all nodes
136 */
137 for (i = 0; i < nb_numa_nodes; i++) {
Wanlong Gao8c859012014-05-14 17:43:07 +0800138 if (numa_info[i].node_mem != 0) {
Wanlong Gao96d0e262014-05-14 17:43:05 +0800139 break;
140 }
141 }
142 if (i == nb_numa_nodes) {
143 uint64_t usedmem = 0;
144
145 /* On Linux, the each node's border has to be 8MB aligned,
146 * the final node gets the rest.
147 */
148 for (i = 0; i < nb_numa_nodes - 1; i++) {
Wanlong Gao8c859012014-05-14 17:43:07 +0800149 numa_info[i].node_mem = (ram_size / nb_numa_nodes) &
150 ~((1 << 23UL) - 1);
151 usedmem += numa_info[i].node_mem;
Wanlong Gao96d0e262014-05-14 17:43:05 +0800152 }
Wanlong Gao8c859012014-05-14 17:43:07 +0800153 numa_info[i].node_mem = ram_size - usedmem;
Wanlong Gao96d0e262014-05-14 17:43:05 +0800154 }
155
Wanlong Gao2b631ec2014-05-14 17:43:06 +0800156 numa_total = 0;
157 for (i = 0; i < nb_numa_nodes; i++) {
Wanlong Gao8c859012014-05-14 17:43:07 +0800158 numa_total += numa_info[i].node_mem;
Wanlong Gao2b631ec2014-05-14 17:43:06 +0800159 }
160 if (numa_total != ram_size) {
161 error_report("total memory for NUMA nodes (%" PRIu64 ")"
162 " should equal RAM size (" RAM_ADDR_FMT ")",
163 numa_total, ram_size);
164 exit(1);
165 }
166
Wanlong Gao96d0e262014-05-14 17:43:05 +0800167 for (i = 0; i < nb_numa_nodes; i++) {
Wanlong Gao8c859012014-05-14 17:43:07 +0800168 if (!bitmap_empty(numa_info[i].node_cpu, MAX_CPUMASK_BITS)) {
Wanlong Gao96d0e262014-05-14 17:43:05 +0800169 break;
170 }
171 }
172 /* assigning the VCPUs round-robin is easier to implement, guest OSes
173 * must cope with this anyway, because there are BIOSes out there in
174 * real machines which also use this scheme.
175 */
176 if (i == nb_numa_nodes) {
177 for (i = 0; i < max_cpus; i++) {
Wanlong Gao8c859012014-05-14 17:43:07 +0800178 set_bit(i, numa_info[i % nb_numa_nodes].node_cpu);
Wanlong Gao96d0e262014-05-14 17:43:05 +0800179 }
180 }
181 }
182}
183
184void set_numa_modes(void)
185{
186 CPUState *cpu;
187 int i;
188
189 CPU_FOREACH(cpu) {
190 for (i = 0; i < nb_numa_nodes; i++) {
Wanlong Gao8c859012014-05-14 17:43:07 +0800191 if (test_bit(cpu->cpu_index, numa_info[i].node_cpu)) {
Wanlong Gao96d0e262014-05-14 17:43:05 +0800192 cpu->numa_node = i;
193 }
194 }
195 }
196}
Paolo Bonzinidfabb8b2014-05-14 17:43:15 +0800197
198void memory_region_allocate_system_memory(MemoryRegion *mr, Object *owner,
199 const char *name,
200 uint64_t ram_size)
201{
202 memory_region_init_ram(mr, owner, name, ram_size);
203 vmstate_register_ram_global(mr);
204}