Skip to content

Commit f43e352

Browse files
dgibson, agraf
authored and committed
Virtual hash page table handling on pSeries machine
On pSeries logical partitions, excepting the old POWER4-style full system partitions, the guest does not have direct access to the hardware page table. Instead, the pagetable exists in hypervisor memory, and the guest must manipulate it with hypercalls. However, our current pSeries emulation more closely resembles the old style where the guest must set up and handle the pagetables itself. This patch converts it to act like a modern partition. This involves two things: first, the hash translation path is modified to permit the hash table to be stored externally to the emulated machine's RAM. The pSeries machine init code configures the CPUs to use this mode. Secondly, we emulate the PAPR hypercalls for manipulating the external hashed page table. Signed-off-by: David Gibson <[email protected]> Signed-off-by: Alexander Graf <[email protected]>
1 parent 4040ab7 commit f43e352

File tree

4 files changed

+315
-12
lines changed

4 files changed

+315
-12
lines changed

hw/spapr.c

+31-4
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,15 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
5252
sPAPREnvironment *spapr,
5353
target_phys_addr_t initrd_base,
5454
target_phys_addr_t initrd_size,
55-
const char *kernel_cmdline)
55+
const char *kernel_cmdline,
56+
long hash_shift)
5657
{
5758
void *fdt;
5859
uint64_t mem_reg_property[] = { 0, cpu_to_be64(ramsize) };
5960
uint32_t start_prop = cpu_to_be32(initrd_base);
6061
uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
62+
uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
63+
char hypertas_prop[] = "hcall-pft\0hcall-term";
6164
int i;
6265
char *modelname;
6366
int ret;
@@ -145,6 +148,8 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
145148
* full emu, for kvm we should copy it from the host */
146149
_FDT((fdt_property_cell(fdt, "clock-frequency", 1000000000)));
147150
_FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
151+
_FDT((fdt_property(fdt, "ibm,pft-size",
152+
pft_size_prop, sizeof(pft_size_prop))));
148153
_FDT((fdt_property_string(fdt, "status", "okay")));
149154
_FDT((fdt_property(fdt, "64-bit", NULL, 0)));
150155

@@ -160,6 +165,14 @@ static void *spapr_create_fdt(int *fdt_size, ram_addr_t ramsize,
160165

161166
_FDT((fdt_end_node(fdt)));
162167

168+
/* RTAS */
169+
_FDT((fdt_begin_node(fdt, "rtas")));
170+
171+
_FDT((fdt_property(fdt, "ibm,hypertas-functions", hypertas_prop,
172+
sizeof(hypertas_prop))));
173+
174+
_FDT((fdt_end_node(fdt)));
175+
163176
/* vdevice */
164177
_FDT((fdt_begin_node(fdt, "vdevice")));
165178

@@ -208,12 +221,13 @@ static void ppc_spapr_init(ram_addr_t ram_size,
208221
const char *cpu_model)
209222
{
210223
CPUState *envs[MAX_CPUS];
211-
void *fdt;
224+
void *fdt, *htab;
212225
int i;
213226
ram_addr_t ram_offset;
214227
target_phys_addr_t fdt_addr;
215228
uint32_t kernel_base, initrd_base;
216-
long kernel_size, initrd_size;
229+
long kernel_size, initrd_size, htab_size;
230+
long pteg_shift = 17;
217231
int fdt_size;
218232

219233
spapr = qemu_malloc(sizeof(*spapr));
@@ -250,6 +264,18 @@ static void ppc_spapr_init(ram_addr_t ram_size,
250264
ram_offset = qemu_ram_alloc(NULL, "ppc_spapr.ram", ram_size);
251265
cpu_register_physical_memory(0, ram_size, ram_offset);
252266

267+
/* allocate hash page table. For now we always make this 16mb,
268+
* later we should probably make it scale to the size of guest
269+
* RAM */
270+
htab_size = 1ULL << (pteg_shift + 7);
271+
htab = qemu_mallocz(htab_size);
272+
273+
for (i = 0; i < smp_cpus; i++) {
274+
envs[i]->external_htab = htab;
275+
envs[i]->htab_base = -1;
276+
envs[i]->htab_mask = htab_size - 1;
277+
}
278+
253279
spapr->vio_bus = spapr_vio_bus_init();
254280

255281
for (i = 0; i < MAX_SERIAL_PORTS; i++) {
@@ -296,7 +322,8 @@ static void ppc_spapr_init(ram_addr_t ram_size,
296322

297323
/* Prepare the device tree */
298324
fdt = spapr_create_fdt(&fdt_size, ram_size, cpu_model, envs, spapr,
299-
initrd_base, initrd_size, kernel_cmdline);
325+
initrd_base, initrd_size, kernel_cmdline,
326+
pteg_shift + 7);
300327
assert(fdt != NULL);
301328

302329
cpu_physical_memory_write(fdt_addr, fdt, fdt_size);

hw/spapr_hcall.c

+254
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,253 @@
11
#include "sysemu.h"
22
#include "cpu.h"
33
#include "qemu-char.h"
4+
#include "sysemu.h"
5+
#include "qemu-char.h"
6+
#include "exec-all.h"
47
#include "hw/spapr.h"
58

9+
/* Presumably the number of PTEs in one group. NOTE(review): not referenced
 * by the code visible in this diff; h_enter() uses the literal 8 instead. */
#define HPTES_PER_GROUP 8
10+
11+
/* HPTE_V_*: shifts and masks applied to the first 64-bit word of a hash PTE
 * (the word the handlers below load into 'v' or receive as 'pteh'). */
#define HPTE_V_SSIZE_SHIFT 62
12+
#define HPTE_V_AVPN_SHIFT 7
13+
#define HPTE_V_AVPN 0x3fffffffffffff80ULL
14+
/* Extract the field selected by HPTE_V_AVPN, shifted down to bit 0 */
#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
15+
/* Non-zero when x and y agree in every bit above the low 7 bits */
#define HPTE_V_COMPARE(x, y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
16+
#define HPTE_V_BOLTED 0x0000000000000010ULL
17+
#define HPTE_V_LOCK 0x0000000000000008ULL
18+
#define HPTE_V_LARGE 0x0000000000000004ULL
19+
#define HPTE_V_SECONDARY 0x0000000000000002ULL
20+
#define HPTE_V_VALID 0x0000000000000001ULL
21+
22+
/* HPTE_R_*: shifts and masks applied to the second 64-bit word of a hash PTE
 * (the word the handlers below load into 'r' or receive as 'ptel'). */
#define HPTE_R_PP0 0x8000000000000000ULL
23+
#define HPTE_R_TS 0x4000000000000000ULL
24+
#define HPTE_R_KEY_HI 0x3000000000000000ULL
25+
#define HPTE_R_RPN_SHIFT 12
26+
#define HPTE_R_RPN 0x3ffffffffffff000ULL
27+
#define HPTE_R_FLAGS 0x00000000000003ffULL
28+
#define HPTE_R_PP 0x0000000000000003ULL
29+
#define HPTE_R_N 0x0000000000000004ULL
30+
#define HPTE_R_G 0x0000000000000008ULL
31+
#define HPTE_R_M 0x0000000000000010ULL
32+
#define HPTE_R_I 0x0000000000000020ULL
33+
#define HPTE_R_W 0x0000000000000040ULL
34+
/* Equal to HPTE_R_W | HPTE_R_I | HPTE_R_M | HPTE_R_G */
#define HPTE_R_WIMG 0x0000000000000078ULL
35+
#define HPTE_R_C 0x0000000000000080ULL
36+
#define HPTE_R_R 0x0000000000000100ULL
37+
#define HPTE_R_KEY_LO 0x0000000000000e00ULL
38+
39+
#define HPTE_V_1TB_SEG 0x4000000000000000ULL
40+
#define HPTE_V_VRMA_MASK 0x4001ffffff000000ULL
41+
42+
/* First-word bit that lock_hpte() sets to mark an entry as locked; every
 * handler below clears it again before returning. */
#define HPTE_V_HVLOCK 0x40ULL
43+
44+
/*
 * Try to take the software lock on a hash PTE.
 *
 * hpte: pointer to the PTE; the 64-bit value at this address is read with
 *       ldq_p() and, on success, rewritten with stq_p().
 * bits: mask of bits in that value which make the attempt fail if any is set.
 *
 * Returns 0, leaving the PTE unmodified, if any bit in 'bits' is set.
 * Otherwise stores the value back with HPTE_V_HVLOCK set and returns 1.
 */
static inline int lock_hpte(void *hpte, target_ulong bits)
45+
{
46+
uint64_t pteh;
47+
48+
pteh = ldq_p(hpte);
49+
50+
/* We're protected by qemu's global lock here */
51+
if (pteh & bits) {
52+
return 0;
53+
}
54+
stq_p(hpte, pteh | HPTE_V_HVLOCK);
55+
return 1;
56+
}
57+
58+
/*
 * Build the value that h_remove() and h_protect() pass to
 * ppc_tlb_invalidate_one() for a given hash PTE.
 *
 * v:         first 64-bit word of the PTE.
 * r:         second 64-bit word of the PTE; only referenced inside the
 *            '#if 0' section below, so it currently has no effect.
 * pte_index: index of the PTE within the hash table.
 *
 * Returns the assembled value (field names are given in the inline comments).
 */
static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r,
59+
target_ulong pte_index)
60+
{
61+
target_ulong rb, va_low;
62+
63+
rb = (v & ~0x7fULL) << 16; /* AVA field */
64+
/* Drop the low 3 index bits (the slot within an 8-entry group, cf. the
 * 'pte_index &= ~7ULL' rounding in h_enter) */
va_low = pte_index >> 3;
65+
if (v & HPTE_V_SECONDARY) {
66+
va_low = ~va_low;
67+
}
68+
/* xor vsid from AVA */
69+
if (!(v & HPTE_V_1TB_SEG)) {
70+
va_low ^= v >> 12;
71+
} else {
72+
va_low ^= v >> 24;
73+
}
74+
va_low &= 0x7ff;
75+
if (v & HPTE_V_LARGE) {
76+
rb |= 1; /* L field */
77+
#if 0 /* Disable that P7 specific bit for now */
78+
if (r & 0xff000) {
79+
/* non-16MB large page, must be 64k */
80+
/* (masks depend on page size) */
81+
rb |= 0x1000; /* page encoding in LP field */
82+
rb |= (va_low & 0x7f) << 16; /* 7b of VA in AVA/LP field */
83+
rb |= (va_low & 0xfe); /* AVAL field */
84+
}
85+
#endif
86+
} else {
87+
/* 4kB page */
88+
rb |= (va_low & 0x7ff) << 12; /* remaining 11b of AVA */
89+
}
90+
rb |= (v >> 54) & 0x300; /* B field */
91+
return rb;
92+
}
93+
94+
/*
 * H_ENTER handler: store a guest-supplied PTE into the external hash table.
 *
 * Inputs are read from args[]: args[0] = flags, args[1] = PTE index,
 * args[2] = first PTE word (pteh), args[3] = second PTE word (ptel).
 * 'spapr' and 'opcode' are not used in the body.
 *
 * Without H_EXACT in flags the index is rounded down to a multiple of 8 and
 * the first of those 8 slots that is neither valid nor locked is taken;
 * with H_EXACT only the given slot is tried.
 *
 * Returns H_PARAMETER if a large-page PTE has any of ptel bits 0xff000 set or
 * pteh bit 0x80 set, if the WIMG bits of ptel are not exactly HPTE_R_M, or if
 * the index's byte offset has bits outside env->htab_mask; H_PTEG_FULL if no
 * slot could be taken; otherwise H_SUCCESS with args[0] set to the index of
 * the slot that was written.
 */
static target_ulong h_enter(CPUState *env, sPAPREnvironment *spapr,
95+
target_ulong opcode, target_ulong *args)
96+
{
97+
target_ulong flags = args[0];
98+
target_ulong pte_index = args[1];
99+
target_ulong pteh = args[2];
100+
target_ulong ptel = args[3];
101+
target_ulong porder;
102+
target_ulong i, pa;
103+
uint8_t *hpte;
104+
105+
/* only handle 4k and 16M pages for now */
106+
/* NOTE(review): porder is assigned here and below but never read in this
 * function; only the H_PARAMETER checks have an effect. */
porder = 12;
107+
if (pteh & HPTE_V_LARGE) {
108+
#if 0 /* We don't support 64k pages yet */
109+
if ((ptel & 0xf000) == 0x1000) {
110+
/* 64k page */
111+
porder = 16;
112+
} else
113+
#endif
114+
if ((ptel & 0xff000) == 0) {
115+
/* 16M page */
116+
porder = 24;
117+
/* lowest AVA bit must be 0 for 16M pages */
118+
if (pteh & 0x80) {
119+
return H_PARAMETER;
120+
}
121+
} else {
122+
return H_PARAMETER;
123+
}
124+
}
125+
126+
/* NOTE(review): pa is computed but not used afterwards (see FIXME) */
pa = ptel & HPTE_R_RPN;
127+
/* FIXME: bounds check the pa? */
128+
129+
/* Check WIMG */
130+
if ((ptel & HPTE_R_WIMG) != HPTE_R_M) {
131+
return H_PARAMETER;
132+
}
133+
/* Clear bits 0x40 (HPTE_V_HVLOCK) and 0x20 from the guest-supplied word, so
 * the final store of pteh below also leaves the entry unlocked */
pteh &= ~0x60ULL;
134+
135+
/* Reject an index whose byte offset has bits outside env->htab_mask */
if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
136+
return H_PARAMETER;
137+
}
138+
if (likely((flags & H_EXACT) == 0)) {
139+
pte_index &= ~7ULL;
140+
hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
141+
for (i = 0; ; ++i) {
142+
if (i == 8) {
143+
return H_PTEG_FULL;
144+
}
145+
if (((ldq_p(hpte) & HPTE_V_VALID) == 0) &&
146+
lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
147+
break;
148+
}
149+
hpte += HASH_PTE_SIZE_64;
150+
}
151+
} else {
152+
i = 0;
153+
hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
154+
if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
155+
return H_PTEG_FULL;
156+
}
157+
}
158+
/* Second word is written first; the first word is stored last */
stq_p(hpte + (HASH_PTE_SIZE_64/2), ptel);
159+
/* eieio(); FIXME: need some sort of barrier for smp? */
160+
stq_p(hpte, pteh);
161+
162+
assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
163+
args[0] = pte_index + i;
164+
return H_SUCCESS;
165+
}
166+
167+
/*
 * H_REMOVE handler: clear one PTE in the external hash table and invalidate
 * the corresponding TLB entry.
 *
 * Inputs are read from args[]: args[0] = flags, args[1] = PTE index,
 * args[2] = avpn (comparison value). 'spapr' and 'opcode' are not used in
 * the body.
 *
 * Returns H_PARAMETER if the index's byte offset has bits outside
 * env->htab_mask. Returns H_NOT_FOUND, after releasing the lock, if the
 * entry is not valid, if H_AVPN is set and the first word with its low 7 bits
 * masked off differs from avpn, or if H_ANDCOND is set and (first word & avpn)
 * is non-zero. Otherwise returns H_SUCCESS with args[0] = the old first word
 * (HPTE_V_HVLOCK cleared) and args[1] = the old second word.
 */
static target_ulong h_remove(CPUState *env, sPAPREnvironment *spapr,
168+
target_ulong opcode, target_ulong *args)
169+
{
170+
target_ulong flags = args[0];
171+
target_ulong pte_index = args[1];
172+
target_ulong avpn = args[2];
173+
uint8_t *hpte;
174+
target_ulong v, r, rb;
175+
176+
if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
177+
return H_PARAMETER;
178+
}
179+
180+
hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
181+
while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
182+
/* We have no real concurrency in qemu soft-emulation, so we
183+
* will never actually have a contested lock */
184+
assert(0);
185+
}
186+
187+
/* v is read after locking, so it carries HPTE_V_HVLOCK; that bit is masked
 * off wherever v is stored back or returned below */
v = ldq_p(hpte);
188+
r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
189+
190+
if ((v & HPTE_V_VALID) == 0 ||
191+
((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
192+
((flags & H_ANDCOND) && (v & avpn) != 0)) {
193+
stq_p(hpte, v & ~HPTE_V_HVLOCK);
194+
assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
195+
return H_NOT_FOUND;
196+
}
197+
args[0] = v & ~HPTE_V_HVLOCK;
198+
args[1] = r;
199+
/* Writing 0 clears the valid bit and the lock bit in one store */
stq_p(hpte, 0);
200+
rb = compute_tlbie_rb(v, r, pte_index);
201+
ppc_tlb_invalidate_one(env, rb);
202+
assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
203+
return H_SUCCESS;
204+
}
205+
206+
/*
 * H_PROTECT handler: rewrite the PP0/PP/N/KEY bits in the second word of an
 * existing PTE from bits of 'flags', invalidating the TLB entry in between.
 *
 * Inputs are read from args[]: args[0] = flags, args[1] = PTE index,
 * args[2] = avpn (comparison value). 'spapr' and 'opcode' are not used in
 * the body.
 *
 * Returns H_PARAMETER if the index's byte offset has bits outside
 * env->htab_mask. Returns H_NOT_FOUND, after releasing the lock, if the
 * entry is not valid, or if H_AVPN is set and the first word with its low
 * 7 bits masked off differs from avpn. Otherwise returns H_SUCCESS.
 */
static target_ulong h_protect(CPUState *env, sPAPREnvironment *spapr,
207+
target_ulong opcode, target_ulong *args)
208+
{
209+
target_ulong flags = args[0];
210+
target_ulong pte_index = args[1];
211+
target_ulong avpn = args[2];
212+
uint8_t *hpte;
213+
target_ulong v, r, rb;
214+
215+
if ((pte_index * HASH_PTE_SIZE_64) & ~env->htab_mask) {
216+
return H_PARAMETER;
217+
}
218+
219+
hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
220+
while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
221+
/* We have no real concurrency in qemu soft-emulation, so we
222+
* will never actually have a contested lock */
223+
assert(0);
224+
}
225+
226+
/* v is read after locking, so it carries HPTE_V_HVLOCK */
v = ldq_p(hpte);
227+
r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
228+
229+
if ((v & HPTE_V_VALID) == 0 ||
230+
((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
231+
stq_p(hpte, v & ~HPTE_V_HVLOCK);
232+
assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
233+
return H_NOT_FOUND;
234+
}
235+
236+
/* Clear the fields being replaced, then refill them from 'flags': PP0 and
 * KEY_HI come from flags shifted left by 55 and 48 bits respectively, the
 * remaining fields from the same bit positions in flags */
r &= ~(HPTE_R_PP0 | HPTE_R_PP | HPTE_R_N |
237+
HPTE_R_KEY_HI | HPTE_R_KEY_LO);
238+
r |= (flags << 55) & HPTE_R_PP0;
239+
r |= (flags << 48) & HPTE_R_KEY_HI;
240+
r |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
241+
rb = compute_tlbie_rb(v, r, pte_index);
242+
/* Mark the entry invalid (still locked) while the second word is updated */
stq_p(hpte, v & ~HPTE_V_VALID);
243+
ppc_tlb_invalidate_one(env, rb);
244+
stq_p(hpte + (HASH_PTE_SIZE_64/2), r);
245+
/* Don't need a memory barrier, due to qemu's global lock */
246+
/* Restore the original first word (valid again) with the lock released */
stq_p(hpte, v & ~HPTE_V_HVLOCK);
247+
assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
248+
return H_SUCCESS;
249+
}
250+
6251
spapr_hcall_fn hypercall_table[(MAX_HCALL_OPCODE / 4) + 1];
7252

8253
void spapr_register_hypercall(target_ulong opcode, spapr_hcall_fn fn)
@@ -39,3 +284,12 @@ target_ulong spapr_hypercall(CPUState *env, target_ulong opcode,
39284
hcall_dprintf("Unimplemented hcall 0x" TARGET_FMT_lx "\n", opcode);
40285
return H_FUNCTION;
41286
}
287+
288+
/*
 * Register the three hash-page-table hypercall handlers defined in this file
 * (h_enter, h_remove, h_protect) under their opcodes via
 * spapr_register_hypercall().
 */
static void hypercall_init(void)
289+
{
290+
/* hcall-pft */
291+
spapr_register_hypercall(H_ENTER, h_enter);
292+
spapr_register_hypercall(H_REMOVE, h_remove);
293+
spapr_register_hypercall(H_PROTECT, h_protect);
294+
}
295+
/* NOTE(review): presumably arranges for hypercall_init() to run during
 * startup -- confirm against the definition of device_init */
device_init(hypercall_init);

target-ppc/cpu.h

+2
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,8 @@ struct CPUPPCState {
670670
target_phys_addr_t htab_base;
671671
target_phys_addr_t htab_mask;
672672
target_ulong sr[32];
673+
/* externally stored hash table */
674+
uint8_t *external_htab;
673675
/* BATs */
674676
int nb_BATs;
675677
target_ulong DBAT[2][8];

0 commit comments

Comments
 (0)