Skip to content

Commit 3062e4b

Browse files
Automatic merge of 'next' into merge (2025-02-14 17:57)
2 parents 4ab6320 + 6aa989a commit 3062e4b

32 files changed

+471
-72
lines changed

Documentation/ABI/testing/sysfs-kernel-fadump

+2-1
Original file line numberDiff line numberDiff line change
@@ -55,4 +55,5 @@ Date: May 2024
5555
5656
Description: read/write
5757
This is a special sysfs file available to setup additional
58-
parameters to be passed to capture kernel.
58+
parameters to be passed to capture kernel. For HASH MMU it
59+
is exported only if the RMA size is higher than 768MB.

Documentation/arch/powerpc/firmware-assisted-dump.rst

+22
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,28 @@ to ensure that crash data is preserved to process later.
120120
e.g.
121121
# echo 1 > /sys/firmware/opal/mpipl/release_core
122122

123+
-- Support for Additional Kernel Arguments in Fadump
124+
Fadump has a feature that allows passing additional kernel arguments
125+
to the fadump kernel. This feature was primarily designed to disable
126+
kernel functionalities that are not required for the fadump kernel
127+
and to reduce its memory footprint while collecting the dump.
128+
129+
Command to Add Additional Kernel Parameters to Fadump:
130+
e.g.
131+
# echo "nr_cpus=16" > /sys/kernel/fadump/bootargs_append
132+
133+
The above command is sufficient to add additional arguments to fadump.
134+
An explicit service restart is not required.
135+
136+
Command to Retrieve the Additional Fadump Arguments:
137+
e.g.
138+
# cat /sys/kernel/fadump/bootargs_append
139+
140+
Note: Additional kernel arguments for fadump with HASH MMU are only
141+
supported if the RMA size is greater than 768 MB. If the RMA
142+
size is less than 768 MB, the kernel does not export the
143+
/sys/kernel/fadump/bootargs_append sysfs node.
144+
123145
Implementation details:
124146
-----------------------
125147

Documentation/arch/powerpc/papr_hcalls.rst

+11
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,17 @@ to be issued multiple times in order to be completely serviced. The
289289
subsequent hcalls to the hypervisor until the hcall is completely serviced
290290
at which point H_SUCCESS or other error is returned by the hypervisor.
291291

292+
**H_HTM**
293+
294+
| Input: flags, target, operation (op), op-param1, op-param2, op-param3
295+
| Out: *dumphtmbufferdata*
296+
| Return Value: *H_Success,H_Busy,H_LongBusyOrder,H_Partial,H_Parameter,
297+
H_P2,H_P3,H_P4,H_P5,H_P6,H_State,H_Not_Available,H_Authority*
298+
299+
H_HTM supports setup, configuration, control and dumping of Hardware Trace
300+
Macro (HTM) function and its data. The HTM buffer stores tracing data for functions
301+
like core instruction, core LLAT and nest.
302+
292303
References
293304
==========
294305
.. [1] "Power Architecture Platform Reference"

arch/powerpc/include/asm/hvcall.h

+34
Original file line numberDiff line numberDiff line change
@@ -348,6 +348,7 @@
348348
#define H_SCM_FLUSH 0x44C
349349
#define H_GET_ENERGY_SCALE_INFO 0x450
350350
#define H_PKS_SIGNED_UPDATE 0x454
351+
#define H_HTM 0x458
351352
#define H_WATCHDOG 0x45C
352353
#define H_GUEST_GET_CAPABILITIES 0x460
353354
#define H_GUEST_SET_CAPABILITIES 0x464
@@ -498,6 +499,39 @@
498499
#define H_GUEST_CAP_POWER11 (1UL<<(63-3))
499500
#define H_GUEST_CAP_BITMAP2 (1UL<<(63-63))
500501

502+
/*
503+
* Defines for H_HTM - Macros for hardware trace macro (HTM) function.
504+
*/
505+
#define H_HTM_FLAGS_HARDWARE_TARGET (1ul << 63)
506+
#define H_HTM_FLAGS_LOGICAL_TARGET (1ul << 62)
507+
#define H_HTM_FLAGS_PROCID_TARGET (1ul << 61)
508+
#define H_HTM_FLAGS_NOWRAP (1ul << 60)
509+
510+
#define H_HTM_OP_SHIFT (63-15)
511+
#define H_HTM_OP(x) ((unsigned long)(x)<<H_HTM_OP_SHIFT)
512+
#define H_HTM_OP_CAPABILITIES 0x01
513+
#define H_HTM_OP_STATUS 0x02
514+
#define H_HTM_OP_SETUP 0x03
515+
#define H_HTM_OP_CONFIGURE 0x04
516+
#define H_HTM_OP_START 0x05
517+
#define H_HTM_OP_STOP 0x06
518+
#define H_HTM_OP_DECONFIGURE 0x07
519+
#define H_HTM_OP_DUMP_DETAILS 0x08
520+
#define H_HTM_OP_DUMP_DATA 0x09
521+
#define H_HTM_OP_DUMP_SYSMEM_CONF 0x0a
522+
#define H_HTM_OP_DUMP_SYSPROC_CONF 0x0b
523+
524+
#define H_HTM_TYPE_SHIFT (63-31)
525+
#define H_HTM_TYPE(x) ((unsigned long)(x)<<H_HTM_TYPE_SHIFT)
526+
#define H_HTM_TYPE_NEST 0x01
527+
#define H_HTM_TYPE_CORE 0x02
528+
#define H_HTM_TYPE_LLAT 0x03
529+
#define H_HTM_TYPE_GLOBAL 0xff
530+
531+
#define H_HTM_TARGET_NODE_INDEX(x) ((unsigned long)(x)<<(63-15))
532+
#define H_HTM_TARGET_NODAL_CHIP_INDEX(x) ((unsigned long)(x)<<(63-31))
533+
#define H_HTM_TARGET_CORE_INDEX_ON_CHIP(x) ((unsigned long)(x)<<(63-47))
534+
501535
#ifndef __ASSEMBLY__
502536
#include <linux/types.h>
503537

arch/powerpc/include/asm/mmzone.h

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ extern cpumask_var_t node_to_cpumask_map[];
2929
#ifdef CONFIG_MEMORY_HOTPLUG
3030
extern unsigned long max_pfn;
3131
u64 memory_hotplug_max(void);
32+
u64 hot_add_drconf_memory_max(void);
3233
#else
3334
#define memory_hotplug_max() memblock_end_of_DRAM()
3435
#endif

arch/powerpc/include/asm/plpar_wrappers.h

+21
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,27 @@ static inline long register_dtl(unsigned long cpu, unsigned long vpa)
6565
return vpa_call(H_VPA_REG_DTL, cpu, vpa);
6666
}
6767

68+
static inline long htm_call(unsigned long flags, unsigned long target,
69+
unsigned long operation, unsigned long param1,
70+
unsigned long param2, unsigned long param3)
71+
{
72+
return plpar_hcall_norets(H_HTM, flags, target, operation,
73+
param1, param2, param3);
74+
}
75+
76+
static inline long htm_get_dump_hardware(unsigned long nodeindex,
77+
unsigned long nodalchipindex, unsigned long coreindexonchip,
78+
unsigned long type, unsigned long addr, unsigned long size,
79+
unsigned long offset)
80+
{
81+
return htm_call(H_HTM_FLAGS_HARDWARE_TARGET,
82+
H_HTM_TARGET_NODE_INDEX(nodeindex) |
83+
H_HTM_TARGET_NODAL_CHIP_INDEX(nodalchipindex) |
84+
H_HTM_TARGET_CORE_INDEX_ON_CHIP(coreindexonchip),
85+
H_HTM_OP(H_HTM_OP_DUMP_DATA) | H_HTM_TYPE(type),
86+
addr, size, offset);
87+
}
88+
6889
extern void vpa_init(int cpu);
6990

7091
static inline long plpar_pte_enter(unsigned long flags,

arch/powerpc/include/asm/prom.h

+2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
struct device_node;
1818
struct property;
1919

20+
#define MIN_RMA 768 /* Minimum RMA (in MB) for CAS negotiation */
21+
2022
#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */
2123
#define OF_DT_END_NODE 0x2 /* End node */
2224
#define OF_DT_PROP 0x3 /* Property: name off, size,

arch/powerpc/kernel/fadump.c

+11-10
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#include <asm/fadump-internal.h>
3434
#include <asm/setup.h>
3535
#include <asm/interrupt.h>
36+
#include <asm/prom.h>
3637

3738
/*
3839
* The CPU who acquired the lock to trigger the fadump crash should
@@ -1764,19 +1765,19 @@ void __init fadump_setup_param_area(void)
17641765
range_end = memblock_end_of_DRAM();
17651766
} else {
17661767
/*
1767-
* Passing additional parameters is supported for hash MMU only
1768-
* if the first memory block size is 768MB or higher.
1768+
* Memory range for passing additional parameters for HASH MMU
1769+
* must meet the following conditions:
1770+
* 1. The first memory block size must be higher than the
1771+
* minimum RMA (MIN_RMA) size. Bootloader can use memory
1772+
* up to the RMA size, so that region should be avoided.
1773+
* 2. The range should be between MIN_RMA and RMA size (ppc64_rma_size)
1774+
* 3. It must not overlap with the fadump reserved area.
17691775
*/
1770-
if (ppc64_rma_size < 0x30000000)
1776+
if (ppc64_rma_size < MIN_RMA*1024*1024)
17711777
return;
17721778

1773-
/*
1774-
* 640 MB to 768 MB is not used by PFW/bootloader. So, try reserving
1775-
* memory for passing additional parameters in this range to avoid
1776-
* being stomped on by PFW/bootloader.
1777-
*/
1778-
range_start = 0x2A000000;
1779-
range_end = range_start + 0x4000000;
1779+
range_start = MIN_RMA * 1024 * 1024;
1780+
range_end = min(ppc64_rma_size, fw_dump.boot_mem_top);
17801781
}
17811782

17821783
fw_dump.param_area = memblock_phys_alloc_range(COMMAND_LINE_SIZE,

arch/powerpc/kernel/prom_init.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1061,7 +1061,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
10611061
.virt_base = cpu_to_be32(0xffffffff),
10621062
.virt_size = cpu_to_be32(0xffffffff),
10631063
.load_base = cpu_to_be32(0xffffffff),
1064-
.min_rma = cpu_to_be32(512), /* 512MB min RMA */
1064+
.min_rma = cpu_to_be32(MIN_RMA),
10651065
.min_load = cpu_to_be32(0xffffffff), /* full client load */
10661066
.min_rma_percent = 0, /* min RMA percentage of total RAM */
10671067
.max_pft_size = 48, /* max log_2(hash table size) */

arch/powerpc/mm/numa.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -1336,7 +1336,7 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
13361336
return nid;
13371337
}
13381338

1339-
static u64 hot_add_drconf_memory_max(void)
1339+
u64 hot_add_drconf_memory_max(void)
13401340
{
13411341
struct device_node *memory = NULL;
13421342
struct device_node *dn = NULL;

arch/powerpc/perf/core-book3s.c

+20
Original file line numberDiff line numberDiff line change
@@ -2222,6 +2222,10 @@ static struct pmu power_pmu = {
22222222
#define PERF_SAMPLE_ADDR_TYPE (PERF_SAMPLE_ADDR | \
22232223
PERF_SAMPLE_PHYS_ADDR | \
22242224
PERF_SAMPLE_DATA_PAGE_SIZE)
2225+
2226+
#define SIER_TYPE_SHIFT 15
2227+
#define SIER_TYPE_MASK (0x7ull << SIER_TYPE_SHIFT)
2228+
22252229
/*
22262230
* A counter has overflowed; update its count and record
22272231
* things if requested. Note that interrupts are hard-disabled
@@ -2290,6 +2294,22 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
22902294
is_kernel_addr(mfspr(SPRN_SIAR)))
22912295
record = 0;
22922296

2297+
/*
2298+
* SIER[46-48] presents instruction type of the sampled instruction.
2299+
* In ISA v3.0 and before values "0" and "7" are considered reserved.
2300+
* In ISA v3.1, value "7" has been used to indicate "larx/stcx".
2301+
* Drop the sample if "type" has reserved values for this field with an
2302+
* ISA version check.
2303+
*/
2304+
if (event->attr.sample_type & PERF_SAMPLE_DATA_SRC &&
2305+
ppmu->get_mem_data_src) {
2306+
val = (regs->dar & SIER_TYPE_MASK) >> SIER_TYPE_SHIFT;
2307+
if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31))) {
2308+
record = 0;
2309+
atomic64_inc(&event->lost_samples);
2310+
}
2311+
}
2312+
22932313
/*
22942314
* Finally record data if requested.
22952315
*/

arch/powerpc/perf/isa207-common.c

+15-3
Original file line numberDiff line numberDiff line change
@@ -319,10 +319,18 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
319319
return;
320320
}
321321

322-
sier = mfspr(SPRN_SIER);
322+
/*
323+
* Use regs->dar for SPRN_SIER which is saved
324+
* during perf_read_regs at the beginning
325+
* of the PMU interrupt handler to avoid multiple
326+
* reads of SPRN_SIER
327+
*/
328+
sier = regs->dar;
323329
val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
324-
if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31)))
330+
if (val != 1 && val != 2 && !(val == 7 && cpu_has_feature(CPU_FTR_ARCH_31))) {
331+
dsrc->val = 0;
325332
return;
333+
}
326334

327335
idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
328336
sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> ISA207_SIER_DATA_SRC_SHIFT;
@@ -338,8 +346,12 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
338346
* to determine the exact instruction type. If the sampling
339347
* criteria is neither load nor store, set the type as default
340348
* to NA.
349+
*
350+
* Use regs->dsisr for MMCRA which is saved during perf_read_regs
351+
* at the beginning of the PMU interrupt handler to avoid
352+
* multiple reads of SPRN_MMCRA
341353
*/
342-
mmcra = mfspr(SPRN_MMCRA);
354+
mmcra = regs->dsisr;
343355

344356
op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & MMCRA_SAMP_ELIG_MASK;
345357
switch (op_type) {

arch/powerpc/platforms/pseries/Kconfig

+9
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,15 @@ config CMM
128128
will be reused for other LPARs. The interface allows firmware to
129129
balance memory across many LPARs.
130130

131+
config HTMDUMP
132+
tristate "PowerVM data dumper"
133+
depends on PPC_PSERIES && DEBUG_FS
134+
default m
135+
help
136+
Select this option if you want to enable the kernel debugfs
137+
interface to dump the Hardware Trace Macro (HTM) function data
138+
in the LPAR.
139+
131140
config HV_PERF_CTRS
132141
bool "Hypervisor supplied PMU events (24x7 & GPCI)"
133142
default y

arch/powerpc/platforms/pseries/Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
1919
obj-$(CONFIG_HVCS) += hvcserver.o
2020
obj-$(CONFIG_HCALL_STATS) += hvCall_inst.o
2121
obj-$(CONFIG_CMM) += cmm.o
22+
obj-$(CONFIG_HTMDUMP) += htmdump.o
2223
obj-$(CONFIG_IO_EVENT_IRQ) += io_event_irq.o
2324
obj-$(CONFIG_LPARCFG) += lparcfg.o
2425
obj-$(CONFIG_IBMVIO) += vio.o

0 commit comments

Comments
 (0)