Commit 2accfdb

profiling: attempt to remove per-cpu profile flip buffer
This is the really old legacy kernel profiling code, which has long since
been obviated by "real profiling" (ie 'prof' and company), and mainly
remains as a source of syzbot reports.

There are anecdotal reports that people still use it for boot-time
profiling, but it's unlikely that such use would care about the old NUMA
optimizations in this code from 2004 (commit ad02973d42: "profile: 512x
Altix timer interrupt livelock fix" in the BK import archive at [1])

So in order to head off future syzbot reports, let's try to simplify this
code and get rid of the per-cpu profile buffers that are quite a large
portion of the complexity footprint of this thing (including CPU hotplug
callbacks etc).

It's unlikely anybody will actually notice, or possibly, as Thomas put it:
"Only people who indulge in nostalgia will notice :)".

That said, if it turns out that this code is actually actively used by
somebody, we can always revert this removal. Thus the "attempt" in the
summary line.

[ Note: in a small nod to "the profiling code can cause NUMA problems",
  this also removes the "increment the last entry in the profiling array
  on any unknown hits" logic. That would account any program counter in a
  module to that single counter location, and might exacerbate any NUMA
  cacheline bouncing issues ]

Link: https://lore.kernel.org/all/CAHk-=wgs52BxT4Zjmjz8aNvHWKxf5_ThBY4bYL1Y6CTaNL2dTw@mail.gmail.com/
Link: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git [1]
Cc: Thomas Gleixner <[email protected]>
Cc: Tetsuo Handa <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
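[ Annotation: the bracketed note corresponds to the one visible behavioral
change in the do_profile_hits() hunk further down. Condensed side by side
for illustration (every line is taken from that hunk; this is a sketch,
not a drop-in patch), the boundary handling changes from clamping to
dropping:

	/* Before: an out-of-range pc (e.g. a module address) was folded
	 * into the last profile slot, prof_buffer[prof_len - 1]. */
	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
	atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);

	/* After: hits outside the profiled text range are simply ignored,
	 * so no single counter cacheline can become a shared hot spot. */
	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
	if (pc < prof_len)
		atomic_add(nr_hits, &prof_buffer[pc]);
]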
1 parent 7c51f7b commit 2accfdb

2 files changed (+2, -182 lines)

include/linux/cpuhotplug.h (-1 line)

@@ -100,7 +100,6 @@ enum cpuhp_state {
 	CPUHP_WORKQUEUE_PREP,
 	CPUHP_POWER_NUMA_PREPARE,
 	CPUHP_HRTIMERS_PREPARE,
-	CPUHP_PROFILE_PREPARE,
 	CPUHP_X2APIC_PREPARE,
 	CPUHP_SMPCFD_PREPARE,
 	CPUHP_RELAY_PREPARE,

kernel/profile.c (+2, -181 lines)

@@ -129,180 +129,13 @@ int __ref profile_init(void)
 	return -ENOMEM;
 }
 
-#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS)
-/*
- * Each cpu has a pair of open-addressed hashtables for pending
- * profile hits. read_profile() IPI's all cpus to request them
- * to flip buffers and flushes their contents to prof_buffer itself.
- * Flip requests are serialized by the profile_flip_mutex. The sole
- * use of having a second hashtable is for avoiding cacheline
- * contention that would otherwise happen during flushes of pending
- * profile hits required for the accuracy of reported profile hits
- * and so resurrect the interrupt livelock issue.
- *
- * The open-addressed hashtables are indexed by profile buffer slot
- * and hold the number of pending hits to that profile buffer slot on
- * a cpu in an entry. When the hashtable overflows, all pending hits
- * are accounted to their corresponding profile buffer slots with
- * atomic_add() and the hashtable emptied. As numerous pending hits
- * may be accounted to a profile buffer slot in a hashtable entry,
- * this amortizes a number of atomic profile buffer increments likely
- * to be far larger than the number of entries in the hashtable,
- * particularly given that the number of distinct profile buffer
- * positions to which hits are accounted during short intervals (e.g.
- * several seconds) is usually very small. Exclusion from buffer
- * flipping is provided by interrupt disablement (note that for
- * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from
- * process context).
- * The hash function is meant to be lightweight as opposed to strong,
- * and was vaguely inspired by ppc64 firmware-supported inverted
- * pagetable hash functions, but uses a full hashtable full of finite
- * collision chains, not just pairs of them.
- *
- * -- nyc
- */
-static void __profile_flip_buffers(void *unused)
-{
-	int cpu = smp_processor_id();
-
-	per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu);
-}
-
-static void profile_flip_buffers(void)
-{
-	int i, j, cpu;
-
-	mutex_lock(&profile_flip_mutex);
-	j = per_cpu(cpu_profile_flip, get_cpu());
-	put_cpu();
-	on_each_cpu(__profile_flip_buffers, NULL, 1);
-	for_each_online_cpu(cpu) {
-		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j];
-		for (i = 0; i < NR_PROFILE_HIT; ++i) {
-			if (!hits[i].hits) {
-				if (hits[i].pc)
-					hits[i].pc = 0;
-				continue;
-			}
-			atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
-			hits[i].hits = hits[i].pc = 0;
-		}
-	}
-	mutex_unlock(&profile_flip_mutex);
-}
-
-static void profile_discard_flip_buffers(void)
-{
-	int i, cpu;
-
-	mutex_lock(&profile_flip_mutex);
-	i = per_cpu(cpu_profile_flip, get_cpu());
-	put_cpu();
-	on_each_cpu(__profile_flip_buffers, NULL, 1);
-	for_each_online_cpu(cpu) {
-		struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i];
-		memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit));
-	}
-	mutex_unlock(&profile_flip_mutex);
-}
-
-static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
-{
-	unsigned long primary, secondary, flags, pc = (unsigned long)__pc;
-	int i, j, cpu;
-	struct profile_hit *hits;
-
-	pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1);
-	i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
-	secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
-	cpu = get_cpu();
-	hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)];
-	if (!hits) {
-		put_cpu();
-		return;
-	}
-	/*
-	 * We buffer the global profiler buffer into a per-CPU
-	 * queue and thus reduce the number of global (and possibly
-	 * NUMA-alien) accesses. The write-queue is self-coalescing:
-	 */
-	local_irq_save(flags);
-	do {
-		for (j = 0; j < PROFILE_GRPSZ; ++j) {
-			if (hits[i + j].pc == pc) {
-				hits[i + j].hits += nr_hits;
-				goto out;
-			} else if (!hits[i + j].hits) {
-				hits[i + j].pc = pc;
-				hits[i + j].hits = nr_hits;
-				goto out;
-			}
-		}
-		i = (i + secondary) & (NR_PROFILE_HIT - 1);
-	} while (i != primary);
-
-	/*
-	 * Add the current hit(s) and flush the write-queue out
-	 * to the global buffer:
-	 */
-	atomic_add(nr_hits, &prof_buffer[pc]);
-	for (i = 0; i < NR_PROFILE_HIT; ++i) {
-		atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]);
-		hits[i].pc = hits[i].hits = 0;
-	}
-out:
-	local_irq_restore(flags);
-	put_cpu();
-}
-
-static int profile_dead_cpu(unsigned int cpu)
-{
-	struct page *page;
-	int i;
-
-	for (i = 0; i < 2; i++) {
-		if (per_cpu(cpu_profile_hits, cpu)[i]) {
-			page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]);
-			per_cpu(cpu_profile_hits, cpu)[i] = NULL;
-			__free_page(page);
-		}
-	}
-	return 0;
-}
-
-static int profile_prepare_cpu(unsigned int cpu)
-{
-	int i, node = cpu_to_mem(cpu);
-	struct page *page;
-
-	per_cpu(cpu_profile_flip, cpu) = 0;
-
-	for (i = 0; i < 2; i++) {
-		if (per_cpu(cpu_profile_hits, cpu)[i])
-			continue;
-
-		page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
-		if (!page) {
-			profile_dead_cpu(cpu);
-			return -ENOMEM;
-		}
-		per_cpu(cpu_profile_hits, cpu)[i] = page_address(page);
-
-	}
-	return 0;
-}
-
-#else /* !CONFIG_SMP */
-#define profile_flip_buffers()		do { } while (0)
-#define profile_discard_flip_buffers()	do { } while (0)
-
 static void do_profile_hits(int type, void *__pc, unsigned int nr_hits)
 {
 	unsigned long pc;
 	pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift;
-	atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]);
+	if (pc < prof_len)
+		atomic_add(nr_hits, &prof_buffer[pc]);
 }
-#endif /* !CONFIG_SMP */
 
 void profile_hits(int type, void *__pc, unsigned int nr_hits)
 {
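[ Annotation: the long comment removed above describes a double-hashed,
open-addressed probe. Below is a standalone userspace sketch of that
probing scheme, not the kernel code itself: the group constants are
assumptions for illustration (in the kernel, NR_PROFILE_HIT was derived
from PAGE_SIZE, and none of these definitions appear in this diff).

#include <stdio.h>

#define PROFILE_GRPSHIFT 3			/* assumed, for illustration */
#define PROFILE_GRPSZ    (1 << PROFILE_GRPSHIFT)
#define NR_PROFILE_HIT   256			/* stand-in for the page-sized table */
#define NR_PROFILE_GRP   (NR_PROFILE_HIT / PROFILE_GRPSZ)

struct profile_hit { unsigned long pc; unsigned int hits; };
static struct profile_hit hits[NR_PROFILE_HIT];

/* Returns 1 if the hit was buffered, 0 if every probed slot is busy
 * (the point at which the kernel flushed the table to prof_buffer). */
static int buffer_hit(unsigned long pc, unsigned int nr_hits)
{
	unsigned long primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	unsigned long secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;
	unsigned long i = primary;

	do {
		/* Linear scan within one group of PROFILE_GRPSZ slots... */
		for (int j = 0; j < PROFILE_GRPSZ; ++j) {
			if (hits[i + j].pc == pc) {	/* coalesce with a pending hit */
				hits[i + j].hits += nr_hits;
				return 1;
			} else if (!hits[i + j].hits) {	/* claim an empty slot */
				hits[i + j].pc = pc;
				hits[i + j].hits = nr_hits;
				return 1;
			}
		}
		/* ...then step to the next group by the secondary hash. */
		i = (i + secondary) & (NR_PROFILE_HIT - 1);
	} while (i != primary);

	return 0;
}

int main(void)
{
	unsigned long pc = 0x1234;
	unsigned long slot = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT;

	buffer_hit(pc, 1);
	buffer_hit(pc, 2);	/* coalesces into the same pending slot */
	printf("pending hits for pc 0x%lx: %u\n", pc, hits[slot].hits);
	return 0;
}
]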
@@ -340,7 +173,6 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos)
 	char *pnt;
 	unsigned long sample_step = 1UL << prof_shift;
 
-	profile_flip_buffers();
 	if (p >= (prof_len+1)*sizeof(unsigned int))
 		return 0;
 	if (count > (prof_len+1)*sizeof(unsigned int) - p)
@@ -386,7 +218,6 @@ static ssize_t write_profile(struct file *file, const char __user *buf,
 		return -EINVAL;
 	}
 #endif
-	profile_discard_flip_buffers();
 	memset(prof_buffer, 0, prof_len * sizeof(atomic_t));
 	return count;
 }
@@ -404,20 +235,10 @@ int __ref create_proc_profile(void)
 
 	if (!prof_on)
 		return 0;
-#ifdef CONFIG_SMP
-	err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE",
-				profile_prepare_cpu, profile_dead_cpu);
-	if (err)
-		return err;
-#endif
 	entry = proc_create("profile", S_IWUSR | S_IRUGO,
 			    NULL, &profile_proc_ops);
 	if (entry)
 		proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));
-#ifdef CONFIG_SMP
-	else
-		cpuhp_remove_state(CPUHP_PROFILE_PREPARE);
-#endif
 	return err;
 }
 subsys_initcall(create_proc_profile);
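[ Annotation: with the hotplug setup/teardown gone, create_proc_profile()
reduces to the context lines above. For reference, a reconstruction of the
function as it reads after this commit; the local declarations are not
shown in the hunk and are assumed here:

int __ref create_proc_profile(void)
{
	struct proc_dir_entry *entry;	/* assumed declaration, not in the hunk */
	int err = 0;			/* assumed; only 'return err' is visible above */

	if (!prof_on)
		return 0;
	entry = proc_create("profile", S_IWUSR | S_IRUGO,
			    NULL, &profile_proc_ops);
	if (entry)
		proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t));
	return err;
}
]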
