From e9d848cb65d5f6f7731d12bd1b6d994bfdbcc94f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 10 May 2016 11:26:24 -0300 Subject: perf diff: Fix duplicated output column The commit b97511c5bc94 ("perf tools: Add overhead/overhead_children keys defaults via string") moved initialization of column headers but it missed to check the sort__mode. As 'perf diff' doesn't call perf_hpp__init(), the setup_overhead() also should not be called. Before: # Baseline Delta Children Overhead Shared Object Symbol # ........ ....... ........ ........ ................... ....................... # 28.48% -28.47% 28.48% 28.48% [kernel.vmlinux ] [k] intel_idle 11.51% -11.47% 11.51% 11.51% libxul.so [.] 0x0000000001a360f7 3.49% -3.49% 3.49% 3.49% [kernel.vmlinux] [k] generic_exec_single 2.91% -2.89% 2.91% 2.91% libdbus-1.so.3.8.11 [.] 0x000000000000cdc2 2.86% -2.85% 2.86% 2.86% libxcb.so.1.1.0 [.] 0x000000000000c890 2.44% -2.39% 2.44% 2.44% [kernel.vmlinux] [k] perf_event_aux_ctx After: # Baseline Delta Shared Object Symbol # ........ ....... ................... ....................... # 28.48% -28.47% [kernel.vmlinux] [k] intel_idle 11.51% -11.47% libxul.so [.] 0x0000000001a360f7 3.49% -3.49% [kernel.vmlinux] [k] generic_exec_single 2.91% -2.89% libdbus-1.so.3.8.11 [.] 0x000000000000cdc2 2.86% -2.85% libxcb.so.1.1.0 [.] 
0x000000000000c890 2.44% -2.39% [kernel.vmlinux] [k] perf_event_aux_ctx Signed-off-by: Namhyung Kim Signed-off-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: # 4.5+ Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: b97511c5bc94 ("perf tools: Add overhead/overhead_children keys defaults via string") Link: http://lkml.kernel.org/r/1462890384-12486-2-git-send-email-acme@kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/sort.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 47966a1618c7..f5ba111cd9fb 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2445,6 +2445,9 @@ static char *prefix_if_not_in(const char *pre, char *str) static char *setup_overhead(char *keys) { + if (sort__mode == SORT_MODE__DIFF) + return keys; + keys = prefix_if_not_in("overhead", keys); if (symbol_conf.cumulate_callchain) -- cgit v1.2.3 From ec336c879c3b422d2876085be1cbb110e44dc0de Mon Sep 17 00:00:00 2001 From: hchrzani Date: Mon, 9 May 2016 09:36:59 +0200 Subject: perf/x86/intel/uncore: Fix CHA registers configuration procedure for Knights Landing platform CHA events in Knights Landing platform require programming filter registers properly. Remote node, local node and NonNearMemCachable bits should be set to 1 at all times. 
Signed-off-by: Hubert Chrzaniuk Signed-off-by: Lawrence F Meadows Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@suse.de Cc: harish.chegondi@intel.com Cc: hpa@zytor.com Cc: izumi.taku@jp.fujitsu.com Cc: kan.liang@intel.com Cc: lukasz.anaczkowski@intel.com Cc: vthakkar1994@gmail.com Fixes: 77af0037de0a ('perf/x86/intel/uncore: Add Knights Landing uncore PMU support') Link: http://lkml.kernel.org/r/1462779419-17115-2-git-send-email-hubert.chrzaniuk@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snbep.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index ab2bcaaebe38..b2625867ebd1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -219,6 +219,9 @@ #define KNL_CHA_MSR_PMON_BOX_FILTER_TID 0x1ff #define KNL_CHA_MSR_PMON_BOX_FILTER_STATE (7 << 18) #define KNL_CHA_MSR_PMON_BOX_FILTER_OP (0xfffffe2aULL << 32) +#define KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE (0x1ULL << 32) +#define KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE (0x1ULL << 33) +#define KNL_CHA_MSR_PMON_BOX_FILTER_NNC (0x1ULL << 37) /* KNL EDC/MC UCLK */ #define KNL_UCLK_MSR_PMON_CTR0_LOW 0x400 @@ -1902,6 +1905,10 @@ static int knl_cha_hw_config(struct intel_uncore_box *box, reg1->reg = HSWEP_C0_MSR_PMON_BOX_FILTER0 + KNL_CHA_MSR_OFFSET * box->pmu->pmu_idx; reg1->config = event->attr.config1 & knl_cha_filter_mask(idx); + + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_REMOTE_NODE; + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_LOCAL_NODE; + reg1->config |= KNL_CHA_MSR_PMON_BOX_FILTER_NNC; reg1->idx = idx; } return 0; -- cgit v1.2.3 From 3c3116b745c08dc76ccbed7223c4edddcfd9a186 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 May 2016 14:16:54 +0200 Subject: perf/x86/msr: Fix SMI overflow We compute 
'delta' and properly sign extend it and then ignore it and recompute the raw value, losing the sign extension. Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: kan.liang@intel.com Cc: linux-kernel@vger.kernel.org Cc: luto@kernel.org Cc: ray.huang@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/msr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index ec863b9a9f78..8bef19f098d4 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -166,7 +166,7 @@ again: if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) delta = sign_extend64(delta, 31); - local64_add(now - prev, &event->count); + local64_add(delta, &event->count); } static void msr_event_start(struct perf_event *event, int flags) -- cgit v1.2.3 From 6d6f2833bfbf296101f9f085e10488aef2601ba5 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 11 May 2016 16:51:51 +0300 Subject: perf/x86: Fix undefined shift on 32-bit kernels Jim reported: UBSAN: Undefined behaviour in arch/x86/events/intel/core.c:3708:12 shift exponent 35 is too large for 32-bit type 'long unsigned int' The use of 'unsigned long' type obviously is not correct here, make it 'unsigned long long' instead. Reported-by: Jim Cromie Signed-off-by: Andrey Ryabinin Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: H. 
Peter Anvin Cc: Imre Palik Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 2c33645d366d ("perf/x86: Honor the architectural performance monitoring version") Link: http://lkml.kernel.org/r/1462974711-10037-1-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a6fd4dbcf820..5210eaa4aa62 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3708,7 +3708,7 @@ __init int intel_pmu_init(void) c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; } c->idxmsk64 &= - ~(~0UL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); + ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed)); c->weight = hweight64(c->idxmsk64); } } -- cgit v1.2.3 From ab92b232ae05c382c3df0e3d6a5c6d16b639ac8c Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 10 May 2016 16:18:32 +0300 Subject: perf/x86/intel/pt: Generate PMI in the STOP region as well Currently, the PT driver always sets the PMI bit one region (page) before the STOP region so that we can wake up the consumer before we run out of room in the buffer and have to disable the event. However, we also need an interrupt in the last output region, so that we actually get to disable the event (if no more room for new data is available at that point), otherwise hardware just quietly refuses to start, but the event is scheduled in and we end up losing trace data till the event gets removed. For a cpu-wide event it is even worse since there may not be any re-scheduling at all and no chance for the ring buffer code to notice that its buffer is filled up and the event needs to be disabled (so that the consumer can re-enable it when it finishes reading the data out). In other words, all the trace data will be lost after the buffer gets filled up. 
This patch makes PT also generate a PMI when the last output region is full. Reported-by: Markus Metzger Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/1462886313-13660-2-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 09a77dbc73c9..7377814de30b 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -709,6 +709,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, /* clear STOP and INT from current entry */ buf->topa_index[buf->stop_pos]->stop = 0; + buf->topa_index[buf->stop_pos]->intr = 0; buf->topa_index[buf->intr_pos]->intr = 0; /* how many pages till the STOP marker */ @@ -733,6 +734,7 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, buf->intr_pos = idx; buf->topa_index[buf->stop_pos]->stop = 1; + buf->topa_index[buf->stop_pos]->intr = 1; buf->topa_index[buf->intr_pos]->intr = 1; return 0; -- cgit v1.2.3 From 9f448cd3cbcec8995935e60b27802ae56aac8cc0 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 10 May 2016 16:18:33 +0300 Subject: perf/core: Disable the event on a truncated AUX record When the PMU driver reports a truncated AUX record, it effectively means that there is no more usable room in the event's AUX buffer (even though there may still be some room, so that perf_aux_output_begin() doesn't take action). At this point the consumer still has to be woken up and the event has to be disabled, otherwise the event will just keep spinning between perf_aux_output_begin() and perf_aux_output_end() until its context gets unscheduled. 
Again, for cpu-wide events this means never, so once in this condition, they will be forever losing data. Fix this by disabling the event and waking up the consumer in case of a truncated AUX record. Reported-by: Markus Metzger Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/1462886313-13660-3-git-send-email-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- kernel/events/ring_buffer.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index c61f0cbd308b..7611d0f66cf8 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -347,6 +347,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, bool truncated) { struct ring_buffer *rb = handle->rb; + bool wakeup = truncated; unsigned long aux_head; u64 flags = 0; @@ -375,9 +376,16 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { - perf_output_wakeup(handle); + wakeup = true; local_add(rb->aux_watermark, &rb->aux_wakeup); } + + if (wakeup) { + if (truncated) + handle->event->pending_disable = 1; + perf_output_wakeup(handle); + } + handle->event = NULL; local_set(&rb->aux_nest, 0); -- cgit v1.2.3 From 2515e614834f362eed36fb5ea5d359d94a525263 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:32:15 -0300 Subject: perf tools: Use readdir() instead of deprecated readdir_r() The readdir() function is thread safe as long as just one thread uses a DIR, which is the case when synthesizing events for pre-existing threads by 
traversing /proc, so, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. CC /tmp/build/perf/util/event.o util/event.c: In function '__event__synthesize_thread': util/event.c:466:2: error: 'readdir_r' is deprecated [-Werror=deprecated-declarations] while (!readdir_r(tasks, &dirent, &next) && next) { ^~~~~ In file included from /usr/include/features.h:368:0, from /usr/include/stdint.h:25, from /usr/lib/gcc/x86_64-redhat-linux/6.0.0/include/stdint.h:9, from /git/linux/tools/include/linux/types.h:6, from util/event.c:1: /usr/include/dirent.h:189:12: note: declared here Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-i1vj7nyjp2p750rirxgrfd3c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/event.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index dad55d04ffdd..edcf4ed4e99c 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -433,7 +433,7 @@ static int __event__synthesize_thread(union perf_event *comm_event, { char filename[PATH_MAX]; DIR *tasks; - struct dirent dirent, *next; + struct dirent *dirent; pid_t tgid, ppid; int rc = 0; @@ -462,11 +462,11 @@ static int __event__synthesize_thread(union perf_event *comm_event, return 0; } - while (!readdir_r(tasks, &dirent, &next) && next) { + while ((dirent = readdir(tasks)) != NULL) { char *end; pid_t _pid; - _pid = 
strtol(dirent.d_name, &end, 10); + _pid = strtol(dirent->d_name, &end, 10); if (*end) continue; @@ -575,7 +575,7 @@ int perf_event__synthesize_threads(struct perf_tool *tool, { DIR *proc; char proc_path[PATH_MAX]; - struct dirent dirent, *next; + struct dirent *dirent; union perf_event *comm_event, *mmap_event, *fork_event; int err = -1; @@ -600,9 +600,9 @@ int perf_event__synthesize_threads(struct perf_tool *tool, if (proc == NULL) goto out_free_fork; - while (!readdir_r(proc, &dirent, &next) && next) { + while ((dirent = readdir(proc)) != NULL) { char *end; - pid_t pid = strtol(dirent.d_name, &end, 10); + pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; -- cgit v1.2.3 From 9a5f3bf332bbd42625b71553ca9ffdffa9fc4785 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:25:59 -0300 Subject: perf script: Use readdir() instead of deprecated readdir_r() The readdir() function is thread safe as long as just one thread uses a DIR, which is the case in 'perf script', so, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-mt3xz7n2hl49ni2vx7kuq74g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 70 ++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3770c3dffe5e..52826696c852 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1415,21 +1415,19 @@ static int is_directory(const char *base_path, const struct dirent *dent) return S_ISDIR(st.st_mode); } -#define for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next)\ - while (!readdir_r(scripts_dir, &lang_dirent, &lang_next) && \ - lang_next) \ - if ((lang_dirent.d_type == DT_DIR || \ - (lang_dirent.d_type == DT_UNKNOWN && \ - is_directory(scripts_path, &lang_dirent))) && \ - (strcmp(lang_dirent.d_name, ".")) && \ - (strcmp(lang_dirent.d_name, ".."))) - -#define for_each_script(lang_path, lang_dir, script_dirent, script_next)\ - while (!readdir_r(lang_dir, &script_dirent, &script_next) && \ - script_next) \ - if (script_dirent.d_type != DT_DIR && \ - (script_dirent.d_type != DT_UNKNOWN || \ - !is_directory(lang_path, &script_dirent))) +#define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ + while ((lang_dirent = readdir(scripts_dir)) != NULL) \ + if ((lang_dirent->d_type == DT_DIR || \ + (lang_dirent->d_type == DT_UNKNOWN && \ + is_directory(scripts_path, lang_dirent))) && \ + (strcmp(lang_dirent->d_name, ".")) && \ + (strcmp(lang_dirent->d_name, ".."))) + +#define for_each_script(lang_path, lang_dir, script_dirent) \ + while ((script_dirent = readdir(lang_dir)) != NULL) \ + if (script_dirent->d_type != DT_DIR && \ + (script_dirent->d_type != DT_UNKNOWN || \ + !is_directory(lang_path, script_dirent))) #define RECORD_SUFFIX "-record" @@ -1575,7 +1573,7 @@ static int list_available_scripts(const struct option *opt 
__maybe_unused, const char *s __maybe_unused, int unset __maybe_unused) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; char script_path[MAXPATHLEN]; @@ -1590,19 +1588,19 @@ static int list_available_scripts(const struct option *opt __maybe_unused, if (!scripts_dir) return -1; - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { - script_root = get_script_root(&script_dirent, REPORT_SUFFIX); + for_each_script(lang_path, lang_dir, script_dirent) { + script_root = get_script_root(script_dirent, REPORT_SUFFIX); if (script_root) { desc = script_desc__findnew(script_root); snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, script_dirent.d_name); + lang_path, script_dirent->d_name); read_script_info(desc, script_path); free(script_root); } @@ -1690,7 +1688,7 @@ static int check_ev_match(char *dir_name, char *scriptname, */ int find_scripts(char **scripts_array, char **scripts_path_array) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; struct perf_session *session; @@ -1713,9 +1711,9 @@ int find_scripts(char **scripts_array, char **scripts_path_array) return -1; } - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); #ifdef NO_LIBPERL if (strstr(lang_path, "perl")) continue; @@ -1729,16 +1727,16 @@ int 
find_scripts(char **scripts_array, char **scripts_path_array) if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { + for_each_script(lang_path, lang_dir, script_dirent) { /* Skip those real time scripts: xxxtop.p[yl] */ - if (strstr(script_dirent.d_name, "top.")) + if (strstr(script_dirent->d_name, "top.")) continue; sprintf(scripts_path_array[i], "%s/%s", lang_path, - script_dirent.d_name); - temp = strchr(script_dirent.d_name, '.'); + script_dirent->d_name); + temp = strchr(script_dirent->d_name, '.'); snprintf(scripts_array[i], - (temp - script_dirent.d_name) + 1, - "%s", script_dirent.d_name); + (temp - script_dirent->d_name) + 1, + "%s", script_dirent->d_name); if (check_ev_match(lang_path, scripts_array[i], session)) @@ -1756,7 +1754,7 @@ int find_scripts(char **scripts_array, char **scripts_path_array) static char *get_script_path(const char *script_root, const char *suffix) { - struct dirent *script_next, *lang_next, script_dirent, lang_dirent; + struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN]; char script_path[MAXPATHLEN]; DIR *scripts_dir, *lang_dir; @@ -1769,21 +1767,21 @@ static char *get_script_path(const char *script_root, const char *suffix) if (!scripts_dir) return NULL; - for_each_lang(scripts_path, scripts_dir, lang_dirent, lang_next) { + for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, - lang_dirent.d_name); + lang_dirent->d_name); lang_dir = opendir(lang_path); if (!lang_dir) continue; - for_each_script(lang_path, lang_dir, script_dirent, script_next) { - __script_root = get_script_root(&script_dirent, suffix); + for_each_script(lang_path, lang_dir, script_dirent) { + __script_root = get_script_root(script_dirent, suffix); if (__script_root && !strcmp(script_root, __script_root)) { free(__script_root); closedir(lang_dir); closedir(scripts_dir); snprintf(script_path, MAXPATHLEN, "%s/%s", - lang_path, 
script_dirent.d_name); + lang_path, script_dirent->d_name); return strdup(script_path); } free(__script_root); -- cgit v1.2.3 From 7839b9f32e45075d9eb48da8480faef3dbd019f0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:31:24 -0300 Subject: perf thread_map: Use readdir() instead of deprecated readdir_r() The readdir() function is thread safe as long as just one thread uses a DIR, which is the case in thread_map, so, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. 
Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-del8h2a0f40z75j4r42l96l0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread_map.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 08afc6909953..267112b4e3db 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -94,7 +94,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) DIR *proc; int max_threads = 32, items, i; char path[256]; - struct dirent dirent, *next, **namelist = NULL; + struct dirent *dirent, **namelist = NULL; struct thread_map *threads = thread_map__alloc(max_threads); if (threads == NULL) @@ -107,16 +107,16 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) threads->nr = 0; atomic_set(&threads->refcnt, 1); - while (!readdir_r(proc, &dirent, &next) && next) { + while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; struct stat st; - pid_t pid = strtol(dirent.d_name, &end, 10); + pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ continue; - snprintf(path, sizeof(path), "/proc/%s", dirent.d_name); + snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); if (stat(path, &st) != 0) continue; -- cgit v1.2.3 From 22a9f41b555673e7499b97acf3ffb07bf0af31ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 11:53:02 -0300 Subject: perf tools: Use readdir() instead of deprecated readdir_r() The readdir() function is thread safe as long as just one thread uses a DIR, which is the case when parsing tracepoint event definitions, to avoid breaking the build with glibc-2.23.90 (upcoming 2.24), use it instead of readdir_r(). 
See: http://man7.org/linux/man-pages/man3/readdir.3.html "However, in modern implementations (including the glibc implementation), concurrent calls to readdir() that specify different directory streams are thread-safe. In cases where multiple threads must read from the same directory stream, using readdir() with external synchronization is still preferable to the use of the deprecated readdir_r(3) function." Noticed while building on a Fedora Rawhide docker container. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-wddn49r6bz6wq4ee3dxbl7lo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 60 +++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4c19d5e79d8c..bcbc983d4b12 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -138,11 +138,11 @@ struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { #define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE) #define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT) -#define for_each_subsystem(sys_dir, sys_dirent, sys_next) \ - while (!readdir_r(sys_dir, &sys_dirent, &sys_next) && sys_next) \ - if (sys_dirent.d_type == DT_DIR && \ - (strcmp(sys_dirent.d_name, ".")) && \ - (strcmp(sys_dirent.d_name, ".."))) +#define for_each_subsystem(sys_dir, sys_dirent) \ + while ((sys_dirent = readdir(sys_dir)) != NULL) \ + if (sys_dirent->d_type == DT_DIR && \ + (strcmp(sys_dirent->d_name, ".")) && \ + (strcmp(sys_dirent->d_name, ".."))) static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) { @@ -159,12 +159,12 @@ static int tp_event_has_id(struct dirent *sys_dir, struct dirent *evt_dir) return 0; } -#define for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) \ - while (!readdir_r(evt_dir, &evt_dirent, &evt_next) && evt_next) \ - if 
(evt_dirent.d_type == DT_DIR && \ - (strcmp(evt_dirent.d_name, ".")) && \ - (strcmp(evt_dirent.d_name, "..")) && \ - (!tp_event_has_id(&sys_dirent, &evt_dirent))) +#define for_each_event(sys_dirent, evt_dir, evt_dirent) \ + while ((evt_dirent = readdir(evt_dir)) != NULL) \ + if (evt_dirent->d_type == DT_DIR && \ + (strcmp(evt_dirent->d_name, ".")) && \ + (strcmp(evt_dirent->d_name, "..")) && \ + (!tp_event_has_id(sys_dirent, evt_dirent))) #define MAX_EVENT_LENGTH 512 @@ -173,7 +173,7 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) { struct tracepoint_path *path = NULL; DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char id_buf[24]; int fd; u64 id; @@ -184,18 +184,18 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) if (!sys_dir) return NULL; - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, - evt_dirent.d_name); + evt_dirent->d_name); fd = open(evt_path, O_RDONLY); if (fd < 0) continue; @@ -220,9 +220,9 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) free(path); return NULL; } - strncpy(path->system, sys_dirent.d_name, + strncpy(path->system, sys_dirent->d_name, MAX_EVENT_LENGTH); - strncpy(path->name, evt_dirent.d_name, + strncpy(path->name, evt_dirent->d_name, MAX_EVENT_LENGTH); return path; } @@ -1812,7 +1812,7 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only) { DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; char 
dir_path[MAXPATHLEN]; char **evt_list = NULL; @@ -1830,20 +1830,20 @@ restart: goto out_close_sys_dir; } - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { if (subsys_glob != NULL && - !strglobmatch(sys_dirent.d_name, subsys_glob)) + !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { if (event_glob != NULL && - !strglobmatch(evt_dirent.d_name, event_glob)) + !strglobmatch(evt_dirent->d_name, event_glob)) continue; if (!evt_num_known) { @@ -1852,7 +1852,7 @@ restart: } snprintf(evt_path, MAXPATHLEN, "%s:%s", - sys_dirent.d_name, evt_dirent.d_name); + sys_dirent->d_name, evt_dirent->d_name); evt_list[evt_i] = strdup(evt_path); if (evt_list[evt_i] == NULL) @@ -1905,7 +1905,7 @@ out_close_sys_dir: int is_valid_tracepoint(const char *event_string) { DIR *sys_dir, *evt_dir; - struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; + struct dirent *sys_dirent, *evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; @@ -1913,17 +1913,17 @@ int is_valid_tracepoint(const char *event_string) if (!sys_dir) return 0; - for_each_subsystem(sys_dir, sys_dirent, sys_next) { + for_each_subsystem(sys_dir, sys_dirent) { snprintf(dir_path, MAXPATHLEN, "%s/%s", tracing_events_path, - sys_dirent.d_name); + sys_dirent->d_name); evt_dir = opendir(dir_path); if (!evt_dir) continue; - for_each_event(sys_dirent, evt_dir, evt_dirent, evt_next) { + for_each_event(sys_dirent, evt_dir, evt_dirent) { snprintf(evt_path, MAXPATHLEN, "%s:%s", - sys_dirent.d_name, evt_dirent.d_name); + sys_dirent->d_name, evt_dirent->d_name); if (!strcmp(evt_path, event_string)) { closedir(evt_dir); closedir(sys_dir); -- cgit v1.2.3 From 62aa0e177d278462145a29c30d3c8501ae57e200 Mon 
Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 8 Apr 2016 12:04:29 -0300 Subject: perf dwarf: Guard !x86_64 definitions under #ifdef else clause To fix the build on Fedora Rawhide (gcc 6.0.0 20160311 (Red Hat 6.0.0-0.17): CC /tmp/build/perf/arch/x86/util/dwarf-regs.o arch/x86/util/dwarf-regs.c:66:36: error: 'x86_32_regoffset_table' defined but not used [-Werror=unused-const-variable=] static const struct pt_regs_offset x86_32_regoffset_table[] = { ^~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-fghuksc1u8ln82bof4lwcj0o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/dwarf-regs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c index 9223c164e545..1f86ee8fb831 100644 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ b/tools/perf/arch/x86/util/dwarf-regs.c @@ -63,6 +63,8 @@ struct pt_regs_offset { # define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} #endif +/* TODO: switching by dwarf address size */ +#ifndef __x86_64__ static const struct pt_regs_offset x86_32_regoffset_table[] = { REG_OFFSET_NAME_32("%ax", eax), REG_OFFSET_NAME_32("%cx", ecx), @@ -75,6 +77,8 @@ static const struct pt_regs_offset x86_32_regoffset_table[] = { REG_OFFSET_END, }; +#define regoffset_table x86_32_regoffset_table +#else static const struct pt_regs_offset x86_64_regoffset_table[] = { REG_OFFSET_NAME_64("%ax", rax), REG_OFFSET_NAME_64("%dx", rdx), @@ -95,11 +99,7 @@ static const struct pt_regs_offset x86_64_regoffset_table[] = { REG_OFFSET_END, }; -/* TODO: switching by dwarf address size */ -#ifdef __x86_64__ #define regoffset_table x86_64_regoffset_table -#else -#define regoffset_table x86_32_regoffset_table #endif /* Minus 1 for the ending REG_OFFSET_END */ -- cgit v1.2.3 From 
4924734570a073049450b11f7c59ce5992b03343 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Apr 2016 11:33:41 -0300 Subject: perf probe: Check if dwarf_getlocations() is available If not, tell the user that: config/Makefile:273: Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157 And return -ENOTSUP in die_get_var_range(), failing features that need it, like the one pointed out above. This fixes the build on older systems, such as Ubuntu 12.04.5. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Vinson Lee Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-9l7luqkq4gfnx7vrklkq4obs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 2 ++ tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-dwarf_getlocations.c | 12 ++++++++++++ tools/perf/config/Makefile | 6 ++++++ tools/perf/util/dwarf-aux.c | 9 +++++++++ 6 files changed, 38 insertions(+) create mode 100644 tools/build/feature/test-dwarf_getlocations.c diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 6b7707270aa3..9f878619077a 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -30,6 +30,7 @@ endef FEATURE_TESTS_BASIC := \ backtrace \ dwarf \ + dwarf_getlocations \ fortify-source \ sync-compare-and-swap \ glibc \ @@ -78,6 +79,7 @@ endif FEATURE_DISPLAY ?= \ dwarf \ + dwarf_getlocations \ glibc \ gtk2 \ libaudit \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index c5f4c417428d..4ae94dbfdab9 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -3,6 +3,7 @@ FILES= \ test-backtrace.bin \ test-bionic.bin \ test-dwarf.bin \ + test-dwarf_getlocations.bin \ test-fortify-source.bin \ test-sync-compare-and-swap.bin \ test-glibc.bin \ @@ -82,6 +83,9 @@ endif $(OUTPUT)test-dwarf.bin: $(BUILD) $(DWARFLIBS) 
+$(OUTPUT)test-dwarf_getlocations.bin: + $(BUILD) $(DWARFLIBS) + $(OUTPUT)test-libelf-mmap.bin: $(BUILD) -lelf diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index e499a36c1e4a..a282e8cb84f3 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -41,6 +41,10 @@ # include "test-dwarf.c" #undef main +#define main main_test_dwarf_getlocations +# include "test-dwarf_getlocations.c" +#undef main + #define main main_test_libelf_getphdrnum # include "test-libelf-getphdrnum.c" #undef main @@ -143,6 +147,7 @@ int main(int argc, char *argv[]) main_test_libelf_mmap(); main_test_glibc(); main_test_dwarf(); + main_test_dwarf_getlocations(); main_test_libelf_getphdrnum(); main_test_libunwind(); main_test_libaudit(); diff --git a/tools/build/feature/test-dwarf_getlocations.c b/tools/build/feature/test-dwarf_getlocations.c new file mode 100644 index 000000000000..70162699dd43 --- /dev/null +++ b/tools/build/feature/test-dwarf_getlocations.c @@ -0,0 +1,12 @@ +#include <stdlib.h> +#include <elfutils/libdw.h> + +int main(void) +{ + Dwarf_Addr base, start, end; + Dwarf_Attribute attr; + Dwarf_Op *op; + size_t nops; + ptrdiff_t offset = 0; + return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); +} diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index f7d7f5a1cad5..6f8f6430f2bf 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -268,6 +268,12 @@ else ifneq ($(feature-dwarf), 1) msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. 
Please install new elfutils-devel/libdw-dev); NO_DWARF := 1 + else + ifneq ($(feature-dwarf_getlocations), 1) + msg := $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157); + else + CFLAGS += -DHAVE_DWARF_GETLOCATIONS + endif # dwarf_getlocations endif # Dwarf support endif # libelf support endif # NO_LIBELF diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 577e600c8eb1..aea189b41cc8 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -959,6 +959,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return 0; } +#ifdef HAVE_DWARF_GETLOCATIONS /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE @@ -1080,3 +1081,11 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) return ret; } +#else +int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, + Dwarf_Die *vr_die __maybe_unused, + struct strbuf *buf __maybe_unused) +{ + return -ENOTSUP; +} +#endif -- cgit v1.2.3 From 106b816cb46ebd87408b4ed99a2e16203114daa6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Wed, 11 May 2016 15:09:36 -0400 Subject: tools lib traceevent: Do not reassign parg after collapse_tree() At the end of process_filter(), collapse_tree() was changed to update the parg parameter, but the reassignment after the call wasn't removed. What happens is that the "current_op" gets modified and freed and parg is assigned to the new allocated argument. But after the call to collapse_tree(), parg is assigned again to the just freed "current_op", and this causes the tool to crash. The current_op variable must also be assigned to NULL in case of error, otherwise it will cause it to be free()ed twice. 
Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: stable@vger.kernel.org # 3.14+ Fixes: 42d6194d133c ("tools lib traceevent: Refactor process_filter()") Link: http://lkml.kernel.org/r/20160511150936.678c18a1@gandalf.local.home Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-filter.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 0144b3d1bb77..88cccea3ca99 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1164,11 +1164,11 @@ process_filter(struct event_format *event, struct filter_arg **parg, current_op = current_exp; ret = collapse_tree(current_op, parg, error_str); + /* collapse_tree() may free current_op, and updates parg accordingly */ + current_op = NULL; if (ret < 0) goto fail; - *parg = current_op; - free(token); return 0; -- cgit v1.2.3 From 7d173913a6420f2818afeca70b268f064441f69b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 15:44:55 -0300 Subject: perf evsel: Improve EPERM error handling in open_strerror() We were showing a hardcoded default value for the kernel.perf_event_paranoid sysctl, now that it became more paranoid (1 -> 2 [1]), this would need to be updated, instead show the current value: [acme@jouet linux]$ perf record ls Error: You may not have permission to collect stats. Consider tweaking /proc/sys/kernel/perf_event_paranoid, which controls use of the performance events system by unprivileged users (without CAP_SYS_ADMIN). 
The current value is 2: -1: Allow use of (almost) all events by all users >= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK >= 1: Disallow CPU event access by users without CAP_SYS_ADMIN >= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN [acme@jouet linux]$ [1] 0161028b7c8a ("perf/core: Change the default paranoia level to 2") Reported-by: Ingo Molnar Cc: Adrian Hunter Cc: Andy Lutomirski Cc: David Ahern Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-0gc4rdpg8d025r5not8s8028@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 738ce226002b..a5f339d447cc 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2382,12 +2382,13 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n" "which controls use of the performance events system by\n" "unprivileged users (without CAP_SYS_ADMIN).\n\n" - "The default value is 1:\n\n" + "The current value is %d:\n\n" " -1: Allow use of (almost) all events by all users\n" ">= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK\n" ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n" ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN", - target->system_wide ? "system-wide " : ""); + target->system_wide ? 
"system-wide " : "", + perf_event_paranoid()); case ENOENT: return scnprintf(msg, size, "The %s event is not supported.", perf_evsel__name(evsel)); -- cgit v1.2.3 From 08094828b711dd32de57e9e3314935e19db71b3d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 16:07:47 -0300 Subject: perf evsel: Handle EACCESS + perf_event_paranoid=2 in fallback() Now with the default for the kernel.perf_event_paranoid sysctl being 2 [1] we need to fall back to :u, i.e. to set perf_event_attr.exclude_kernel to 1. Before: [acme@jouet linux]$ perf record usleep 1 Error: You may not have permission to collect stats. Consider tweaking /proc/sys/kernel/perf_event_paranoid, which controls use of the performance events system by unprivileged users (without CAP_SYS_ADMIN). The current value is 2: -1: Allow use of (almost) all events by all users >= 0: Disallow raw tracepoint access by users without CAP_IOC_LOCK >= 1: Disallow CPU event access by users without CAP_SYS_ADMIN >= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN [acme@jouet linux]$ After: [acme@jouet linux]$ perf record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.016 MB perf.data (7 samples) ] [acme@jouet linux]$ perf evlist cycles:u [acme@jouet linux]$ perf evlist -v cycles:u: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 [acme@jouet linux]$ And if the user turns on verbose mode, an explanation will appear: [acme@jouet linux]$ perf record -v usleep 1 Warning: kernel.perf_event_paranoid=2, trying to fall back to excluding kernel samples mmap size 528384B [ perf record: Woken up 1 times to write data ] Looking at the vmlinux_path (8 entries long) Using /lib/modules/4.6.0-rc7+/build/vmlinux for symbols [ perf record: Captured and wrote 0.016 MB perf.data 
(7 samples) ] [acme@jouet linux]$ [1] 0161028b7c8a ("perf/core: Change the default paranoia level to 2") Reported-by: Ingo Molnar Cc: Adrian Hunter Cc: Andy Lutomirski Cc: David Ahern Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-b20jmx4dxt5hpaa9t2rroi0o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a5f339d447cc..645dc1828836 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2345,6 +2345,8 @@ out: bool perf_evsel__fallback(struct perf_evsel *evsel, int err, char *msg, size_t msgsize) { + int paranoid; + if ((err == ENOENT || err == ENXIO || err == ENODEV) && evsel->attr.type == PERF_TYPE_HARDWARE && evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) { @@ -2363,6 +2365,22 @@ bool perf_evsel__fallback(struct perf_evsel *evsel, int err, evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK; zfree(&evsel->name); + return true; + } else if (err == EACCES && !evsel->attr.exclude_kernel && + (paranoid = perf_event_paranoid()) > 1) { + const char *name = perf_evsel__name(evsel); + char *new_name; + + if (asprintf(&new_name, "%s%su", name, strchr(name, ':') ? 
"" : ":") < 0) + return false; + + if (evsel->name) + free(evsel->name); + evsel->name = new_name; + scnprintf(msg, msgsize, +"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid); + evsel->attr.exclude_kernel = 1; + return true; } -- cgit v1.2.3 From 42ef8a78c1f49f53f29f0f3a6f9a5bcbc653233e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 12 May 2016 16:25:18 -0300 Subject: perf stat: Fallback to user only counters when perf_event_paranoid > 1 After 0161028b7c8a ("perf/core: Change the default paranoia level to 2") 'perf stat' fails for users without CAP_SYS_ADMIN, so just use 'perf_evsel__fallback()' to have the same behaviour as 'perf record', i.e. set perf_event_attr.exclude_kernel to 1. Now: [acme@jouet linux]$ perf stat usleep 1 Performance counter stats for 'usleep 1': 0.352536 task-clock:u (msec) # 0.423 CPUs utilized 0 context-switches:u # 0.000 K/sec 0 cpu-migrations:u # 0.000 K/sec 49 page-faults:u # 0.139 M/sec 309,407 cycles:u # 0.878 GHz 243,791 instructions:u # 0.79 insn per cycle 49,622 branches:u # 140.757 M/sec 3,884 branch-misses:u # 7.83% of all branches 0.000834174 seconds time elapsed [acme@jouet linux]$ Reported-by: Ingo Molnar Cc: Adrian Hunter Cc: Andy Lutomirski Cc: David Ahern Cc: Jiri Olsa Cc: Kees Cook Cc: Linus Torvalds Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-b20jmx4dxt5hpaa9t2rroi0o@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 1f19f2f999c8..307e8a1a003c 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -528,6 +528,7 @@ static int __run_perf_stat(int argc, const char **argv) perf_evlist__set_leader(evsel_list); evlist__for_each(evsel_list, counter) { +try_again: if (create_perf_stat_counter(counter) < 0) { /* * PPC returns 
ENXIO for HW counters until 2.6.37 @@ -544,7 +545,11 @@ static int __run_perf_stat(int argc, const char **argv) if ((counter->leader != counter) || !(counter->leader->nr_members > 1)) continue; - } + } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) { + if (verbose) + ui__warning("%s\n", msg); + goto try_again; + } perf_evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); -- cgit v1.2.3