mirror of https://github.com/Qortal/Brooklyn.git synced 2025-01-30 14:52:17 +00:00

update and merge

Raziel K. Crowe 2022-04-02 17:07:24 +05:00
parent 0beea36afc
commit 4b3a56d965
121 changed files with 4401 additions and 2729 deletions

View File

@ -12,6 +12,7 @@ help:
@echo ' acpi - ACPI tools'
@echo ' bpf - misc BPF tools'
@echo ' cgroup - cgroup tools'
@echo ' counter - counter tools'
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' debugging - tools for debugging'
@echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
@ -23,7 +24,6 @@ help:
@echo ' intel-speed-select - Intel Speed Select tool'
@echo ' kvm_stat - top-like utility for displaying kvm statistics'
@echo ' leds - LEDs tools'
@echo ' liblockdep - user-space wrapper for kernel locking-validator'
@echo ' objtool - an ELF object analysis tool'
@echo ' pci - PCI tools'
@echo ' perf - Linux performance measurement and analysis tool'
@ -65,15 +65,12 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
cgroup firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
cgroup counter firewire hv guest bootconfig spi usb virtio vm bpf iio gpio objtool leds wmi pci firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
$(call descend,$@)
liblockdep: FORCE
$(call descend,lib/lockdep)
libapi: FORCE
$(call descend,lib/api)
@ -100,7 +97,7 @@ freefall: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)
all: acpi cgroup cpupower gpio hv firewire liblockdep \
all: acpi cgroup counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio vm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
@ -112,12 +109,9 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
cgroup_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
cgroup_install counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install pci_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
liblockdep_install:
$(call descend,lib/lockdep,install)
selftests_install:
$(call descend,testing/$(@:_install=),install)
@ -133,8 +127,8 @@ freefall_install:
kvm_stat_install:
$(call descend,kvm/$(@:_install=),install)
install: acpi_install cgroup_install cpupower_install gpio_install \
hv_install firewire_install iio_install liblockdep_install \
install: acpi_install cgroup_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install vm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
@ -147,12 +141,9 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
cgroup_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
cgroup_clean counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean pci_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
liblockdep_clean:
$(call descend,lib/lockdep,clean)
libapi_clean:
$(call descend,lib/api,clean)
@ -181,10 +172,10 @@ freefall_clean:
build_clean:
$(call descend,build,clean)
clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \
clean: acpi_clean cgroup_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
gpio_clean objtool_clean leds_clean wmi_clean pci_clean firmware_clean debugging_clean \
intel-speed-select_clean tracing_clean

View File

@ -281,6 +281,11 @@ struct kvm_arm_copy_mte_tags {
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 KVM_REG_ARM_FW_REG(3)
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_AVAIL 0
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_AVAIL 1
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3_NOT_REQUIRED 2
/* SVE registers */
#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)

View File

@ -61,27 +61,35 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_PMC4,
PERF_REG_POWERPC_PMC5,
PERF_REG_POWERPC_PMC6,
/* Max regs without the extended regs */
PERF_REG_POWERPC_SDAR,
PERF_REG_POWERPC_SIAR,
/* Max mask value for interrupt regs w/o extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
/* Max mask value for interrupt regs including extended regs */
PERF_REG_EXTENDED_MAX = PERF_REG_POWERPC_SIAR + 1,
};
#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */
#define PERF_EXCLUDE_REG_EXT_300 (7ULL << PERF_REG_POWERPC_MMCR3)
/*
* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
* includes 9 SPRS from MMCR0 to PMC6 excluding the
* unsupported SPRS in PERF_EXCLUDE_REG_EXT_300.
* includes 11 SPRS from MMCR0 to SIAR excluding the
* unsupported SPRS MMCR3, SIER2 and SIER3.
*/
#define PERF_REG_PMU_MASK_300 ((0xfffULL << PERF_REG_POWERPC_MMCR0) - PERF_EXCLUDE_REG_EXT_300)
#define PERF_REG_PMU_MASK_300 \
((1ULL << PERF_REG_POWERPC_MMCR0) | (1ULL << PERF_REG_POWERPC_MMCR1) | \
(1ULL << PERF_REG_POWERPC_MMCR2) | (1ULL << PERF_REG_POWERPC_PMC1) | \
(1ULL << PERF_REG_POWERPC_PMC2) | (1ULL << PERF_REG_POWERPC_PMC3) | \
(1ULL << PERF_REG_POWERPC_PMC4) | (1ULL << PERF_REG_POWERPC_PMC5) | \
(1ULL << PERF_REG_POWERPC_PMC6) | (1ULL << PERF_REG_POWERPC_SDAR) | \
(1ULL << PERF_REG_POWERPC_SIAR))
/*
* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
* includes 12 SPRs from MMCR0 to PMC6.
* includes 14 SPRs from MMCR0 to SIAR.
*/
#define PERF_REG_PMU_MASK_31 (0xfffULL << PERF_REG_POWERPC_MMCR0)
#define PERF_REG_PMU_MASK_31 \
(PERF_REG_PMU_MASK_300 | (1ULL << PERF_REG_POWERPC_MMCR3) | \
(1ULL << PERF_REG_POWERPC_SIER2) | (1ULL << PERF_REG_POWERPC_SIER3))
#define PERF_REG_EXTENDED_MAX (PERF_REG_POWERPC_PMC6 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */

View File

@ -204,7 +204,7 @@
/* FREE! ( 7*32+10) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
@ -277,6 +277,7 @@
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@ -298,6 +299,9 @@
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
@ -313,6 +317,7 @@
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */

View File

@ -476,6 +476,7 @@
#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
#define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b
#define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e
#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
@ -486,6 +487,23 @@
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
#define MSR_AMD_CPPC_CAP2 0xc00102b2
#define MSR_AMD_CPPC_REQ 0xc00102b3
#define MSR_AMD_CPPC_STATUS 0xc00102b4
#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff)
#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff)
#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff)
#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff)
#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0)
#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8)
#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16)
#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24)
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
@ -625,6 +643,8 @@
#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
#define MSR_IA32_XFD 0x000001c4
#define MSR_IA32_XFD_ERR 0x000001c5
#define MSR_IA32_XSS 0x00000da0
#define MSR_IA32_APICBASE 0x0000001b

View File

@ -35,11 +35,7 @@
# define NEED_CMOV 0
#endif
#ifdef CONFIG_X86_USE_3DNOW
# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
#else
# define NEED_3DNOW 0
#endif
#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))

View File

@ -373,9 +373,23 @@ struct kvm_debugregs {
__u64 reserved[9];
};
/* for KVM_CAP_XSAVE */
/* for KVM_CAP_XSAVE and KVM_CAP_XSAVE2 */
struct kvm_xsave {
/*
* KVM_GET_XSAVE2 and KVM_SET_XSAVE write and read as many bytes
* as are returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
* respectively, when invoked on the vm file descriptor.
*
* The size value returned by KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2)
* will always be at least 4096. Currently, it is only greater
* than 4096 if a dynamic feature has been enabled with
* ``arch_prctl()``, but this may change in the future.
*
* The offsets of the state save areas in struct kvm_xsave follow
* the contents of CPUID leaf 0xD on the host.
*/
__u32 region[1024];
__u32 extra[0];
};
#define KVM_MAX_XCRS 16
@ -438,6 +452,9 @@ struct kvm_sync_regs {
#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001
/* attributes for system fd (group 0) */
#define KVM_X86_XCOMP_GUEST_SUPP 0
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
@ -504,4 +521,8 @@ struct kvm_pmu_event_filter {
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
/* for KVM_{GET,SET,HAS}_DEVICE_ATTR */
#define KVM_VCPU_TSC_CTRL 0 /* control group for the timestamp counter (TSC) */
#define KVM_VCPU_TSC_OFFSET 0 /* attribute for the TSC offset */
#endif /* _ASM_X86_KVM_H */
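The comment added to struct kvm_xsave above describes how a VMM is expected to size the buffer for KVM_GET_XSAVE2: query KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) on the VM file descriptor and allocate at least that many bytes. A minimal userspace sketch of that pattern (illustrative only, not part of this commit; error handling is reduced and the helper name is made up):

#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/*
 * Size the buffer from KVM_CHECK_EXTENSION(KVM_CAP_XSAVE2) on the VM fd,
 * as described above; the result is never less than 4096 bytes (the size
 * of the legacy struct kvm_xsave region). Then fetch the vCPU state.
 */
static struct kvm_xsave *vcpu_get_xsave2(int vm_fd, int vcpu_fd)
{
	int size = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XSAVE2);
	struct kvm_xsave *xsave;

	if (size < (int)sizeof(struct kvm_xsave))
		size = sizeof(struct kvm_xsave);

	xsave = calloc(1, size);
	if (xsave && ioctl(vcpu_fd, KVM_GET_XSAVE2, xsave) < 0) {
		free(xsave);
		xsave = NULL;
	}
	return xsave;
}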

View File

@ -2,16 +2,22 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012
#define ARCH_GET_CPUID 0x1011
#define ARCH_SET_CPUID 0x1012
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
#define ARCH_GET_XCOMP_SUPP 0x1021
#define ARCH_GET_XCOMP_PERM 0x1022
#define ARCH_REQ_XCOMP_PERM 0x1023
#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003
#endif /* _ASM_X86_PRCTL_H */

View File

@ -39,7 +39,7 @@ SYM_FUNC_START_WEAK(memcpy)
rep movsq
movl %edx, %ecx
rep movsb
ret
RET
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
@ -53,7 +53,7 @@ SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
RET
SYM_FUNC_END(memcpy_erms)
SYM_FUNC_START_LOCAL(memcpy_orig)
@ -137,7 +137,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
retq
RET
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
@ -149,7 +149,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
retq
RET
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
@ -162,7 +162,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
retq
RET
.p2align 4
.Lless_3bytes:
subl $1, %edx
@ -180,7 +180,7 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
movb %cl, (%rdi)
.Lend:
retq
RET
SYM_FUNC_END(memcpy_orig)
.popsection

View File

@ -37,6 +37,7 @@ trace/beauty/generated/
pmu-events/pmu-events.c
pmu-events/jevents
feature/
libbpf/
fixdep
libtraceevent-dynamic-list
Documentation/doc.dep

View File

@ -17,7 +17,11 @@ tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
tools/lib/find_bit.c
tools/lib/bitmap.c
tools/lib/list_sort.c
tools/lib/str_error_r.c
tools/lib/vsprintf.c
tools/lib/zalloc.c
scripts/bpf_doc.py
tools/bpf/bpftool
kernel/bpf/disasm.c
kernel/bpf/disasm.h

View File

@ -17,6 +17,7 @@ detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
@ -214,6 +215,7 @@ endif
ifneq ($(WERROR),0)
CORE_CFLAGS += -Werror
CXXFLAGS += -Werror
HOSTCFLAGS += -Werror
endif
ifndef DEBUG
@ -286,13 +288,16 @@ CORE_CFLAGS += -Wall
CORE_CFLAGS += -Wextra
CORE_CFLAGS += -std=gnu99
CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti
CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti
CXXFLAGS += -Wall
CXXFLAGS += -fno-omit-frame-pointer
CXXFLAGS += -ggdb3
CXXFLAGS += -funwind-tables
CXXFLAGS += -Wno-strict-aliasing
HOSTCFLAGS += -Wall
HOSTCFLAGS += -Wextra
# Enforce a non-executable stack, as we may regress (again) in the future by
# adding assembler files missing the .GNU-stack linker note.
LDFLAGS += -Wl,-z,noexecstack
@ -1011,6 +1016,9 @@ ifndef NO_AUXTRACE
ifndef NO_AUXTRACE
$(call detected,CONFIG_AUXTRACE)
CFLAGS += -DHAVE_AUXTRACE_SUPPORT
ifeq ($(feature-reallocarray), 0)
CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
endif
endif
endif
@ -1094,11 +1102,32 @@ ifdef LIBTRACEEVENT_DYNAMIC
$(call feature_check,libtraceevent)
ifeq ($(feature-libtraceevent), 1)
EXTLIBS += -ltraceevent
LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent)
LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
LIBTRACEEVENT_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3))
CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP)
else
dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel);
endif
endif
ifdef LIBTRACEFS_DYNAMIC
$(call feature_check,libtracefs)
ifeq ($(feature-libtracefs), 1)
EXTLIBS += -ltracefs
LIBTRACEFS_VERSION := $(shell $(PKG_CONFIG) --modversion libtracefs)
LIBTRACEFS_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEFS_VERSION)))
LIBTRACEFS_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEFS_VERSION)))
LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION)))
LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3))
CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP)
else
dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev);
endif
endif
# Among the variables below, these:
# perfexecdir
# perf_include_dir
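For the libtraceevent/libtracefs checks above, the Makefile encodes the detected library version as major * 255 * 255 + minor * 255 + patch and passes it via -DLIBTRACEEVENT_VERSION / -DLIBTRACEFS_VERSION. A sketch of how C code could gate on that value at build time (the MAKE_LIBTRACEEVENT_VERSION helper below is an assumption for the example, mirroring the same encoding; it does not appear in this diff):

/* Same encoding as the shell 'expr' above: major * 255 * 255 + minor * 255 + patch. */
#define MAKE_LIBTRACEEVENT_VERSION(a, b, c)	((a) * 255 * 255 + (b) * 255 + (c))

#if defined(LIBTRACEEVENT_VERSION) && \
	LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 3, 0)
/* Version-dependent code for newer libtraceevent would go here. */
#endif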

View File

@ -130,6 +130,8 @@ include ../scripts/utilities.mak
#
# Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking
#
# Define LIBTRACEFS_DYNAMIC to enable libtracefs dynamic linking
#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@ -224,7 +226,7 @@ else
endif
export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
export HOSTCC HOSTLD HOSTAR
export HOSTCC HOSTLD HOSTAR HOSTCFLAGS
include $(srctree)/tools/build/Makefile.include
@ -241,7 +243,7 @@ else # force_fixdep
LIB_DIR = $(srctree)/tools/lib/api/
TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
LIBBPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
LIBPERF_DIR = $(srctree)/tools/lib/perf/
DOC_DIR = $(srctree)/tools/perf/Documentation/
@ -293,7 +295,6 @@ strip-libs = $(filter-out -l%,$(1))
ifneq ($(OUTPUT),)
TE_PATH=$(OUTPUT)
PLUGINS_PATH=$(OUTPUT)
BPF_PATH=$(OUTPUT)
SUBCMD_PATH=$(OUTPUT)
LIBPERF_PATH=$(OUTPUT)
ifneq ($(subdir),)
@ -305,7 +306,6 @@ else
TE_PATH=$(TRACE_EVENT_DIR)
PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
API_PATH=$(LIB_DIR)
BPF_PATH=$(BPF_DIR)
SUBCMD_PATH=$(SUBCMD_DIR)
LIBPERF_PATH=$(LIBPERF_DIR)
endif
@ -324,7 +324,14 @@ LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DY
LIBAPI = $(API_PATH)libapi.a
export LIBAPI
LIBBPF = $(BPF_PATH)libbpf.a
ifneq ($(OUTPUT),)
LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf
else
LIBBPF_OUTPUT = $(CURDIR)/libbpf
endif
LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
@ -360,7 +367,7 @@ ifndef NO_JVMTI
PROGRAMS += $(OUTPUT)$(LIBJVMTI)
endif
DLFILTERS := dlfilter-test-api-v0.so
DLFILTERS := dlfilter-test-api-v0.so dlfilter-show-cycles.so
DLFILTERS := $(patsubst %,$(OUTPUT)dlfilters/%,$(DLFILTERS))
# what 'all' will build and 'install' will install, in perfexecdir
@ -509,17 +516,17 @@ kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
$(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
$(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c
socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
socket_arrays := $(beauty_outdir)/socket_arrays.c
socket_arrays := $(beauty_outdir)/socket.c
socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
$(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
$(socket_arrays): $(linux_uapi_dir)/in.h $(beauty_linux_dir)/socket.h $(socket_tbl)
$(Q)$(SHELL) '$(socket_tbl)' $(linux_uapi_dir) $(beauty_linux_dir) > $@
sockaddr_arrays := $(beauty_outdir)/sockaddr.c
sockaddr_tbl := $(srctree)/tools/perf/trace/beauty/sockaddr.sh
$(sockaddr_arrays): $(beauty_linux_dir)/socket.h $(sockaddr_tbl)
$(Q)$(SHELL) '$(sockaddr_tbl)' $(beauty_linux_dir) > $@
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
@ -729,8 +736,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(sndrv_ctl_ioctl_array) \
$(kcmp_type_array) \
$(kvm_ioctl_array) \
$(socket_ipproto_array) \
$(socket_arrays) \
$(sockaddr_arrays) \
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
$(mmap_flags_array) \
@ -829,12 +836,14 @@ $(LIBAPI)-clean:
$(call QUIET_CLEAN, libapi)
$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
$(LIBBPF): FORCE
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
$(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
$(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \
O= OUTPUT=$(LIBBPF_OUTPUT)/ DESTDIR=$(LIBBPF_DESTDIR) prefix= \
$@ install_headers
$(LIBBPF)-clean:
$(call QUIET_CLEAN, libbpf)
$(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
$(Q)$(RM) -r -- $(LIBBPF_OUTPUT)
$(LIBPERF): FORCE
$(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a
@ -1032,18 +1041,17 @@ SKEL_OUT := $(abspath $(OUTPUT)util/bpf_skel)
SKEL_TMP_OUT := $(abspath $(SKEL_OUT)/.tmp)
SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h
SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
$(Q)$(MKDIR) -p $@
ifdef BUILD_BPF_SKEL
BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
LIBBPF_SRC := $(abspath ../lib/bpf)
BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(BPF_PATH) -I$(LIBBPF_SRC)/..
$(SKEL_TMP_OUT):
$(Q)$(MKDIR) -p $@
BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE)
$(BPFTOOL): | $(SKEL_TMP_OUT)
CFLAGS= $(MAKE) -C ../bpf/bpftool \
$(Q)CFLAGS= $(MAKE) -C ../bpf/bpftool \
OUTPUT=$(SKEL_TMP_OUT)/ bootstrap
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
@ -1105,8 +1113,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(sndrv_pcm_ioctl_array) \
$(OUTPUT)$(kvm_ioctl_array) \
$(OUTPUT)$(kcmp_type_array) \
$(OUTPUT)$(socket_ipproto_array) \
$(OUTPUT)$(socket_arrays) \
$(OUTPUT)$(sockaddr_arrays) \
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \

View File

@ -2,6 +2,6 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
extern struct test arch_tests[];
extern struct test_suite *arch_tests[];
#endif

View File

@ -15,46 +15,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM_R0:
return "r0";
case PERF_REG_ARM_R1:
return "r1";
case PERF_REG_ARM_R2:
return "r2";
case PERF_REG_ARM_R3:
return "r3";
case PERF_REG_ARM_R4:
return "r4";
case PERF_REG_ARM_R5:
return "r5";
case PERF_REG_ARM_R6:
return "r6";
case PERF_REG_ARM_R7:
return "r7";
case PERF_REG_ARM_R8:
return "r8";
case PERF_REG_ARM_R9:
return "r9";
case PERF_REG_ARM_R10:
return "r10";
case PERF_REG_ARM_FP:
return "fp";
case PERF_REG_ARM_IP:
return "ip";
case PERF_REG_ARM_SP:
return "sp";
case PERF_REG_ARM_LR:
return "lr";
case PERF_REG_ARM_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -3,18 +3,10 @@
#include "tests/tests.h"
#include "arch-tests.h"
struct test arch_tests[] = {
struct test_suite *arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "DWARF unwind",
.func = test__dwarf_unwind,
},
&suite__dwarf_unwind,
#endif
{
.desc = "Vectors page",
.func = test__vectors_page,
},
{
.func = NULL,
},
&suite__vectors_page,
NULL,
};

View File

@ -9,8 +9,7 @@
#define VECTORS__MAP_NAME "[vectors]"
int test__vectors_page(struct test *test __maybe_unused,
int subtest __maybe_unused)
static int test__vectors_page(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
void *start, *end;
@ -22,3 +21,5 @@ int test__vectors_page(struct test *test __maybe_unused,
return TEST_OK;
}
DEFINE_SUITE("Vectors page", vectors_page);

View File

@ -203,9 +203,11 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
struct perf_cpu_map *online_cpus = perf_cpu_map__new(NULL);
/* Set option of each CPU we have */
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(event_cpus, i) ||
!cpu_map__has(online_cpus, i))
for (i = 0; i < cpu__max_cpu().cpu; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (!perf_cpu_map__has(event_cpus, cpu) ||
!perf_cpu_map__has(online_cpus, cpu))
continue;
if (option & BIT(ETM_OPT_CTXTID)) {
@ -407,25 +409,6 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
}
/* Validate auxtrace_mmap_pages provided by user */
if (opts->auxtrace_mmap_pages) {
unsigned int max_page = (KiB(128) / page_size);
size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
if (!privileged &&
opts->auxtrace_mmap_pages > max_page) {
opts->auxtrace_mmap_pages = max_page;
pr_err("auxtrace too big, truncating to %d\n",
max_page);
}
if (!is_power_of_2(sz)) {
pr_err("Invalid mmap size for %s: must be a power of 2\n",
CORESIGHT_ETM_PMU_NAME);
return -EINVAL;
}
}
if (opts->auxtrace_snapshot_mode)
pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
opts->auxtrace_snapshot_size);
@ -541,9 +524,11 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
/* cpu map is not empty, we have specific CPUs to work with */
if (!perf_cpu_map__empty(event_cpus)) {
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(event_cpus, i) ||
!cpu_map__has(online_cpus, i))
for (i = 0; i < cpu__max_cpu().cpu; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (!perf_cpu_map__has(event_cpus, cpu) ||
!perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@ -555,8 +540,10 @@ cs_etm_info_priv_size(struct auxtrace_record *itr __maybe_unused,
}
} else {
/* get configuration for all CPUs in the system */
for (i = 0; i < cpu__max_cpu(); i++) {
if (!cpu_map__has(online_cpus, i))
for (i = 0; i < cpu__max_cpu().cpu; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (!perf_cpu_map__has(online_cpus, cpu))
continue;
if (cs_etm_is_ete(itr, i))
@ -741,8 +728,10 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
} else {
/* Make sure all specified CPUs are online */
for (i = 0; i < perf_cpu_map__nr(event_cpus); i++) {
if (cpu_map__has(event_cpus, i) &&
!cpu_map__has(online_cpus, i))
struct perf_cpu cpu = { .cpu = i, };
if (perf_cpu_map__has(event_cpus, cpu) &&
!perf_cpu_map__has(online_cpus, cpu))
return -EINVAL;
}
@ -762,9 +751,12 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
offset = CS_ETM_SNAPSHOT + 1;
for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
if (cpu_map__has(cpu_map, i))
for (i = 0; i < cpu__max_cpu().cpu && offset < priv_size; i++) {
struct perf_cpu cpu = { .cpu = i, };
if (perf_cpu_map__has(cpu_map, cpu))
cs_etm_get_metadata(i, &offset, itr, info);
}
perf_cpu_map__put(online_cpus);

View File

@ -2,6 +2,6 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
extern struct test arch_tests[];
extern struct test_suite *arch_tests[];
#endif

View File

@ -4,7 +4,9 @@
#include <stdlib.h>
#include <linux/types.h>
#define perf_event_arm_regs perf_event_arm64_regs
#include <asm/perf_regs.h>
#undef perf_event_arm_regs
void perf_regs_load(u64 *regs);
@ -15,80 +17,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_ARM64_PC
#define PERF_REG_SP PERF_REG_ARM64_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_ARM64_X0:
return "x0";
case PERF_REG_ARM64_X1:
return "x1";
case PERF_REG_ARM64_X2:
return "x2";
case PERF_REG_ARM64_X3:
return "x3";
case PERF_REG_ARM64_X4:
return "x4";
case PERF_REG_ARM64_X5:
return "x5";
case PERF_REG_ARM64_X6:
return "x6";
case PERF_REG_ARM64_X7:
return "x7";
case PERF_REG_ARM64_X8:
return "x8";
case PERF_REG_ARM64_X9:
return "x9";
case PERF_REG_ARM64_X10:
return "x10";
case PERF_REG_ARM64_X11:
return "x11";
case PERF_REG_ARM64_X12:
return "x12";
case PERF_REG_ARM64_X13:
return "x13";
case PERF_REG_ARM64_X14:
return "x14";
case PERF_REG_ARM64_X15:
return "x15";
case PERF_REG_ARM64_X16:
return "x16";
case PERF_REG_ARM64_X17:
return "x17";
case PERF_REG_ARM64_X18:
return "x18";
case PERF_REG_ARM64_X19:
return "x19";
case PERF_REG_ARM64_X20:
return "x20";
case PERF_REG_ARM64_X21:
return "x21";
case PERF_REG_ARM64_X22:
return "x22";
case PERF_REG_ARM64_X23:
return "x23";
case PERF_REG_ARM64_X24:
return "x24";
case PERF_REG_ARM64_X25:
return "x25";
case PERF_REG_ARM64_X26:
return "x26";
case PERF_REG_ARM64_X27:
return "x27";
case PERF_REG_ARM64_X28:
return "x28";
case PERF_REG_ARM64_X29:
return "x29";
case PERF_REG_ARM64_SP:
return "sp";
case PERF_REG_ARM64_LR:
return "lr";
case PERF_REG_ARM64_PC:
return "pc";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -3,14 +3,9 @@
#include "tests/tests.h"
#include "arch-tests.h"
struct test arch_tests[] = {
struct test_suite *arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "DWARF unwind",
.func = test__dwarf_unwind,
},
&suite__dwarf_unwind,
#endif
{
.func = NULL,
},
NULL,
};

View File

@ -23,6 +23,7 @@
#include "../../../util/auxtrace.h"
#include "../../../util/record.h"
#include "../../../util/arm-spe.h"
#include <tools/libc_compat.h> // reallocarray
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@ -31,6 +32,8 @@ struct arm_spe_recording {
struct auxtrace_record itr;
struct perf_pmu *arm_spe_pmu;
struct evlist *evlist;
int wrapped_cnt;
bool *wrapped;
};
static void arm_spe_set_timestamp(struct auxtrace_record *itr,
@ -84,6 +87,55 @@ static int arm_spe_info_fill(struct auxtrace_record *itr,
return 0;
}
static void
arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
bool privileged)
{
/*
* The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size nor
* snapshot size is specified, then the default is 4MiB for privileged users, 128KiB for
* unprivileged users.
*
* The default auxtrace mmap size is 4MiB/page_size for privileged users, 128KiB for
* unprivileged users. If an unprivileged user does not specify mmap pages, the mmap pages
* will be reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
* user is likely to get an error as they exceed their mlock limit.
*/
/*
* No size was given to '-S' or '-m,', so go with the default
*/
if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
if (privileged) {
opts->auxtrace_mmap_pages = MiB(4) / page_size;
} else {
opts->auxtrace_mmap_pages = KiB(128) / page_size;
if (opts->mmap_pages == UINT_MAX)
opts->mmap_pages = KiB(256) / page_size;
}
} else if (!opts->auxtrace_mmap_pages && !privileged && opts->mmap_pages == UINT_MAX) {
opts->mmap_pages = KiB(256) / page_size;
}
/*
* '-m,xyz' was specified but no snapshot size, so make the snapshot size as big as the
* auxtrace mmap area.
*/
if (!opts->auxtrace_snapshot_size)
opts->auxtrace_snapshot_size = opts->auxtrace_mmap_pages * (size_t)page_size;
/*
* '-Sxyz' was specified but no auxtrace mmap area, so make the auxtrace mmap area big
* enough to fit the requested snapshot size.
*/
if (!opts->auxtrace_mmap_pages) {
size_t sz = opts->auxtrace_snapshot_size;
sz = round_up(sz, page_size) / page_size;
opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
}
}
static int arm_spe_recording_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts)
@ -115,6 +167,36 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
if (!opts->full_auxtrace)
return 0;
/*
* we are in snapshot mode.
*/
if (opts->auxtrace_snapshot_mode) {
/*
* Command arguments '-Sxyz' and/or '-m,xyz' are missing, so fill those in with
* default values.
*/
if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages)
arm_spe_snapshot_resolve_auxtrace_defaults(opts, privileged);
/*
* Snapshot size can't be bigger than the auxtrace area.
*/
if (opts->auxtrace_snapshot_size > opts->auxtrace_mmap_pages * (size_t)page_size) {
pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
opts->auxtrace_snapshot_size,
opts->auxtrace_mmap_pages * (size_t)page_size);
return -EINVAL;
}
/*
* Something went wrong somewhere - this shouldn't happen.
*/
if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
return -EINVAL;
}
}
/* We are in full trace mode but '-m,xyz' wasn't specified */
if (!opts->auxtrace_mmap_pages) {
if (privileged) {
@ -138,6 +220,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
}
}
if (opts->auxtrace_snapshot_mode)
pr_debug2("%sx snapshot size: %zu\n", ARM_SPE_PMU_NAME,
opts->auxtrace_snapshot_size);
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace event
@ -166,8 +251,199 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus))
if (!perf_cpu_map__empty(cpus)) {
evsel__set_sample_bit(tracking_evsel, TIME);
evsel__set_sample_bit(tracking_evsel, CPU);
/* also track task context switch */
if (!record_opts__no_switch_events(opts))
tracking_evsel->core.attr.context_switch = 1;
}
return 0;
}
static int arm_spe_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
struct record_opts *opts,
const char *str)
{
unsigned long long snapshot_size = 0;
char *endptr;
if (str) {
snapshot_size = strtoull(str, &endptr, 0);
if (*endptr || snapshot_size > SIZE_MAX)
return -1;
}
opts->auxtrace_snapshot_mode = true;
opts->auxtrace_snapshot_size = snapshot_size;
return 0;
}
static int arm_spe_snapshot_start(struct auxtrace_record *itr)
{
struct arm_spe_recording *ptr =
container_of(itr, struct arm_spe_recording, itr);
struct evsel *evsel;
evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
return evsel__disable(evsel);
}
return -EINVAL;
}
static int arm_spe_snapshot_finish(struct auxtrace_record *itr)
{
struct arm_spe_recording *ptr =
container_of(itr, struct arm_spe_recording, itr);
struct evsel *evsel;
evlist__for_each_entry(ptr->evlist, evsel) {
if (evsel->core.attr.type == ptr->arm_spe_pmu->type)
return evsel__enable(evsel);
}
return -EINVAL;
}
static int arm_spe_alloc_wrapped_array(struct arm_spe_recording *ptr, int idx)
{
bool *wrapped;
int cnt = ptr->wrapped_cnt, new_cnt, i;
/*
* No need to allocate, so return early.
*/
if (idx < cnt)
return 0;
/*
* Make ptr->wrapped as big as idx.
*/
new_cnt = idx + 1;
/*
* Free'ed in arm_spe_recording_free().
*/
wrapped = reallocarray(ptr->wrapped, new_cnt, sizeof(bool));
if (!wrapped)
return -ENOMEM;
/*
* Initialize newly allocated values.
*/
for (i = cnt; i < new_cnt; i++)
wrapped[i] = false;
ptr->wrapped_cnt = new_cnt;
ptr->wrapped = wrapped;
return 0;
}
static bool arm_spe_buffer_has_wrapped(unsigned char *buffer,
size_t buffer_size, u64 head)
{
u64 i, watermark;
u64 *buf = (u64 *)buffer;
size_t buf_size = buffer_size;
/*
* Defensively handle the case where head might be continually increasing - if its value is
* equal to or greater than the size of the ring buffer, then we can safely determine it has
* wrapped around. Otherwise, continue to detect if head might have wrapped.
*/
if (head >= buffer_size)
return true;
/*
* We want to look at the very last 512 bytes (chosen arbitrarily) in the ring buffer.
*/
watermark = buf_size - 512;
/*
* The value of head is somewhere within the size of the ring buffer. This can mean that there
* hasn't been enough data to fill the ring buffer yet, or that the trace time was so long that
* head has numerically wrapped around. To find out, we need to check whether we have data at the
* very end of the ring buffer. We can reliably do this because mmap'ed pages are zeroed
* out and there is a fresh mapping with every new session.
*/
/*
* head is less than 512 bytes from the end of the ring buffer.
*/
if (head > watermark)
watermark = head;
/*
* Speed things up by using 64 bit transactions (see "u64 *buf" above)
*/
watermark /= sizeof(u64);
buf_size /= sizeof(u64);
/*
* If we find trace data at the end of the ring buffer, head has been there and has
* numerically wrapped around at least once.
*/
for (i = watermark; i < buf_size; i++)
if (buf[i])
return true;
return false;
}
static int arm_spe_find_snapshot(struct auxtrace_record *itr, int idx,
struct auxtrace_mmap *mm, unsigned char *data,
u64 *head, u64 *old)
{
int err;
bool wrapped;
struct arm_spe_recording *ptr =
container_of(itr, struct arm_spe_recording, itr);
/*
* Allocate memory to keep track of wrapping if this is the first
* time we deal with this *mm.
*/
if (idx >= ptr->wrapped_cnt) {
err = arm_spe_alloc_wrapped_array(ptr, idx);
if (err)
return err;
}
/*
* Check to see if *head has wrapped around. If it hasn't, only the
* amount of data between *head and *old is snapshotted to avoid
* bloating the perf.data file with zeros. But as soon as *head has
* wrapped around, the entire size of the AUX ring buffer is taken.
*/
wrapped = ptr->wrapped[idx];
if (!wrapped && arm_spe_buffer_has_wrapped(data, mm->len, *head)) {
wrapped = true;
ptr->wrapped[idx] = true;
}
pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
__func__, idx, (size_t)*old, (size_t)*head, mm->len);
/*
* No wrap has occurred, we can just use *head and *old.
*/
if (!wrapped)
return 0;
/*
* *head has wrapped around - adjust *head and *old to pick up the
* entire content of the AUX buffer.
*/
if (*head >= mm->len) {
*old = *head - mm->len;
} else {
*head += mm->len;
*old = *head - mm->len;
}
return 0;
}
@ -186,6 +462,7 @@ static void arm_spe_recording_free(struct auxtrace_record *itr)
struct arm_spe_recording *sper =
container_of(itr, struct arm_spe_recording, itr);
free(sper->wrapped);
free(sper);
}
@ -207,6 +484,10 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
sper->arm_spe_pmu = arm_spe_pmu;
sper->itr.pmu = arm_spe_pmu;
sper->itr.snapshot_start = arm_spe_snapshot_start;
sper->itr.snapshot_finish = arm_spe_snapshot_finish;
sper->itr.find_snapshot = arm_spe_find_snapshot;
sper->itr.parse_snapshot_options = arm_spe_parse_snapshot_options;
sper->itr.recording_options = arm_spe_recording_options;
sper->itr.info_priv_size = arm_spe_info_priv_size;
sper->itr.info_fill = arm_spe_info_fill;

View File

@ -5,6 +5,8 @@
#include <string.h>
#include "debug.h"
#include "symbol.h"
#include "callchain.h"
#include "record.h"
/* On arm64, kernel text segment starts at high memory address,
* for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
@ -26,3 +28,8 @@ void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
p->end = c->start;
pr_debug4("%s sym:%s end:%#" PRIx64 "\n", __func__, p->name, p->end);
}
void arch__add_leaf_frame_record_opts(struct record_opts *opts)
{
opts->sample_user_regs |= sample_reg_masks[PERF_REG_ARM64_LR].mask;
}

View File

@ -3,7 +3,7 @@
#include "../../../util/cpumap.h"
#include "../../../util/pmu.h"
struct pmu_events_map *pmu_events_map__find(void)
const struct pmu_events_map *pmu_events_map__find(void)
{
struct perf_pmu *pmu = NULL;
@ -15,7 +15,7 @@ struct pmu_events_map *pmu_events_map__find(void)
* The cpumap should cover all CPUs. Otherwise, some CPUs may
* not support some events or have different event IDs.
*/
if (pmu->cpus->nr != cpu__max_cpu())
if (pmu->cpus->nr != cpu__max_cpu().cpu)
return NULL;
return perf_pmu__find_map(pmu);

View File

@ -15,86 +15,4 @@
#define PERF_REG_IP PERF_REG_CSKY_PC
#define PERF_REG_SP PERF_REG_CSKY_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_CSKY_A0:
return "a0";
case PERF_REG_CSKY_A1:
return "a1";
case PERF_REG_CSKY_A2:
return "a2";
case PERF_REG_CSKY_A3:
return "a3";
case PERF_REG_CSKY_REGS0:
return "regs0";
case PERF_REG_CSKY_REGS1:
return "regs1";
case PERF_REG_CSKY_REGS2:
return "regs2";
case PERF_REG_CSKY_REGS3:
return "regs3";
case PERF_REG_CSKY_REGS4:
return "regs4";
case PERF_REG_CSKY_REGS5:
return "regs5";
case PERF_REG_CSKY_REGS6:
return "regs6";
case PERF_REG_CSKY_REGS7:
return "regs7";
case PERF_REG_CSKY_REGS8:
return "regs8";
case PERF_REG_CSKY_REGS9:
return "regs9";
case PERF_REG_CSKY_SP:
return "sp";
case PERF_REG_CSKY_LR:
return "lr";
case PERF_REG_CSKY_PC:
return "pc";
#if defined(__CSKYABIV2__)
case PERF_REG_CSKY_EXREGS0:
return "exregs0";
case PERF_REG_CSKY_EXREGS1:
return "exregs1";
case PERF_REG_CSKY_EXREGS2:
return "exregs2";
case PERF_REG_CSKY_EXREGS3:
return "exregs3";
case PERF_REG_CSKY_EXREGS4:
return "exregs4";
case PERF_REG_CSKY_EXREGS5:
return "exregs5";
case PERF_REG_CSKY_EXREGS6:
return "exregs6";
case PERF_REG_CSKY_EXREGS7:
return "exregs7";
case PERF_REG_CSKY_EXREGS8:
return "exregs8";
case PERF_REG_CSKY_EXREGS9:
return "exregs9";
case PERF_REG_CSKY_EXREGS10:
return "exregs10";
case PERF_REG_CSKY_EXREGS11:
return "exregs11";
case PERF_REG_CSKY_EXREGS12:
return "exregs12";
case PERF_REG_CSKY_EXREGS13:
return "exregs13";
case PERF_REG_CSKY_EXREGS14:
return "exregs14";
case PERF_REG_CSKY_TLS:
return "tls";
case PERF_REG_CSKY_HI:
return "hi";
case PERF_REG_CSKY_LO:
return "lo";
#endif
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -363,3 +363,5 @@
446 n64 landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node

View File

@ -12,73 +12,4 @@
#define PERF_REGS_MASK ((1ULL << PERF_REG_MIPS_MAX) - 1)
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_MIPS_PC:
return "PC";
case PERF_REG_MIPS_R1:
return "$1";
case PERF_REG_MIPS_R2:
return "$2";
case PERF_REG_MIPS_R3:
return "$3";
case PERF_REG_MIPS_R4:
return "$4";
case PERF_REG_MIPS_R5:
return "$5";
case PERF_REG_MIPS_R6:
return "$6";
case PERF_REG_MIPS_R7:
return "$7";
case PERF_REG_MIPS_R8:
return "$8";
case PERF_REG_MIPS_R9:
return "$9";
case PERF_REG_MIPS_R10:
return "$10";
case PERF_REG_MIPS_R11:
return "$11";
case PERF_REG_MIPS_R12:
return "$12";
case PERF_REG_MIPS_R13:
return "$13";
case PERF_REG_MIPS_R14:
return "$14";
case PERF_REG_MIPS_R15:
return "$15";
case PERF_REG_MIPS_R16:
return "$16";
case PERF_REG_MIPS_R17:
return "$17";
case PERF_REG_MIPS_R18:
return "$18";
case PERF_REG_MIPS_R19:
return "$19";
case PERF_REG_MIPS_R20:
return "$20";
case PERF_REG_MIPS_R21:
return "$21";
case PERF_REG_MIPS_R22:
return "$22";
case PERF_REG_MIPS_R23:
return "$23";
case PERF_REG_MIPS_R24:
return "$24";
case PERF_REG_MIPS_R25:
return "$25";
case PERF_REG_MIPS_R28:
return "$28";
case PERF_REG_MIPS_R29:
return "$29";
case PERF_REG_MIPS_R30:
return "$30";
case PERF_REG_MIPS_R31:
return "$31";
default:
break;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -528,3 +528,5 @@
446 common landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node

View File

@ -2,6 +2,6 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
extern struct test arch_tests[];
extern struct test_suite *arch_tests[];
#endif

View File

@ -19,68 +19,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_POWERPC_NIP
#define PERF_REG_SP PERF_REG_POWERPC_R1
static const char *reg_names[] = {
[PERF_REG_POWERPC_R0] = "r0",
[PERF_REG_POWERPC_R1] = "r1",
[PERF_REG_POWERPC_R2] = "r2",
[PERF_REG_POWERPC_R3] = "r3",
[PERF_REG_POWERPC_R4] = "r4",
[PERF_REG_POWERPC_R5] = "r5",
[PERF_REG_POWERPC_R6] = "r6",
[PERF_REG_POWERPC_R7] = "r7",
[PERF_REG_POWERPC_R8] = "r8",
[PERF_REG_POWERPC_R9] = "r9",
[PERF_REG_POWERPC_R10] = "r10",
[PERF_REG_POWERPC_R11] = "r11",
[PERF_REG_POWERPC_R12] = "r12",
[PERF_REG_POWERPC_R13] = "r13",
[PERF_REG_POWERPC_R14] = "r14",
[PERF_REG_POWERPC_R15] = "r15",
[PERF_REG_POWERPC_R16] = "r16",
[PERF_REG_POWERPC_R17] = "r17",
[PERF_REG_POWERPC_R18] = "r18",
[PERF_REG_POWERPC_R19] = "r19",
[PERF_REG_POWERPC_R20] = "r20",
[PERF_REG_POWERPC_R21] = "r21",
[PERF_REG_POWERPC_R22] = "r22",
[PERF_REG_POWERPC_R23] = "r23",
[PERF_REG_POWERPC_R24] = "r24",
[PERF_REG_POWERPC_R25] = "r25",
[PERF_REG_POWERPC_R26] = "r26",
[PERF_REG_POWERPC_R27] = "r27",
[PERF_REG_POWERPC_R28] = "r28",
[PERF_REG_POWERPC_R29] = "r29",
[PERF_REG_POWERPC_R30] = "r30",
[PERF_REG_POWERPC_R31] = "r31",
[PERF_REG_POWERPC_NIP] = "nip",
[PERF_REG_POWERPC_MSR] = "msr",
[PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
[PERF_REG_POWERPC_CTR] = "ctr",
[PERF_REG_POWERPC_LINK] = "link",
[PERF_REG_POWERPC_XER] = "xer",
[PERF_REG_POWERPC_CCR] = "ccr",
[PERF_REG_POWERPC_SOFTE] = "softe",
[PERF_REG_POWERPC_TRAP] = "trap",
[PERF_REG_POWERPC_DAR] = "dar",
[PERF_REG_POWERPC_DSISR] = "dsisr",
[PERF_REG_POWERPC_SIER] = "sier",
[PERF_REG_POWERPC_MMCRA] = "mmcra",
[PERF_REG_POWERPC_MMCR0] = "mmcr0",
[PERF_REG_POWERPC_MMCR1] = "mmcr1",
[PERF_REG_POWERPC_MMCR2] = "mmcr2",
[PERF_REG_POWERPC_MMCR3] = "mmcr3",
[PERF_REG_POWERPC_SIER2] = "sier2",
[PERF_REG_POWERPC_SIER3] = "sier3",
[PERF_REG_POWERPC_PMC1] = "pmc1",
[PERF_REG_POWERPC_PMC2] = "pmc2",
[PERF_REG_POWERPC_PMC3] = "pmc3",
[PERF_REG_POWERPC_PMC4] = "pmc4",
[PERF_REG_POWERPC_PMC5] = "pmc5",
[PERF_REG_POWERPC_PMC6] = "pmc6",
};
static inline const char *__perf_reg_name(int id)
{
return reg_names[id];
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -3,14 +3,10 @@
#include "tests/tests.h"
#include "arch-tests.h"
struct test arch_tests[] = {
struct test_suite *arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "Test dwarf unwind",
.func = test__dwarf_unwind,
},
&suite__dwarf_unwind,
#endif
{
.func = NULL,
},
NULL,
};

View File

@ -40,8 +40,12 @@ const char *arch_perf_header_entry(const char *se_header)
{
if (!strcmp(se_header, "Local INSTR Latency"))
return "Finish Cyc";
else if (!strcmp(se_header, "Pipeline Stage Cycle"))
else if (!strcmp(se_header, "INSTR Latency"))
return "Global Finish_cyc";
else if (!strcmp(se_header, "Local Pipeline Stage Cycle"))
return "Dispatch Cyc";
else if (!strcmp(se_header, "Pipeline Stage Cycle"))
return "Global Dispatch_cyc";
return se_header;
}
@ -49,5 +53,7 @@ int arch_support_sort_key(const char *sort_key)
{
if (!strcmp(sort_key, "p_stage_cyc"))
return 1;
if (!strcmp(sort_key, "local_p_stage_cyc"))
return 1;
return 0;
}

View File

@ -40,7 +40,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
return bufp;
}
int arch_get_runtimeparam(struct pmu_event *pe)
int arch_get_runtimeparam(const struct pmu_event *pe)
{
int count;
char path[PATH_MAX] = "/devices/hv_24x7/interface/";

View File

@ -113,10 +113,11 @@ static int is_tracepoint_available(const char *str, struct evlist *evlist)
struct parse_events_error err;
int ret;
bzero(&err, sizeof(err));
parse_events_error__init(&err);
ret = parse_events(evlist, str, &err);
if (err.str)
parse_events_print_error(&err, "tracepoint");
parse_events_error__print(&err, "tracepoint");
parse_events_error__exit(&err);
return ret;
}

View File

@ -74,6 +74,8 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4),
SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5),
SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6),
SMPL_REG(sdar, PERF_REG_POWERPC_SDAR),
SMPL_REG(siar, PERF_REG_POWERPC_SIAR),
SMPL_REG_END
};

View File

@ -19,78 +19,4 @@
#define PERF_REG_IP PERF_REG_RISCV_PC
#define PERF_REG_SP PERF_REG_RISCV_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_RISCV_PC:
return "pc";
case PERF_REG_RISCV_RA:
return "ra";
case PERF_REG_RISCV_SP:
return "sp";
case PERF_REG_RISCV_GP:
return "gp";
case PERF_REG_RISCV_TP:
return "tp";
case PERF_REG_RISCV_T0:
return "t0";
case PERF_REG_RISCV_T1:
return "t1";
case PERF_REG_RISCV_T2:
return "t2";
case PERF_REG_RISCV_S0:
return "s0";
case PERF_REG_RISCV_S1:
return "s1";
case PERF_REG_RISCV_A0:
return "a0";
case PERF_REG_RISCV_A1:
return "a1";
case PERF_REG_RISCV_A2:
return "a2";
case PERF_REG_RISCV_A3:
return "a3";
case PERF_REG_RISCV_A4:
return "a4";
case PERF_REG_RISCV_A5:
return "a5";
case PERF_REG_RISCV_A6:
return "a6";
case PERF_REG_RISCV_A7:
return "a7";
case PERF_REG_RISCV_S2:
return "s2";
case PERF_REG_RISCV_S3:
return "s3";
case PERF_REG_RISCV_S4:
return "s4";
case PERF_REG_RISCV_S5:
return "s5";
case PERF_REG_RISCV_S6:
return "s6";
case PERF_REG_RISCV_S7:
return "s7";
case PERF_REG_RISCV_S8:
return "s8";
case PERF_REG_RISCV_S9:
return "s9";
case PERF_REG_RISCV_S10:
return "s10";
case PERF_REG_RISCV_S11:
return "s11";
case PERF_REG_RISCV_T3:
return "t3";
case PERF_REG_RISCV_T4:
return "t4";
case PERF_REG_RISCV_T5:
return "t5";
case PERF_REG_RISCV_T6:
return "t6";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -451,3 +451,5 @@
446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self
# 447 reserved for memfd_secret
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node

View File

@ -14,82 +14,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_S390_PC
#define PERF_REG_SP PERF_REG_S390_R15
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_S390_R0:
return "R0";
case PERF_REG_S390_R1:
return "R1";
case PERF_REG_S390_R2:
return "R2";
case PERF_REG_S390_R3:
return "R3";
case PERF_REG_S390_R4:
return "R4";
case PERF_REG_S390_R5:
return "R5";
case PERF_REG_S390_R6:
return "R6";
case PERF_REG_S390_R7:
return "R7";
case PERF_REG_S390_R8:
return "R8";
case PERF_REG_S390_R9:
return "R9";
case PERF_REG_S390_R10:
return "R10";
case PERF_REG_S390_R11:
return "R11";
case PERF_REG_S390_R12:
return "R12";
case PERF_REG_S390_R13:
return "R13";
case PERF_REG_S390_R14:
return "R14";
case PERF_REG_S390_R15:
return "R15";
case PERF_REG_S390_FP0:
return "FP0";
case PERF_REG_S390_FP1:
return "FP1";
case PERF_REG_S390_FP2:
return "FP2";
case PERF_REG_S390_FP3:
return "FP3";
case PERF_REG_S390_FP4:
return "FP4";
case PERF_REG_S390_FP5:
return "FP5";
case PERF_REG_S390_FP6:
return "FP6";
case PERF_REG_S390_FP7:
return "FP7";
case PERF_REG_S390_FP8:
return "FP8";
case PERF_REG_S390_FP9:
return "FP9";
case PERF_REG_S390_FP10:
return "FP10";
case PERF_REG_S390_FP11:
return "FP11";
case PERF_REG_S390_FP12:
return "FP12";
case PERF_REG_S390_FP13:
return "FP13";
case PERF_REG_S390_FP14:
return "FP14";
case PERF_REG_S390_FP15:
return "FP15";
case PERF_REG_S390_MASK:
return "MASK";
case PERF_REG_S390_PC:
return "PC";
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -144,8 +144,31 @@ static struct ins x86__instructions[] = {
{ .name = "xorps", .ops = &mov_ops, },
};
static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
static bool amd__ins_is_fused(struct arch *arch, const char *ins1,
const char *ins2)
{
if (strstr(ins2, "jmp"))
return false;
/* Family >= 15h supports cmp/test + branch fusion */
if (arch->family >= 0x15 && (strstarts(ins1, "test") ||
(strstarts(ins1, "cmp") && !strstr(ins1, "xchg")))) {
return true;
}
/* Family >= 19h supports some ALU + branch fusion */
if (arch->family >= 0x19 && (strstarts(ins1, "add") ||
strstarts(ins1, "sub") || strstarts(ins1, "and") ||
strstarts(ins1, "inc") || strstarts(ins1, "dec") ||
strstarts(ins1, "or") || strstarts(ins1, "xor"))) {
return true;
}
return false;
}
static bool intel__ins_is_fused(struct arch *arch, const char *ins1,
const char *ins2)
{
if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
return false;
@ -184,6 +207,9 @@ static int x86__cpuid_parse(struct arch *arch, char *cpuid)
if (ret == 3) {
arch->family = family;
arch->model = model;
arch->ins_is_fused = strstarts(cpuid, "AuthenticAMD") ?
amd__ins_is_fused :
intel__ins_is_fused;
return 0;
}

View File

@ -370,6 +370,8 @@
446 common landlock_restrict_self sys_landlock_restrict_self
447 common memfd_secret sys_memfd_secret
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
#
# Due to a historical design error, certain syscalls are numbered differently

View File

@ -2,15 +2,15 @@
#ifndef ARCH_TESTS_H
#define ARCH_TESTS_H
struct test;
struct test_suite;
/* Tests */
int test__rdpmc(struct test *test, int subtest);
int test__insn_x86(struct test *test, int subtest);
int test__intel_pt_pkt_decoder(struct test *test, int subtest);
int test__bp_modify(struct test *test, int subtest);
int test__x86_sample_parsing(struct test *test, int subtest);
int test__rdpmc(struct test_suite *test, int subtest);
int test__insn_x86(struct test_suite *test, int subtest);
int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
int test__bp_modify(struct test_suite *test, int subtest);
int test__x86_sample_parsing(struct test_suite *test, int subtest);
extern struct test arch_tests[];
extern struct test_suite *arch_tests[];
#endif

View File

@ -23,86 +23,4 @@ void perf_regs_load(u64 *regs);
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
static inline const char *__perf_reg_name(int id)
{
switch (id) {
case PERF_REG_X86_AX:
return "AX";
case PERF_REG_X86_BX:
return "BX";
case PERF_REG_X86_CX:
return "CX";
case PERF_REG_X86_DX:
return "DX";
case PERF_REG_X86_SI:
return "SI";
case PERF_REG_X86_DI:
return "DI";
case PERF_REG_X86_BP:
return "BP";
case PERF_REG_X86_SP:
return "SP";
case PERF_REG_X86_IP:
return "IP";
case PERF_REG_X86_FLAGS:
return "FLAGS";
case PERF_REG_X86_CS:
return "CS";
case PERF_REG_X86_SS:
return "SS";
case PERF_REG_X86_DS:
return "DS";
case PERF_REG_X86_ES:
return "ES";
case PERF_REG_X86_FS:
return "FS";
case PERF_REG_X86_GS:
return "GS";
#ifdef HAVE_ARCH_X86_64_SUPPORT
case PERF_REG_X86_R8:
return "R8";
case PERF_REG_X86_R9:
return "R9";
case PERF_REG_X86_R10:
return "R10";
case PERF_REG_X86_R11:
return "R11";
case PERF_REG_X86_R12:
return "R12";
case PERF_REG_X86_R13:
return "R13";
case PERF_REG_X86_R14:
return "R14";
case PERF_REG_X86_R15:
return "R15";
#endif /* HAVE_ARCH_X86_64_SUPPORT */
#define XMM(x) \
case PERF_REG_X86_XMM ## x: \
case PERF_REG_X86_XMM ## x + 1: \
return "XMM" #x;
XMM(0)
XMM(1)
XMM(2)
XMM(3)
XMM(4)
XMM(5)
XMM(6)
XMM(7)
XMM(8)
XMM(9)
XMM(10)
XMM(11)
XMM(12)
XMM(13)
XMM(14)
XMM(15)
#undef XMM
default:
return NULL;
}
return NULL;
}
#endif /* ARCH_PERF_REGS_H */

View File

@ -3,39 +3,28 @@
#include "tests/tests.h"
#include "arch-tests.h"
struct test arch_tests[] = {
{
.desc = "x86 rdpmc",
.func = test__rdpmc,
},
#ifdef HAVE_DWARF_UNWIND_SUPPORT
{
.desc = "DWARF unwind",
.func = test__dwarf_unwind,
},
#endif
DEFINE_SUITE("x86 rdpmc", rdpmc);
#ifdef HAVE_AUXTRACE_SUPPORT
{
.desc = "x86 instruction decoder - new instructions",
.func = test__insn_x86,
},
{
.desc = "Intel PT packet decoder",
.func = test__intel_pt_pkt_decoder,
},
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder);
#endif
#if defined(__x86_64__)
{
.desc = "x86 bp modify",
.func = test__bp_modify,
},
DEFINE_SUITE("x86 bp modify", bp_modify);
#endif
{
.desc = "x86 Sample parsing",
.func = test__x86_sample_parsing,
},
{
.func = NULL,
},
DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
struct test_suite *arch_tests[] = {
&suite__rdpmc,
#ifdef HAVE_DWARF_UNWIND_SUPPORT
&suite__dwarf_unwind,
#endif
#ifdef HAVE_AUXTRACE_SUPPORT
&suite__insn_x86,
&suite__intel_pt_pkt_decoder,
#endif
#if defined(__x86_64__)
&suite__bp_modify,
#endif
&suite__x86_sample_parsing,
NULL,
};

View File

@ -204,7 +204,7 @@ static int bp_modify2(void)
return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
}
int test__bp_modify(struct test *test __maybe_unused,
int test__bp_modify(struct test_suite *test __maybe_unused,
int subtest __maybe_unused)
{
TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());

View File

@ -173,7 +173,7 @@ static int test_data_set(struct test_data *dat_set, int x86_64)
* verbose (-v) option to see all the instructions and whether or not they
* decoded successfully.
*/
int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
int test__insn_x86(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
int ret = 0;

View File

@ -37,7 +37,7 @@ static pid_t spawn(void)
* the last read counter value to avoid triggering a WARN_ON_ONCE() in
* smp_call_function_many() caused by sending IPIs from NMI context.
*/
int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused)
int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
struct evlist *evlist = NULL;
struct evsel *evsel = NULL;

View File

@ -289,7 +289,7 @@ static int test_one(struct test_data *d)
* This test feeds byte sequences to the Intel PT packet decoder and checks the
* results. Changes to the packet context are also checked.
*/
int test__intel_pt_pkt_decoder(struct test *test __maybe_unused, int subtest __maybe_unused)
int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
struct test_data *d = data;
int ret;

View File

@ -157,7 +157,7 @@ static int __test__rdpmc(void)
return 0;
}
int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused)
int test__rdpmc(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
int status = 0;
int wret = 0;

View File

@ -115,7 +115,7 @@ static int do_test(u64 sample_type)
* For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type.
* The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type.
*/
int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
int test__x86_sample_parsing(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
}

View File

@ -17,3 +17,20 @@ int arch_evlist__add_default_attrs(struct evlist *evlist)
else
return parse_events(evlist, TOPDOWN_L1_EVENTS, NULL);
}
struct evsel *arch_evlist__leader(struct list_head *list)
{
struct evsel *evsel, *first;
first = list_first_entry(list, struct evsel, core.node);
if (!pmu_have_event("cpu", "slots"))
return first;
__evlist__for_each_entry(list, evsel) {
if (evsel->pmu_name && !strcmp(evsel->pmu_name, "cpu") &&
evsel->name && strcasestr(evsel->name, "slots"))
return evsel;
}
return first;
}
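The new arch_evlist__leader() above promotes a "slots" event on the core "cpu" PMU to group leader, which Intel topdown metrics require; otherwise the first event in the list keeps the role. A standalone approximation of that matching rule follows; struct demo_evsel and wants_leadership() are illustration-only names, not perf APIs.

#define _GNU_SOURCE		/* for strcasestr() */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct demo_evsel { const char *pmu_name; const char *name; };

/* Same predicate as in arch_evlist__leader(): an event named "slots" on
 * the core "cpu" PMU should lead the group. */
static bool wants_leadership(const struct demo_evsel *e)
{
	return e->pmu_name && !strcmp(e->pmu_name, "cpu") &&
	       e->name && strcasestr(e->name, "slots");
}

int main(void)
{
	const struct demo_evsel group[] = {
		{ "cpu", "topdown-retiring" },
		{ "cpu", "slots" },
	};
	const struct demo_evsel *leader = &group[0];	/* default: first event */

	for (size_t i = 0; i < sizeof(group) / sizeof(group[0]); i++) {
		if (wants_leadership(&group[i])) {
			leader = &group[i];
			break;
		}
	}

	printf("leader: %s\n", leader->name);	/* prints "slots" */
	return 0;
}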

View File

@ -1,8 +1,31 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdio.h>
#include <stdlib.h>
#include "util/evsel.h"
#include "util/env.h"
#include "linux/string.h"
void arch_evsel__set_sample_weight(struct evsel *evsel)
{
evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
}
void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
{
struct perf_env env = { .total_mem = 0, } ;
if (!perf_env__cpuid(&env))
return;
/*
* On AMD, precise cycles event sampling internally uses IBS pmu.
* But IBS does not have filtering capabilities and perf by default
* sets exclude_guest = 1. This makes IBS pmu event init fail and
* thus perf ends up doing non-precise sampling. Avoid it by clearing
* exclude_guest.
*/
if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
attr->exclude_guest = 0;
free(env.cpuid);
}
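The comment in arch_evsel__fixup_new_cycles() above explains why precise cycles sampling on AMD (backed by IBS) must not set exclude_guest; the vendor test keys off the cpuid string, which on AMD begins with "AuthenticAMD". A self-contained sketch of that prefix check follows; starts_with() stands in for the kernel's strstarts(), and the example cpuid strings only approximate the format perf reports on x86.

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Demo-only stand-in for the kernel's strstarts() helper. */
static bool starts_with(const char *str, const char *prefix)
{
	return strncmp(str, prefix, strlen(prefix)) == 0;
}

int main(void)
{
	/* Roughly the shape perf_env__cpuid() reports on x86 (assumed). */
	const char *amd   = "AuthenticAMD,25,1,1";
	const char *intel = "GenuineIntel,6,85,4";

	printf("clear exclude_guest: %d\n", starts_with(amd, "AuthenticAMD"));   /* 1 */
	printf("clear exclude_guest: %d\n", starts_with(intel, "AuthenticAMD")); /* 0 */
	return 0;
}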

View File

@ -591,6 +591,10 @@ int cmd_annotate(int argc, const char **argv)
return ret;
}
ret = symbol__validate_sym_arguments();
if (ret)
return ret;
if (quiet)
perf_quiet_option();

View File

@ -226,7 +226,6 @@ static void run_collection(struct collection *coll)
if (!bench->fn)
break;
printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
fflush(stdout);
argv[1] = bench->name;
run_bench(coll->name, bench->name, bench->fn, 1, argv);
@ -247,6 +246,9 @@ int cmd_bench(int argc, const char **argv)
struct collection *coll;
int ret = 0;
/* Unbuffered output */
setvbuf(stdout, NULL, _IONBF, 0);
if (argc < 2) {
/* No collection specified. */
print_usage();
@ -300,7 +302,6 @@ int cmd_bench(int argc, const char **argv)
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
fflush(stdout);
ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
goto end;
}

View File

@ -351,10 +351,14 @@ static int build_id_cache__show_all(void)
static int perf_buildid_cache_config(const char *var, const char *value, void *cb)
{
const char **debuginfod = cb;
struct perf_debuginfod *di = cb;
if (!strcmp(var, "buildid-cache.debuginfod"))
*debuginfod = strdup(value);
if (!strcmp(var, "buildid-cache.debuginfod")) {
di->urls = strdup(value);
if (!di->urls)
return -ENOMEM;
di->set = true;
}
return 0;
}
@ -373,8 +377,8 @@ int cmd_buildid_cache(int argc, const char **argv)
*purge_name_list_str = NULL,
*missing_filename = NULL,
*update_name_list_str = NULL,
*kcore_filename = NULL,
*debuginfod = NULL;
*kcore_filename = NULL;
struct perf_debuginfod debuginfod = { };
char sbuf[STRERR_BUFSIZE];
struct perf_data data = {
@ -399,8 +403,10 @@ int cmd_buildid_cache(int argc, const char **argv)
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
OPT_STRING('u', "update", &update_name_list_str, "file list",
"file(s) to update"),
OPT_STRING(0, "debuginfod", &debuginfod, "debuginfod url",
"set debuginfod url"),
OPT_STRING_OPTARG_SET(0, "debuginfod", &debuginfod.urls,
&debuginfod.set, "debuginfod urls",
"Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
"system"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
OPT_END()
@ -425,10 +431,7 @@ int cmd_buildid_cache(int argc, const char **argv)
if (argc || !(list_files || opts_flag))
usage_with_options(buildid_cache_usage, buildid_cache_options);
if (debuginfod) {
pr_debug("DEBUGINFOD_URLS=%s\n", debuginfod);
setenv("DEBUGINFOD_URLS", debuginfod, 1);
}
perf_debuginfod_setup(&debuginfod);
/* -l is exclusive. It can not be used with other options. */
if (list_files && opts_flag) {

View File

@ -2015,7 +2015,8 @@ static int setup_nodes(struct perf_session *session)
{
struct numa_node *n;
unsigned long **nodes;
int node, cpu;
int node, idx;
struct perf_cpu cpu;
int *cpu2node;
if (c2c.node_info > 2)
@ -2038,8 +2039,8 @@ static int setup_nodes(struct perf_session *session)
if (!cpu2node)
return -ENOMEM;
for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
cpu2node[cpu] = -1;
for (idx = 0; idx < c2c.cpus_cnt; idx++)
cpu2node[idx] = -1;
c2c.cpu2node = cpu2node;
@ -2057,13 +2058,13 @@ static int setup_nodes(struct perf_session *session)
if (perf_cpu_map__empty(map))
continue;
for (cpu = 0; cpu < map->nr; cpu++) {
set_bit(map->map[cpu], set);
perf_cpu_map__for_each_cpu(cpu, idx, map) {
set_bit(cpu.cpu, set);
if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
return -EINVAL;
cpu2node[map->map[cpu]] = node;
cpu2node[cpu.cpu] = node;
}
}
@ -2768,6 +2769,10 @@ static int perf_c2c__report(int argc, const char **argv)
if (c2c.stats_only)
c2c.use_stdio = true;
err = symbol__validate_sym_arguments();
if (err)
goto out;
if (!input_name || !strlen(input_name))
input_name = "perf.data";

View File

@ -1121,8 +1121,6 @@ static int setup_config(struct daemon *daemon)
#ifndef F_TLOCK
#define F_TLOCK 2
#include <sys/file.h>
static int lockf(int fd, int cmd, off_t len)
{
if (cmd != F_TLOCK || len != 0)
@ -1403,8 +1401,10 @@ static int send_cmd(struct daemon *daemon, union cmd *cmd)
static int send_cmd_list(struct daemon *daemon)
{
union cmd cmd = { .cmd = CMD_LIST, };
union cmd cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.list.cmd = CMD_LIST;
cmd.list.verbose = verbose;
cmd.list.csv_sep = daemon->csv_sep ? *daemon->csv_sep : 0;
@ -1432,6 +1432,7 @@ static int __cmd_signal(struct daemon *daemon, struct option parent_options[],
return -1;
}
memset(&cmd, 0, sizeof(cmd));
cmd.signal.cmd = CMD_SIGNAL,
cmd.signal.sig = SIGUSR2;
strncpy(cmd.signal.name, name, sizeof(cmd.signal.name) - 1);
@ -1446,7 +1447,7 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[],
OPT_PARENT(parent_options),
OPT_END()
};
union cmd cmd = { .cmd = CMD_STOP, };
union cmd cmd;
argc = parse_options(argc, argv, start_options, daemon_usage, 0);
if (argc)
@ -1457,6 +1458,8 @@ static int __cmd_stop(struct daemon *daemon, struct option parent_options[],
return -1;
}
memset(&cmd, 0, sizeof(cmd));
cmd.cmd = CMD_STOP;
return send_cmd(daemon, &cmd);
}
@ -1470,7 +1473,7 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[],
OPT_PARENT(parent_options),
OPT_END()
};
union cmd cmd = { .cmd = CMD_PING, };
union cmd cmd;
argc = parse_options(argc, argv, ping_options, daemon_usage, 0);
if (argc)
@ -1481,6 +1484,8 @@ static int __cmd_ping(struct daemon *daemon, struct option parent_options[],
return -1;
}
memset(&cmd, 0, sizeof(cmd));
cmd.cmd = CMD_PING;
scnprintf(cmd.ping.name, sizeof(cmd.ping.name), "%s", name);
return send_cmd(daemon, &cmd);
}

View File

@ -13,7 +13,9 @@
#include <signal.h>
#include <stdlib.h>
#include <fcntl.h>
#include <math.h>
#include <poll.h>
#include <ctype.h>
#include <linux/capability.h>
#include <linux/string.h>
@ -28,36 +30,12 @@
#include "strfilter.h"
#include "util/cap.h"
#include "util/config.h"
#include "util/ftrace.h"
#include "util/units.h"
#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
struct perf_ftrace {
struct evlist *evlist;
struct target target;
const char *tracer;
struct list_head filters;
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
int graph_depth;
unsigned long percpu_buffer_size;
bool inherit;
int func_stack_trace;
int func_irq_info;
int graph_nosleep_time;
int graph_noirqs;
int graph_verbose;
int graph_thresh;
unsigned int initial_delay;
};
struct filter_entry {
struct list_head list;
char name[];
};
static volatile int workload_exec_errno;
static bool done;
@ -303,7 +281,7 @@ static int set_tracing_cpumask(struct perf_cpu_map *cpumap)
int ret;
int last_cpu;
last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1);
last_cpu = perf_cpu_map__cpu(cpumap, perf_cpu_map__nr(cpumap) - 1).cpu;
mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
mask_size += last_cpu / 32; /* ',' is needed for every 32th cpus */
@ -565,7 +543,24 @@ static int set_tracing_options(struct perf_ftrace *ftrace)
return 0;
}
static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
static void select_tracer(struct perf_ftrace *ftrace)
{
bool graph = !list_empty(&ftrace->graph_funcs) ||
!list_empty(&ftrace->nograph_funcs);
bool func = !list_empty(&ftrace->filters) ||
!list_empty(&ftrace->notrace);
/* The function_graph has priority over function tracer. */
if (graph)
ftrace->tracer = "function_graph";
else if (func)
ftrace->tracer = "function";
/* Otherwise, the default tracer is used. */
pr_debug("%s tracer is used\n", ftrace->tracer);
}
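select_tracer(), moved earlier in the file by this hunk, resolves the tracer in a fixed priority order: graph filters force function_graph, plain filters force function, and otherwise the preset default stays. A minimal standalone illustration of that decision (pick_tracer() and its inputs are demo-only names):

#include <stdbool.h>
#include <stdio.h>

static const char *pick_tracer(bool have_graph_filters, bool have_func_filters,
			       const char *preset)
{
	if (have_graph_filters)
		return "function_graph";	/* graph filters win */
	if (have_func_filters)
		return "function";
	return preset;				/* neither given: keep default */
}

int main(void)
{
	printf("%s\n", pick_tracer(true, true, "function_graph"));  /* function_graph */
	printf("%s\n", pick_tracer(false, true, "function_graph")); /* function */
	printf("%s\n", pick_tracer(false, false, "function"));      /* function */
	return 0;
}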
static int __cmd_ftrace(struct perf_ftrace *ftrace)
{
char *trace_file;
int trace_fd;
@ -586,10 +581,7 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
return -1;
}
signal(SIGINT, sig_handler);
signal(SIGUSR1, sig_handler);
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
select_tracer(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
@ -600,11 +592,6 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
if (write_tracing_file("trace", "0") < 0)
goto out;
if (argc && evlist__prepare_workload(ftrace->evlist, &ftrace->target, argv, false,
ftrace__workload_exec_failed_signal) < 0) {
goto out;
}
if (set_tracing_options(ftrace) < 0)
goto out_reset;
@ -693,6 +680,270 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
return (done && !workload_exec_errno) ? 0 : -1;
}
static void make_histogram(int buckets[], char *buf, size_t len, char *linebuf)
{
char *p, *q;
char *unit;
double num;
int i;
/* ensure NUL termination */
buf[len] = '\0';
/* handle data line by line */
for (p = buf; (q = strchr(p, '\n')) != NULL; p = q + 1) {
*q = '\0';
/* move it to the line buffer */
strcat(linebuf, p);
/*
* parse trace output to get function duration like in
*
* # tracer: function_graph
* #
* # CPU DURATION FUNCTION CALLS
* # | | | | | | |
* 1) + 10.291 us | do_filp_open();
* 1) 4.889 us | do_filp_open();
* 1) 6.086 us | do_filp_open();
*
*/
if (linebuf[0] == '#')
goto next;
/* ignore CPU */
p = strchr(linebuf, ')');
if (p == NULL)
p = linebuf;
while (*p && !isdigit(*p) && (*p != '|'))
p++;
/* no duration */
if (*p == '\0' || *p == '|')
goto next;
num = strtod(p, &unit);
if (!unit || strncmp(unit, " us", 3))
goto next;
i = log2(num);
if (i < 0)
i = 0;
if (i >= NUM_BUCKET)
i = NUM_BUCKET - 1;
buckets[i]++;
next:
/* empty the line buffer for the next output */
linebuf[0] = '\0';
}
/* preserve any remaining output (before newline) */
strcat(linebuf, p);
}
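make_histogram() above parses function_graph durations and drops each one into a power-of-two latency bucket via log2(). A minimal sketch of just that mapping, where bucket i covers roughly [2^(i-1), 2^i) microseconds clamped to [0, NUM_BUCKET - 1]; the NUM_BUCKET value below is an assumption for the demo, the real constant lives in perf's ftrace header.

#include <math.h>
#include <stdio.h>

#define NUM_BUCKET 22	/* assumed; see util/ftrace.h for the real value */

static int duration_to_bucket(double usec)
{
	int i = (int)log2(usec);	/* same truncation as the code above */

	if (i < 0)
		i = 0;
	if (i >= NUM_BUCKET)
		i = NUM_BUCKET - 1;
	return i;
}

int main(void)
{
	/* Durations as they appear in the sample trace output above. */
	const double samples[] = { 0.5, 4.889, 10.291, 2048.0 };

	for (unsigned k = 0; k < sizeof(samples) / sizeof(samples[0]); k++)
		printf("%8.3f us -> bucket %d\n", samples[k],
		       duration_to_bucket(samples[k]));
	return 0;	/* link with -lm */
}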
static void display_histogram(int buckets[])
{
int i;
int total = 0;
int bar_total = 46; /* to fit in 80 column */
char bar[] = "###############################################";
int bar_len;
for (i = 0; i < NUM_BUCKET; i++)
total += buckets[i];
if (total == 0) {
printf("No data found\n");
return;
}
printf("# %14s | %10s | %-*s |\n",
" DURATION ", "COUNT", bar_total, "GRAPH");
bar_len = buckets[0] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, "");
for (i = 1; i < NUM_BUCKET - 1; i++) {
int start = (1 << (i - 1));
int stop = 1 << i;
const char *unit = "us";
if (start >= 1024) {
start >>= 10;
stop >>= 10;
unit = "ms";
}
bar_len = buckets[i] * bar_total / total;
printf(" %4d - %-4d %s | %10d | %.*s%*s |\n",
start, stop, unit, buckets[i], bar_len, bar,
bar_total - bar_len, "");
}
bar_len = buckets[NUM_BUCKET - 1] * bar_total / total;
printf(" %4d - %-4s %s | %10d | %.*s%*s |\n",
1, "...", " s", buckets[NUM_BUCKET - 1], bar_len, bar,
bar_total - bar_len, "");
}
static int prepare_func_latency(struct perf_ftrace *ftrace)
{
char *trace_file;
int fd;
if (ftrace->target.use_bpf)
return perf_ftrace__latency_prepare_bpf(ftrace);
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
return -1;
}
/* reset ftrace buffer */
if (write_tracing_file("trace", "0") < 0)
return -1;
if (set_tracing_options(ftrace) < 0)
return -1;
/* force to use the function_graph tracer to track duration */
if (write_tracing_file("current_tracer", "function_graph") < 0) {
pr_err("failed to set current_tracer to function_graph\n");
return -1;
}
trace_file = get_tracing_file("trace_pipe");
if (!trace_file) {
pr_err("failed to open trace_pipe\n");
return -1;
}
fd = open(trace_file, O_RDONLY);
if (fd < 0)
pr_err("failed to open trace_pipe\n");
put_tracing_file(trace_file);
return fd;
}
static int start_func_latency(struct perf_ftrace *ftrace)
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_start_bpf(ftrace);
if (write_tracing_file("tracing_on", "1") < 0) {
pr_err("can't enable tracing\n");
return -1;
}
return 0;
}
static int stop_func_latency(struct perf_ftrace *ftrace)
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_stop_bpf(ftrace);
write_tracing_file("tracing_on", "0");
return 0;
}
static int read_func_latency(struct perf_ftrace *ftrace, int buckets[])
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_read_bpf(ftrace, buckets);
return 0;
}
static int cleanup_func_latency(struct perf_ftrace *ftrace)
{
if (ftrace->target.use_bpf)
return perf_ftrace__latency_cleanup_bpf(ftrace);
reset_tracing_files(ftrace);
return 0;
}
static int __cmd_latency(struct perf_ftrace *ftrace)
{
int trace_fd;
char buf[4096];
char line[256];
struct pollfd pollfd = {
.events = POLLIN,
};
int buckets[NUM_BUCKET] = { };
if (!(perf_cap__capable(CAP_PERFMON) ||
perf_cap__capable(CAP_SYS_ADMIN))) {
pr_err("ftrace only works for %s!\n",
#ifdef HAVE_LIBCAP_SUPPORT
"users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
#else
"root"
#endif
);
return -1;
}
trace_fd = prepare_func_latency(ftrace);
if (trace_fd < 0)
goto out;
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
pollfd.fd = trace_fd;
if (start_func_latency(ftrace) < 0)
goto out;
evlist__start_workload(ftrace->evlist);
line[0] = '\0';
while (!done) {
if (poll(&pollfd, 1, -1) < 0)
break;
if (pollfd.revents & POLLIN) {
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n < 0)
break;
make_histogram(buckets, buf, n, line);
}
}
stop_func_latency(ftrace);
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
pr_err("workload failed: %s\n", emsg);
goto out;
}
/* read remaining buffer contents */
while (!ftrace->target.use_bpf) {
int n = read(trace_fd, buf, sizeof(buf) - 1);
if (n <= 0)
break;
make_histogram(buckets, buf, n, line);
}
read_func_latency(ftrace, buckets);
display_histogram(buckets);
out:
close(trace_fd);
cleanup_func_latency(ftrace);
return (done && !workload_exec_errno) ? 0 : -1;
}
static int perf_ftrace_config(const char *var, const char *value, void *cb)
{
struct perf_ftrace *ftrace = cb;
@ -855,41 +1106,21 @@ static int parse_graph_tracer_opts(const struct option *opt,
return 0;
}
static void select_tracer(struct perf_ftrace *ftrace)
{
bool graph = !list_empty(&ftrace->graph_funcs) ||
!list_empty(&ftrace->nograph_funcs);
bool func = !list_empty(&ftrace->filters) ||
!list_empty(&ftrace->notrace);
/* The function_graph has priority over function tracer. */
if (graph)
ftrace->tracer = "function_graph";
else if (func)
ftrace->tracer = "function";
/* Otherwise, the default tracer is used. */
pr_debug("%s tracer is used\n", ftrace->tracer);
}
enum perf_ftrace_subcommand {
PERF_FTRACE_NONE,
PERF_FTRACE_TRACE,
PERF_FTRACE_LATENCY,
};
int cmd_ftrace(int argc, const char **argv)
{
int ret;
int (*cmd_func)(struct perf_ftrace *) = NULL;
struct perf_ftrace ftrace = {
.tracer = DEFAULT_TRACER,
.target = { .uid = UINT_MAX, },
};
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- <command> [<options>]",
NULL
};
const struct option ftrace_options[] = {
OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
"Tracer to use: function_graph(default) or function"),
OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
"Show available functions to filter",
opt_list_avail_functions, "*"),
const struct option common_options[] = {
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
"Trace on existing process id"),
/* TODO: Add short option -t after -t/--tracer can be removed. */
@ -901,6 +1132,14 @@ int cmd_ftrace(int argc, const char **argv)
"System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
"List of cpus to monitor"),
OPT_END()
};
const struct option ftrace_options[] = {
OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
"Tracer to use: function_graph(default) or function"),
OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
"Show available functions to filter",
opt_list_avail_functions, "*"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Trace given functions using function tracer",
parse_filter_func),
@ -923,24 +1162,87 @@ int cmd_ftrace(int argc, const char **argv)
"Trace children processes"),
OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
"Number of milliseconds to wait before starting tracing after program start"),
OPT_END()
OPT_PARENT(common_options),
};
const struct option latency_options[] = {
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
"Show latency of given function", parse_filter_func),
#ifdef HAVE_BPF_SKEL
OPT_BOOLEAN('b', "use-bpf", &ftrace.target.use_bpf,
"Use BPF to measure function latency"),
#endif
OPT_PARENT(common_options),
};
const struct option *options = ftrace_options;
const char * const ftrace_usage[] = {
"perf ftrace [<options>] [<command>]",
"perf ftrace [<options>] -- [<command>] [<options>]",
"perf ftrace {trace|latency} [<options>] [<command>]",
"perf ftrace {trace|latency} [<options>] -- [<command>] [<options>]",
NULL
};
enum perf_ftrace_subcommand subcmd = PERF_FTRACE_NONE;
INIT_LIST_HEAD(&ftrace.filters);
INIT_LIST_HEAD(&ftrace.notrace);
INIT_LIST_HEAD(&ftrace.graph_funcs);
INIT_LIST_HEAD(&ftrace.nograph_funcs);
signal(SIGINT, sig_handler);
signal(SIGUSR1, sig_handler);
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
ret = perf_config(perf_ftrace_config, &ftrace);
if (ret < 0)
return -1;
argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
if (argc > 1) {
if (!strcmp(argv[1], "trace")) {
subcmd = PERF_FTRACE_TRACE;
} else if (!strcmp(argv[1], "latency")) {
subcmd = PERF_FTRACE_LATENCY;
options = latency_options;
}
select_tracer(&ftrace);
if (subcmd != PERF_FTRACE_NONE) {
argc--;
argv++;
}
}
/* for backward compatibility */
if (subcmd == PERF_FTRACE_NONE)
subcmd = PERF_FTRACE_TRACE;
argc = parse_options(argc, argv, options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (argc < 0) {
ret = -EINVAL;
goto out_delete_filters;
}
switch (subcmd) {
case PERF_FTRACE_TRACE:
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
cmd_func = __cmd_ftrace;
break;
case PERF_FTRACE_LATENCY:
if (list_empty(&ftrace.filters)) {
pr_err("Should provide a function to measure\n");
parse_options_usage(ftrace_usage, options, "T", 1);
ret = -EINVAL;
goto out_delete_filters;
}
cmd_func = __cmd_latency;
break;
case PERF_FTRACE_NONE:
default:
pr_err("Invalid subcommand\n");
ret = -EINVAL;
goto out_delete_filters;
}
ret = target__validate(&ftrace.target);
if (ret) {
@ -961,7 +1263,15 @@ int cmd_ftrace(int argc, const char **argv)
if (ret < 0)
goto out_delete_evlist;
ret = __cmd_ftrace(&ftrace, argc, argv);
if (argc) {
ret = evlist__prepare_workload(ftrace.evlist, &ftrace.target,
argv, false,
ftrace__workload_exec_failed_signal);
if (ret < 0)
goto out_delete_evlist;
}
ret = cmd_func(&ftrace);
out_delete_evlist:
evlist__delete(ftrace.evlist);

View File

@ -535,12 +535,9 @@ static int perf_event__repipe_exit(struct perf_tool *tool,
static int perf_event__repipe_tracing_data(struct perf_session *session,
union perf_event *event)
{
int err;
perf_event__repipe_synth(session->tool, event);
err = perf_event__process_tracing_data(session, event);
return err;
return perf_event__process_tracing_data(session, event);
}
static int dso__read_build_id(struct dso *dso)
@ -819,7 +816,8 @@ static int __cmd_inject(struct perf_inject *inject)
inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
inject->tool.auxtrace = perf_event__process_auxtrace;
inject->tool.aux = perf_event__drop_aux;
inject->tool.itrace_start = perf_event__drop_aux,
inject->tool.itrace_start = perf_event__drop_aux;
inject->tool.aux_output_hw_id = perf_event__drop_aux;
inject->tool.ordered_events = true;
inject->tool.ordering_requires_timestamps = true;
/* Allow space in the header for new attributes */
@ -886,6 +884,7 @@ int cmd_inject(int argc, const char **argv)
.lost_samples = perf_event__repipe,
.aux = perf_event__repipe,
.itrace_start = perf_event__repipe,
.aux_output_hw_id = perf_event__repipe,
.context_switch = perf_event__repipe,
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
@ -942,6 +941,10 @@ int cmd_inject(int argc, const char **argv)
#endif
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show build ids, etc)"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
"don't load vmlinux even if found"),
OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
"kallsyms pathname"),
OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
@ -976,6 +979,9 @@ int cmd_inject(int argc, const char **argv)
return -1;
}
if (symbol__validate_sym_arguments())
return -1;
if (inject.in_place_update) {
if (!strcmp(inject.input_name, "-")) {
pr_err("Input file name required for in-place updating\n");

View File

@ -192,7 +192,7 @@ static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_samp
int ret = evsel__process_alloc_event(evsel, sample);
if (!ret) {
int node1 = cpu__get_node(sample->cpu),
int node1 = cpu__get_node((struct perf_cpu){.cpu = sample->cpu}),
node2 = evsel__intval(evsel, sample, "node");
if (node1 != node2)

View File

@ -1456,7 +1456,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
perf_session__set_id_hdr_size(kvm->session);
ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
kvm->evlist->core.threads, false, 1);
kvm->evlist->core.threads, true, false, 1);
err = kvm_live_open_events(kvm);
if (err)
goto out;

View File

@ -12,6 +12,7 @@
#include "util/parse-events.h"
#include "util/pmu.h"
#include "util/pmu-hybrid.h"
#include "util/debug.h"
#include "util/metricgroup.h"
#include <subcmd/pager.h>
@ -20,13 +21,15 @@
static bool desc_flag = true;
static bool details_flag;
static const char *hybrid_type;
int cmd_list(int argc, const char **argv)
{
int i;
int i, ret = 0;
bool raw_dump = false;
bool long_desc_flag = false;
bool deprecated = false;
char *pmu_name = NULL;
struct option list_options[] = {
OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
OPT_BOOLEAN('d', "desc", &desc_flag,
@ -37,6 +40,9 @@ int cmd_list(int argc, const char **argv)
"Print information on the perf event names and expressions used internally by events."),
OPT_BOOLEAN(0, "deprecated", &deprecated,
"Print deprecated events."),
OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type",
"Print events applying cpu with this type for hybrid platform "
"(e.g. core or atom)"),
OPT_INCR(0, "debug", &verbose,
"Enable debugging output"),
OPT_END()
@ -56,10 +62,16 @@ int cmd_list(int argc, const char **argv)
if (!raw_dump && pager_in_use())
printf("\nList of pre-defined events (to be used in -e):\n\n");
if (hybrid_type) {
pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type);
if (!pmu_name)
pr_warning("WARNING: hybrid cputype is not supported!\n");
}
if (argc == 0) {
print_events(NULL, raw_dump, !desc_flag, long_desc_flag,
details_flag, deprecated);
return 0;
details_flag, deprecated, pmu_name);
goto out;
}
for (i = 0; i < argc; ++i) {
@ -82,25 +94,27 @@ int cmd_list(int argc, const char **argv)
else if (strcmp(argv[i], "pmu") == 0)
print_pmu_events(NULL, raw_dump, !desc_flag,
long_desc_flag, details_flag,
deprecated);
deprecated, pmu_name);
else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(NULL, NULL, raw_dump);
else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0)
metricgroup__print(true, false, NULL, raw_dump, details_flag);
metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name);
else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0)
metricgroup__print(false, true, NULL, raw_dump, details_flag);
metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name);
else if ((sep = strchr(argv[i], ':')) != NULL) {
int sep_idx;
sep_idx = sep - argv[i];
s = strdup(argv[i]);
if (s == NULL)
return -1;
if (s == NULL) {
ret = -1;
goto out;
}
s[sep_idx] = '\0';
print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
print_sdt_events(s, s + sep_idx + 1, raw_dump);
metricgroup__print(true, true, s, raw_dump, details_flag);
metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name);
free(s);
} else {
if (asprintf(&s, "*%s*", argv[i]) < 0) {
@ -116,12 +130,16 @@ int cmd_list(int argc, const char **argv)
print_pmu_events(s, raw_dump, !desc_flag,
long_desc_flag,
details_flag,
deprecated);
deprecated,
pmu_name);
print_tracepoint_events(NULL, s, raw_dump);
print_sdt_events(NULL, s, raw_dump);
metricgroup__print(true, true, s, raw_dump, details_flag);
metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name);
free(s);
}
}
return 0;
out:
free(pmu_name);
return ret;
}

View File

@ -21,6 +21,7 @@
#include "util/build-id.h"
#include "util/strlist.h"
#include "util/strfilter.h"
#include "util/symbol.h"
#include "util/symbol_conf.h"
#include "util/debug.h"
#include <subcmd/parse-options.h>
@ -629,6 +630,10 @@ __cmd_probe(int argc, const char **argv)
params.command = 'a';
}
ret = symbol__validate_sym_arguments();
if (ret)
return ret;
if (params.quiet) {
if (verbose != 0) {
pr_err(" Error: -v and -q are exclusive.\n");

View File

@ -111,6 +111,7 @@ struct record {
unsigned long long samples;
struct mmap_cpu_mask affinity_mask;
unsigned long output_max_size; /* = 0: unlimited */
struct perf_debuginfod debuginfod;
};
static volatile int done;
@ -1255,6 +1256,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
{
int err;
struct perf_thread_map *thread_map;
bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
if (rec->opts.tail_synthesize != tail)
return 0;
@ -1266,6 +1268,7 @@ static int record__synthesize_workload(struct record *rec, bool tail)
err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
process_synthesized_event,
&rec->session->machines.host,
needs_mmap,
rec->opts.sample_address);
perf_thread_map__put(thread_map);
return err;
@ -1409,7 +1412,7 @@ static int record__synthesize(struct record *rec, bool tail)
goto out;
/* Synthesize id_index before auxtrace_info */
if (rec->opts.auxtrace_sample_mode) {
if (rec->opts.auxtrace_sample_mode || rec->opts.full_auxtrace) {
err = perf_event__synthesize_id_index(tool,
process_synthesized_event,
session->evlist, machine);
@ -1470,19 +1473,26 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize bpf events.\n");
err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
machine);
if (err < 0)
pr_warning("Couldn't synthesize cgroup events.\n");
if (rec->opts.synth & PERF_SYNTH_CGROUP) {
err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
machine);
if (err < 0)
pr_warning("Couldn't synthesize cgroup events.\n");
}
if (rec->opts.nr_threads_synthesize > 1) {
perf_set_multithreaded();
f = process_locked_synthesized_event;
}
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
f, opts->sample_address,
rec->opts.nr_threads_synthesize);
if (rec->opts.synth & PERF_SYNTH_TASK) {
bool needs_mmap = rec->opts.synth & PERF_SYNTH_MMAP;
err = __machine__synthesize_threads(machine, tool, &opts->target,
rec->evlist->core.threads,
f, needs_mmap, opts->sample_address,
rec->opts.nr_threads_synthesize);
}
if (rec->opts.nr_threads_synthesize > 1)
perf_set_singlethreaded();
@ -2168,6 +2178,12 @@ static int perf_record_config(const char *var, const char *value, void *cb)
rec->opts.nr_cblocks = nr_cblocks_default;
}
#endif
if (!strcmp(var, "record.debuginfod")) {
rec->debuginfod.urls = strdup(value);
if (!rec->debuginfod.urls)
return -ENOMEM;
rec->debuginfod.set = true;
}
return 0;
}
@ -2258,6 +2274,10 @@ static int record__parse_mmap_pages(const struct option *opt,
return ret;
}
void __weak arch__add_leaf_frame_record_opts(struct record_opts *opts __maybe_unused)
{
}
static int parse_control_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
@ -2391,6 +2411,26 @@ static int process_timestamp_boundary(struct perf_tool *tool,
return 0;
}
static int parse_record_synth_option(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
struct record_opts *opts = opt->value;
char *p = strdup(str);
if (p == NULL)
return -1;
opts->synth = parse_synth_opt(p);
free(p);
if (opts->synth < 0) {
pr_err("Invalid synth option: %s\n", str);
return -1;
}
return 0;
}
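parse_record_synth_option() above hands the --synth string to parse_synth_opt(), whose body is not part of this hunk. The sketch below is only an assumed approximation of what such a helper does: split comma-separated tokens, accumulate a bitmask, and return a negative value on an unknown token. The DEMO_SYNTH_* bit values and any implied-bit behaviour of the real PERF_SYNTH_* flags are assumptions here.

#include <stdio.h>
#include <string.h>

#define DEMO_SYNTH_TASK   (1 << 0)	/* assumed bit values */
#define DEMO_SYNTH_MMAP   (1 << 1)
#define DEMO_SYNTH_CGROUP (1 << 2)
#define DEMO_SYNTH_ALL    (DEMO_SYNTH_TASK | DEMO_SYNTH_MMAP | DEMO_SYNTH_CGROUP)

static int demo_parse_synth_opt(char *str)
{
	int flags = 0;
	char *tok;

	for (tok = strtok(str, ","); tok; tok = strtok(NULL, ",")) {
		if (!strcmp(tok, "no"))
			flags = 0;
		else if (!strcmp(tok, "all"))
			flags = DEMO_SYNTH_ALL;
		else if (!strcmp(tok, "task"))
			flags |= DEMO_SYNTH_TASK;
		else if (!strcmp(tok, "mmap"))
			flags |= DEMO_SYNTH_MMAP;
		else if (!strcmp(tok, "cgroup"))
			flags |= DEMO_SYNTH_CGROUP;
		else
			return -1;	/* unknown token */
	}
	return flags;
}

int main(void)
{
	char opt[] = "task,mmap";

	printf("flags = 0x%x\n", demo_parse_synth_opt(opt));
	return 0;
}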
/*
* XXX Ideally would be local to cmd_record() and passed to a record__new
* because we need to have access to it in record__exit, that is called
@ -2416,6 +2456,7 @@ static struct record record = {
.nr_threads_synthesize = 1,
.ctl_fd = -1,
.ctl_fd_ack = -1,
.synth = PERF_SYNTH_ALL,
},
.tool = {
.sample = process_sample_event,
@ -2631,6 +2672,12 @@ static struct option __record_options[] = {
"\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.\n"
"\t\t\t Alternatively, ctl-fifo / ack-fifo will be opened and used as ctl-fd / ack-fd.",
parse_control_option),
OPT_CALLBACK(0, "synth", &record.opts, "no|all|task|mmap|cgroup",
"Fine-tune event synthesis: default=all", parse_record_synth_option),
OPT_STRING_OPTARG_SET(0, "debuginfod", &record.debuginfod.urls,
&record.debuginfod.set, "debuginfod urls",
"Enable debuginfod data retrieval from DEBUGINFOD_URLS or specified urls",
"system"),
OPT_END()
};
@ -2680,6 +2727,12 @@ int cmd_record(int argc, const char **argv)
if (quiet)
perf_quiet_option();
err = symbol__validate_sym_arguments();
if (err)
return err;
perf_debuginfod_setup(&record.debuginfod);
/* Make system wide (-a) the default target. */
if (!argc && target__none(&rec->opts.target))
rec->opts.target.system_wide = true;
@ -2756,7 +2809,7 @@ int cmd_record(int argc, const char **argv)
symbol__init(NULL);
if (rec->opts.affinity != PERF_AFFINITY_SYS) {
rec->affinity_mask.nbits = cpu__max_cpu();
rec->affinity_mask.nbits = cpu__max_cpu().cpu;
rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits);
if (!rec->affinity_mask.bits) {
pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
@ -2862,6 +2915,10 @@ int cmd_record(int argc, const char **argv)
}
rec->opts.target.hybrid = perf_pmu__has_hybrid();
if (callchain_param.enabled && callchain_param.record_mode == CALLCHAIN_FP)
arch__add_leaf_frame_record_opts(&rec->opts);
err = -ENOMEM;
if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
usage_with_options(record_usage, record_options);

View File

@ -410,7 +410,7 @@ static int report__setup_sample_type(struct report *rep)
}
}
callchain_param_setup(sample_type);
callchain_param_setup(sample_type, perf_env__arch(&rep->session->header.env));
if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@ -1127,7 +1127,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(*pevlist);
callchain_param_setup(sample_type);
callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
return 0;
}
@ -1381,18 +1381,9 @@ int cmd_report(int argc, const char **argv)
if (quiet)
perf_quiet_option();
if (symbol_conf.vmlinux_name &&
access(symbol_conf.vmlinux_name, R_OK)) {
pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
ret = -EINVAL;
ret = symbol__validate_sym_arguments();
if (ret)
goto exit;
}
if (symbol_conf.kallsyms_name &&
access(symbol_conf.kallsyms_name, R_OK)) {
pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
ret = -EINVAL;
goto exit;
}
if (report.inverted_callchain)
callchain_param.order = ORDER_CALLER;

View File

@ -167,7 +167,7 @@ struct trace_sched_handler {
struct perf_sched_map {
DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
int *comp_cpus;
struct perf_cpu *comp_cpus;
bool comp;
struct perf_thread_map *color_pids;
const char *color_pids_str;
@ -191,7 +191,7 @@ struct perf_sched {
* Track the current task - that way we can know whether there's any
* weird events, such as a task being switched away that is not current.
*/
int max_cpu;
struct perf_cpu max_cpu;
u32 curr_pid[MAX_CPUS];
struct thread *curr_thread[MAX_CPUS];
char next_shortname1;
@ -1535,28 +1535,31 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
int i, this_cpu = sample->cpu;
int i;
struct perf_cpu this_cpu = {
.cpu = sample->cpu,
};
int cpus_nr;
bool new_cpu = false;
const char *color = PERF_COLOR_NORMAL;
char stimestamp[32];
BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
BUG_ON(this_cpu.cpu >= MAX_CPUS || this_cpu.cpu < 0);
if (this_cpu > sched->max_cpu)
if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (sched->map.comp) {
cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
sched->map.comp_cpus[cpus_nr++] = this_cpu;
new_cpu = true;
}
} else
cpus_nr = sched->max_cpu;
cpus_nr = sched->max_cpu.cpu;
timestamp0 = sched->cpu_last_switched[this_cpu];
sched->cpu_last_switched[this_cpu] = timestamp;
timestamp0 = sched->cpu_last_switched[this_cpu.cpu];
sched->cpu_last_switched[this_cpu.cpu] = timestamp;
if (timestamp0)
delta = timestamp - timestamp0;
else
@ -1577,7 +1580,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
return -1;
}
sched->curr_thread[this_cpu] = thread__get(sched_in);
sched->curr_thread[this_cpu.cpu] = thread__get(sched_in);
printf(" ");
@ -1608,8 +1611,10 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
}
for (i = 0; i < cpus_nr; i++) {
int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
struct thread *curr_thread = sched->curr_thread[cpu];
struct perf_cpu cpu = {
.cpu = sched->map.comp ? sched->map.comp_cpus[i].cpu : i,
};
struct thread *curr_thread = sched->curr_thread[cpu.cpu];
struct thread_runtime *curr_tr;
const char *pid_color = color;
const char *cpu_color = color;
@ -1617,19 +1622,19 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
if (curr_thread && thread__has_color(curr_thread))
pid_color = COLOR_PIDS;
if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, cpu))
continue;
if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
if (sched->map.color_cpus && perf_cpu_map__has(sched->map.color_cpus, cpu))
cpu_color = COLOR_CPUS;
if (cpu != this_cpu)
if (cpu.cpu != this_cpu.cpu)
color_fprintf(stdout, color, " ");
else
color_fprintf(stdout, cpu_color, "*");
if (sched->curr_thread[cpu]) {
curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
if (sched->curr_thread[cpu.cpu]) {
curr_tr = thread__get_runtime(sched->curr_thread[cpu.cpu]);
if (curr_tr == NULL) {
thread__put(sched_in);
return -1;
@ -1639,7 +1644,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
color_fprintf(stdout, color, " ");
}
if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
if (sched->map.cpus && !perf_cpu_map__has(sched->map.cpus, this_cpu))
goto out;
timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
@ -1929,7 +1934,7 @@ static char *timehist_get_commstr(struct thread *thread)
static void timehist_header(struct perf_sched *sched)
{
u32 ncpus = sched->max_cpu + 1;
u32 ncpus = sched->max_cpu.cpu + 1;
u32 i, j;
printf("%15s %6s ", "time", "cpu");
@ -2008,7 +2013,7 @@ static void timehist_print_sample(struct perf_sched *sched,
struct thread_runtime *tr = thread__priv(thread);
const char *next_comm = evsel__strval(evsel, sample, "next_comm");
const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
u32 max_cpus = sched->max_cpu + 1;
u32 max_cpus = sched->max_cpu.cpu + 1;
char tstr[64];
char nstr[30];
u64 wait_time;
@ -2389,7 +2394,7 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
printf("%15s [%04d] ", tstr, sample->cpu);
if (sched->show_cpu_visual)
printf(" %*s ", sched->max_cpu + 1, "");
printf(" %*s ", sched->max_cpu.cpu + 1, "");
printf(" %-*s ", comm_width, timehist_get_commstr(thread));
@ -2449,13 +2454,13 @@ static void timehist_print_migration_event(struct perf_sched *sched,
{
struct thread *thread;
char tstr[64];
u32 max_cpus = sched->max_cpu + 1;
u32 max_cpus;
u32 ocpu, dcpu;
if (sched->summary_only)
return;
max_cpus = sched->max_cpu + 1;
max_cpus = sched->max_cpu.cpu + 1;
ocpu = evsel__intval(evsel, sample, "orig_cpu");
dcpu = evsel__intval(evsel, sample, "dest_cpu");
@ -2918,7 +2923,7 @@ static void timehist_print_summary(struct perf_sched *sched,
printf(" Total scheduling time (msec): ");
print_sched_time(hist_time, 2);
printf(" (x %d)\n", sched->max_cpu);
printf(" (x %d)\n", sched->max_cpu.cpu);
}
typedef int (*sched_handler)(struct perf_tool *tool,
@ -2935,9 +2940,11 @@ static int perf_timehist__process_sample(struct perf_tool *tool,
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
int err = 0;
int this_cpu = sample->cpu;
struct perf_cpu this_cpu = {
.cpu = sample->cpu,
};
if (this_cpu > sched->max_cpu)
if (this_cpu.cpu > sched->max_cpu.cpu)
sched->max_cpu = this_cpu;
if (evsel->handler != NULL) {
@ -3054,10 +3061,10 @@ static int perf_sched__timehist(struct perf_sched *sched)
goto out;
/* pre-allocate struct for per-CPU idle stats */
sched->max_cpu = session->header.env.nr_cpus_online;
if (sched->max_cpu == 0)
sched->max_cpu = 4;
if (init_idle_threads(sched->max_cpu))
sched->max_cpu.cpu = session->header.env.nr_cpus_online;
if (sched->max_cpu.cpu == 0)
sched->max_cpu.cpu = 4;
if (init_idle_threads(sched->max_cpu.cpu))
goto out;
/* summary_only implies summary option, but don't overwrite summary if set */
@ -3209,10 +3216,10 @@ static int setup_map_cpus(struct perf_sched *sched)
{
struct perf_cpu_map *map;
sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
sched->max_cpu.cpu = sysconf(_SC_NPROCESSORS_CONF);
if (sched->map.comp) {
sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
sched->map.comp_cpus = zalloc(sched->max_cpu.cpu * sizeof(int));
if (!sched->map.comp_cpus)
return -1;
}
@ -3538,6 +3545,7 @@ int cmd_sched(int argc, const char **argv)
.fork_event = replay_fork_event,
};
unsigned int i;
int ret;
for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
sched.curr_pid[i] = -1;
@ -3598,6 +3606,9 @@ int cmd_sched(int argc, const char **argv)
parse_options_usage(NULL, timehist_options, "n", true);
return -EINVAL;
}
ret = symbol__validate_sym_arguments();
if (ret)
return ret;
return perf_sched__timehist(&sched);
} else {

View File

@ -15,6 +15,7 @@
#include "util/symbol.h"
#include "util/thread.h"
#include "util/trace-event.h"
#include "util/env.h"
#include "util/evlist.h"
#include "util/evsel.h"
#include "util/evsel_fprintf.h"
@ -122,6 +123,7 @@ enum perf_output_field {
PERF_OUTPUT_TOD = 1ULL << 32,
PERF_OUTPUT_DATA_PAGE_SIZE = 1ULL << 33,
PERF_OUTPUT_CODE_PAGE_SIZE = 1ULL << 34,
PERF_OUTPUT_INS_LAT = 1ULL << 35,
};
struct perf_script {
@ -188,6 +190,7 @@ struct output_option {
{.str = "tod", .field = PERF_OUTPUT_TOD},
{.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE},
{.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE},
{.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT},
};
enum {
@ -262,7 +265,8 @@ static struct {
PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR |
PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE,
PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE |
PERF_OUTPUT_INS_LAT,
.invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
},
@ -459,7 +463,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT))
evsel__do_check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(SYM) &&
@ -511,7 +515,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
evsel__do_check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(DATA_PAGE_SIZE) &&
@ -522,6 +526,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE))
return -EINVAL;
if (PRINT_FIELD(INS_LAT) &&
evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT))
return -EINVAL;
return 0;
}
@ -641,7 +649,7 @@ static int perf_session__check_output_opt(struct perf_session *session)
return 0;
}
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask, const char *arch,
FILE *fp)
{
unsigned i = 0, r;
@ -654,7 +662,7 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r, arch), val);
}
return printed;
@ -711,17 +719,17 @@ tod_scnprintf(struct perf_script *script, char *buf, int buflen,
}
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->intr_regs,
attr->sample_regs_intr, fp);
attr->sample_regs_intr, arch, fp);
}
static int perf_sample__fprintf_uregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
struct perf_event_attr *attr, const char *arch, FILE *fp)
{
return perf_sample__fprintf_regs(&sample->user_regs,
attr->sample_regs_user, fp);
attr->sample_regs_user, arch, fp);
}
static int perf_sample__fprintf_start(struct perf_script *script,
@ -1993,6 +2001,7 @@ static void process_event(struct perf_script *script,
struct evsel_script *es = evsel->priv;
FILE *fp = es->fp;
char str[PAGE_SIZE_NAME_LEN];
const char *arch = perf_env__arch(machine->env);
if (output[type].fields == 0)
return;
@ -2039,6 +2048,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(WEIGHT))
fprintf(fp, "%16" PRIu64, sample->weight);
if (PRINT_FIELD(INS_LAT))
fprintf(fp, "%16" PRIu16, sample->ins_lat);
if (PRINT_FIELD(IP)) {
struct callchain_cursor *cursor = NULL;
@ -2056,10 +2068,10 @@ static void process_event(struct perf_script *script,
}
if (PRINT_FIELD(IREGS))
perf_sample__fprintf_iregs(sample, attr, fp);
perf_sample__fprintf_iregs(sample, attr, arch, fp);
if (PRINT_FIELD(UREGS))
perf_sample__fprintf_uregs(sample, attr, fp);
perf_sample__fprintf_uregs(sample, attr, arch, fp);
if (PRINT_FIELD(BRSTACK))
perf_sample__fprintf_brstack(sample, thread, attr, fp);
@ -2103,8 +2115,8 @@ static struct scripting_ops *scripting_ops;
static void __process_stat(struct evsel *counter, u64 tstamp)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
int idx, thread;
struct perf_cpu cpu;
static int header_printed;
if (counter->core.system_wide)
@ -2117,13 +2129,13 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
}
for (thread = 0; thread < nthreads; thread++) {
for (cpu = 0; cpu < ncpus; cpu++) {
perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
struct perf_counts_values *counts;
counts = perf_counts(counter->counts, cpu, thread);
counts = perf_counts(counter->counts, idx, thread);
printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
counter->core.cpus->map[cpu],
cpu.cpu,
perf_thread_map__pid(counter->core.threads, thread),
counts->val,
counts->ena,
@ -2306,7 +2318,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* on events sample_type.
*/
sample_type = evlist__combined_sample_type(evlist);
callchain_param_setup(sample_type);
callchain_param_setup(sample_type, perf_env__arch((*pevlist)->env));
/* Enable fields for callchain entries */
if (symbol_conf.use_callchain &&
@ -3456,16 +3468,7 @@ static void script__setup_sample_type(struct perf_script *script)
struct perf_session *session = script->session;
u64 sample_type = evlist__combined_sample_type(session->evlist);
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
(sample_type & PERF_SAMPLE_STACK_USER)) {
callchain_param.record_mode = CALLCHAIN_DWARF;
dwarf_callchain_users = true;
} else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
callchain_param.record_mode = CALLCHAIN_LBR;
else
callchain_param.record_mode = CALLCHAIN_FP;
}
callchain_param_setup(sample_type, perf_env__arch(session->machines.host.env));
if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
@ -3715,7 +3718,7 @@ int cmd_script(int argc, const char **argv)
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
"callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod,"
"data_page_size,code_page_size",
"data_page_size,code_page_size,ins_lat",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
@ -3836,6 +3839,9 @@ int cmd_script(int argc, const char **argv)
data.path = input_name;
data.force = symbol_conf.force;
if (symbol__validate_sym_arguments())
return -1;
if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
if (!rec_script_path)

View File

@ -230,11 +230,12 @@ static bool cpus_map_matched(struct evsel *a, struct evsel *b)
if (!a->core.cpus || !b->core.cpus)
return false;
if (a->core.cpus->nr != b->core.cpus->nr)
if (perf_cpu_map__nr(a->core.cpus) != perf_cpu_map__nr(b->core.cpus))
return false;
for (int i = 0; i < a->core.cpus->nr; i++) {
if (a->core.cpus->map[i] != b->core.cpus->map[i])
for (int i = 0; i < perf_cpu_map__nr(a->core.cpus); i++) {
if (perf_cpu_map__cpu(a->core.cpus, i).cpu !=
perf_cpu_map__cpu(b->core.cpus, i).cpu)
return false;
}
@ -327,34 +328,35 @@ static int write_stat_round_event(u64 tm, u64 type)
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 thread,
struct perf_counts_values *count)
{
struct perf_sample_id *sid = SID(counter, cpu, thread);
struct perf_sample_id *sid = SID(counter, cpu_map_idx, thread);
struct perf_cpu cpu = perf_cpu_map__cpu(evsel__cpus(counter), cpu_map_idx);
return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
process_synthesized_event, NULL);
}
static int read_single_counter(struct evsel *counter, int cpu,
static int read_single_counter(struct evsel *counter, int cpu_map_idx,
int thread, struct timespec *rs)
{
if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
struct perf_counts_values *count =
perf_counts(counter->counts, cpu, thread);
perf_counts(counter->counts, cpu_map_idx, thread);
count->ena = count->run = val;
count->val = val;
return 0;
}
return evsel__read_counter(counter, cpu, thread);
return evsel__read_counter(counter, cpu_map_idx, thread);
}
/*
* Read out the results of a single counter:
* do not aggregate counts across CPUs in system-wide mode
*/
static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu_map_idx)
{
int nthreads = perf_thread_map__nr(evsel_list->core.threads);
int thread;
@ -368,24 +370,24 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
for (thread = 0; thread < nthreads; thread++) {
struct perf_counts_values *count;
count = perf_counts(counter->counts, cpu, thread);
count = perf_counts(counter->counts, cpu_map_idx, thread);
/*
* The leader's group read loads data into its group members
* (via evsel__read_counter()) and sets their count->loaded.
*/
if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
read_single_counter(counter, cpu, thread, rs)) {
if (!perf_counts__is_loaded(counter->counts, cpu_map_idx, thread) &&
read_single_counter(counter, cpu_map_idx, thread, rs)) {
counter->counts->scaled = -1;
perf_counts(counter->counts, cpu, thread)->ena = 0;
perf_counts(counter->counts, cpu, thread)->run = 0;
perf_counts(counter->counts, cpu_map_idx, thread)->ena = 0;
perf_counts(counter->counts, cpu_map_idx, thread)->run = 0;
return -1;
}
perf_counts__set_loaded(counter->counts, cpu, thread, false);
perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, false);
if (STAT_RECORD) {
if (evsel__write_stat_event(counter, cpu, thread, count)) {
if (evsel__write_stat_event(counter, cpu_map_idx, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
@ -395,7 +397,8 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
evsel__name(counter),
cpu,
perf_cpu_map__cpu(evsel__cpus(counter),
cpu_map_idx).cpu,
count->val, count->ena, count->run);
}
}
@ -405,36 +408,33 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
static int read_affinity_counters(struct timespec *rs)
{
struct evsel *counter;
struct affinity affinity;
int i, ncpus, cpu;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity;
if (all_counters_use_bpf)
return 0;
if (affinity__setup(&affinity) < 0)
return -1;
ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
if (!target__has_cpu(&target) || target__has_per_thread(&target))
ncpus = 1;
evlist__for_each_cpu(evsel_list, i, cpu) {
if (i >= ncpus)
break;
affinity__set(&affinity, cpu);
affinity = NULL;
else if (affinity__setup(&saved_affinity) < 0)
return -1;
else
affinity = &saved_affinity;
evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (evsel__is_bpf(counter))
continue;
if (!counter->err) {
counter->err = read_counter_cpu(counter, rs,
counter->cpu_iter - 1);
}
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
struct evsel *counter = evlist_cpu_itr.evsel;
if (evsel__is_bpf(counter))
continue;
if (!counter->err) {
counter->err = read_counter_cpu(counter, rs,
evlist_cpu_itr.cpu_map_idx);
}
}
affinity__cleanup(&affinity);
if (affinity)
affinity__cleanup(&saved_affinity);
return 0;
}
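The rewritten read_affinity_counters() above replaces the old per-CPU loop plus evsel__cpu_iter_skip() bookkeeping with the new evlist_cpu_iterator: it visits every (event, cpu-map index) pair grouped by CPU, so the thread's affinity needs to change at most once per CPU. A standalone analogue of that iteration order follows; struct demo_evsel and the printf standing in for sched_setaffinity() are illustration-only, not perf APIs.

#include <stdio.h>

struct demo_evsel { const char *name; int ncpus; };

int main(void)
{
	struct demo_evsel evsels[] = { { "cycles", 4 }, { "instructions", 2 } };
	const int nevsels = sizeof(evsels) / sizeof(evsels[0]);
	const int ncpus = 4;

	for (int cpu = 0; cpu < ncpus; cpu++) {
		printf("set affinity -> cpu %d\n", cpu);	/* once per CPU */
		for (int e = 0; e < nevsels; e++) {
			if (cpu >= evsels[e].ncpus)
				continue;	/* event not opened on this CPU */
			printf("  read %s at cpu-map index %d\n",
			       evsels[e].name, cpu);
		}
	}
	return 0;
}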
@ -788,8 +788,9 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int status = 0;
const bool forks = (argc > 0);
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct affinity affinity;
int i, cpu, err;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity = NULL;
int err;
bool second_pass = false;
if (forks) {
@ -803,8 +804,11 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (group)
evlist__set_leader(evsel_list);
if (affinity__setup(&affinity) < 0)
return -1;
if (!cpu_map__is_dummy(evsel_list->core.cpus)) {
if (affinity__setup(&saved_affinity) < 0)
return -1;
affinity = &saved_affinity;
}
evlist__for_each_entry(evsel_list, counter) {
if (bpf_counter__load(counter, &target))
@ -813,56 +817,53 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
all_counters_use_bpf = false;
}
evlist__for_each_cpu (evsel_list, i, cpu) {
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
/*
* bperf calls evsel__open_per_cpu() in bperf__load(), so
* no need to call it again here.
*/
if (target.use_bpf)
break;
affinity__set(&affinity, cpu);
evlist__for_each_entry(evsel_list, counter) {
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (counter->reset_group || counter->errored)
continue;
if (evsel__is_bpf(counter))
continue;
if (counter->reset_group || counter->errored)
continue;
if (evsel__is_bpf(counter))
continue;
try_again:
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
/*
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
* doesn't support mixing group and non group reads. Defer
* it to later.
* Don't close here because we're in the wrong affinity.
*/
if ((errno == EINVAL || errno == EBADF) &&
evsel__leader(counter) != counter &&
counter->weak_group) {
evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
second_pass = true;
continue;
}
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again;
case COUNTER_SKIP:
continue;
default:
break;
}
if (create_perf_stat_counter(counter, &stat_config, &target,
evlist_cpu_itr.cpu_map_idx) < 0) {
/*
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
* doesn't support mixing group and non group reads. Defer
* it to later.
* Don't close here because we're in the wrong affinity.
*/
if ((errno == EINVAL || errno == EBADF) &&
evsel__leader(counter) != counter &&
counter->weak_group) {
evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
second_pass = true;
continue;
}
counter->supported = true;
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again;
case COUNTER_SKIP:
continue;
default:
break;
}
}
counter->supported = true;
}
if (second_pass) {
@ -871,45 +872,43 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
* and also close errored counters.
*/
evlist__for_each_cpu(evsel_list, i, cpu) {
affinity__set(&affinity, cpu);
/* First close errored or weak retry */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip_no_inc(counter, cpu))
continue;
perf_evsel__close_cpu(&counter->core, counter->cpu_iter);
}
/* Now reopen weak */
evlist__for_each_entry(evsel_list, counter) {
if (!counter->reset_group && !counter->errored)
continue;
if (evsel__cpu_iter_skip(counter, cpu))
continue;
if (!counter->reset_group)
continue;
try_again_reset:
pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
/* First close errored or weak retry */
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
if (!counter->reset_group && !counter->errored)
continue;
perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
}
/* Now reopen weak */
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
if (!counter->reset_group && !counter->errored)
continue;
if (!counter->reset_group)
continue;
try_again_reset:
pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
evlist_cpu_itr.cpu_map_idx) < 0) {
switch (stat_handle_error(counter)) {
case COUNTER_FATAL:
return -1;
case COUNTER_RETRY:
goto try_again_reset;
case COUNTER_SKIP:
continue;
default:
break;
}
counter->supported = true;
}
counter->supported = true;
}
}
affinity__cleanup(&affinity);
affinity__cleanup(affinity);
evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
@ -1168,6 +1167,26 @@ static int parse_stat_cgroups(const struct option *opt,
return parse_cgroups(opt, str, unset);
}
static int parse_hybrid_type(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
struct evlist *evlist = *(struct evlist **)opt->value;
if (!list_empty(&evlist->core.entries)) {
fprintf(stderr, "Must define cputype before events/metrics\n");
return -1;
}
evlist->hybrid_pmu_name = perf_pmu__hybrid_type_to_pmu(str);
if (!evlist->hybrid_pmu_name) {
fprintf(stderr, "--cputype %s is not supported!\n", str);
return -1;
}
return 0;
}
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@ -1282,6 +1301,10 @@ static struct option stat_options[] = {
"don't print 'summary' for CSV summary output"),
OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
"don't print output (useful with record)"),
OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
"Only enable events on applying cpu with this type "
"for hybrid platform (e.g. core or atom)",
parse_hybrid_type),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
@ -1298,70 +1321,75 @@ static struct option stat_options[] = {
OPT_END()
};
static const char *const aggr_mode__string[] = {
[AGGR_CORE] = "core",
[AGGR_DIE] = "die",
[AGGR_GLOBAL] = "global",
[AGGR_NODE] = "node",
[AGGR_NONE] = "none",
[AGGR_SOCKET] = "socket",
[AGGR_THREAD] = "thread",
[AGGR_UNSET] = "unset",
};
static struct aggr_cpu_id perf_stat__get_socket(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_socket(map, cpu, NULL);
return aggr_cpu_id__socket(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_die(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_die(map, cpu, NULL);
return aggr_cpu_id__die(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_core(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_core(map, cpu, NULL);
return aggr_cpu_id__core(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int cpu)
struct perf_cpu cpu)
{
return cpu_map__get_node(map, cpu, NULL);
return aggr_cpu_id__node(cpu, /*data=*/NULL);
}
static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
aggr_get_id_t get_id, struct perf_cpu_map *map, int idx)
aggr_get_id_t get_id, struct perf_cpu cpu)
{
int cpu;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (idx >= map->nr)
return id;
if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
cpu = map->map[idx];
if (cpu_map__aggr_cpu_id_is_empty(config->cpus_aggr_map->map[cpu]))
config->cpus_aggr_map->map[cpu] = get_id(config, map, idx);
id = config->cpus_aggr_map->map[cpu];
id = config->cpus_aggr_map->map[cpu.cpu];
return id;
}
static struct aggr_cpu_id perf_stat__get_socket_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_socket, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_socket, cpu);
}
static struct aggr_cpu_id perf_stat__get_die_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_die, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_die, cpu);
}
static struct aggr_cpu_id perf_stat__get_core_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_core, cpu);
}
static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *config,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_stat__get_aggr(config, perf_stat__get_node, map, idx);
return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
}
static bool term_percore_set(void)
@ -1376,54 +1404,67 @@ static bool term_percore_set(void)
return false;
}
static int perf_stat_init_aggr_mode(void)
static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
{
int nr;
switch (stat_config.aggr_mode) {
switch (aggr_mode) {
case AGGR_SOCKET:
if (cpu_map__build_socket_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build socket map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_socket_cached;
break;
return aggr_cpu_id__socket;
case AGGR_DIE:
if (cpu_map__build_die_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_cached;
break;
return aggr_cpu_id__die;
case AGGR_CORE:
if (cpu_map__build_core_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_cached;
break;
return aggr_cpu_id__core;
case AGGR_NODE:
if (cpu_map__build_node_map(evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_node_cached;
break;
return aggr_cpu_id__node;
case AGGR_NONE:
if (term_percore_set()) {
if (cpu_map__build_core_map(evsel_list->core.cpus,
&stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_cached;
}
break;
if (term_percore_set())
return aggr_cpu_id__core;
return NULL;
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
break;
return NULL;
}
}
static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
{
switch (aggr_mode) {
case AGGR_SOCKET:
return perf_stat__get_socket_cached;
case AGGR_DIE:
return perf_stat__get_die_cached;
case AGGR_CORE:
return perf_stat__get_core_cached;
case AGGR_NODE:
return perf_stat__get_node_cached;
case AGGR_NONE:
if (term_percore_set()) {
return perf_stat__get_core_cached;
}
return NULL;
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
return NULL;
}
}
static int perf_stat_init_aggr_mode(void)
{
int nr;
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
if (get_id) {
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus,
get_id, /*data=*/NULL);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
}
stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
}
/*
@ -1431,7 +1472,7 @@ static int perf_stat_init_aggr_mode(void)
* taking the highest cpu number to be the size of
* the aggregation translate cpumap.
*/
nr = perf_cpu_map__max(evsel_list->core.cpus);
nr = perf_cpu_map__max(evsel_list->core.cpus).cpu;
stat_config.cpus_aggr_map = cpu_aggr_map__empty_new(nr + 1);
return stat_config.cpus_aggr_map ? 0 : -ENOMEM;
}
@ -1459,169 +1500,139 @@ static void perf_stat__exit_aggr_mode(void)
stat_config.cpus_aggr_map = NULL;
}
static inline int perf_env__get_cpu(struct perf_env *env, struct perf_cpu_map *map, int idx)
{
int cpu;
if (idx > map->nr)
return -1;
cpu = map->map[idx];
if (cpu >= env->nr_cpus_avail)
return -1;
return cpu;
}
static struct aggr_cpu_id perf_env__get_socket(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_socket_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
int cpu = perf_env__get_cpu(env, map, idx);
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (cpu != -1)
id.socket = env->cpu[cpu].socket_id;
if (cpu.cpu != -1)
id.socket = env->cpu[cpu.cpu].socket_id;
return id;
}
static struct aggr_cpu_id perf_env__get_die(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_die_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
int cpu = perf_env__get_cpu(env, map, idx);
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (cpu != -1) {
if (cpu.cpu != -1) {
/*
* die_id is relative to socket, so start
* with the socket ID and then add die to
* make a unique ID.
*/
id.socket = env->cpu[cpu].socket_id;
id.die = env->cpu[cpu].die_id;
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
}
return id;
}
static struct aggr_cpu_id perf_env__get_core(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
struct perf_env *env = data;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
int cpu = perf_env__get_cpu(env, map, idx);
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (cpu != -1) {
if (cpu.cpu != -1) {
/*
* core_id is relative to socket and die,
* we need a global id. So we set
* socket, die id and core id
*/
id.socket = env->cpu[cpu].socket_id;
id.die = env->cpu[cpu].die_id;
id.core = env->cpu[cpu].core_id;
id.socket = env->cpu[cpu.cpu].socket_id;
id.die = env->cpu[cpu.cpu].die_id;
id.core = env->cpu[cpu.cpu].core_id;
}
return id;
}
static struct aggr_cpu_id perf_env__get_node(struct perf_cpu_map *map, int idx, void *data)
static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
{
int cpu = perf_env__get_cpu(data, map, idx);
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id = aggr_cpu_id__empty();
id.node = perf_env__numa_node(data, cpu);
return id;
}
static int perf_env__build_socket_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **sockp)
{
return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
}
static int perf_env__build_die_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **diep)
{
return cpu_map__build_map(cpus, diep, perf_env__get_die, env);
}
static int perf_env__build_core_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **corep)
{
return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
}
static int perf_env__build_node_map(struct perf_env *env, struct perf_cpu_map *cpus,
struct cpu_aggr_map **nodep)
{
return cpu_map__build_map(cpus, nodep, perf_env__get_node, env);
}
static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
return perf_env__get_socket_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_die_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_die(map, idx, &perf_stat.session->header.env);
return perf_env__get_die_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_core(map, idx, &perf_stat.session->header.env);
return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
struct perf_cpu_map *map, int idx)
struct perf_cpu cpu)
{
return perf_env__get_node(map, idx, &perf_stat.session->header.env);
return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
}
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
{
struct perf_env *env = &st->session->header.env;
switch (stat_config.aggr_mode) {
switch (aggr_mode) {
case AGGR_SOCKET:
if (perf_env__build_socket_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build socket map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_socket_file;
break;
return perf_env__get_socket_aggr_by_cpu;
case AGGR_DIE:
if (perf_env__build_die_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build die map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_die_file;
break;
return perf_env__get_die_aggr_by_cpu;
case AGGR_CORE:
if (perf_env__build_core_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_core_file;
break;
return perf_env__get_core_aggr_by_cpu;
case AGGR_NODE:
if (perf_env__build_node_map(env, evsel_list->core.cpus, &stat_config.aggr_map)) {
perror("cannot build core map");
return -1;
}
stat_config.aggr_get_id = perf_stat__get_node_file;
break;
return perf_env__get_node_aggr_by_cpu;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
break;
return NULL;
}
}
static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
{
switch (aggr_mode) {
case AGGR_SOCKET:
return perf_stat__get_socket_file;
case AGGR_DIE:
return perf_stat__get_die_file;
case AGGR_CORE:
return perf_stat__get_core_file;
case AGGR_NODE:
return perf_stat__get_node_file;
case AGGR_NONE:
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
default:
return NULL;
}
}
static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
{
struct perf_env *env = &st->session->header.env;
aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
if (!get_id)
return 0;
stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.cpus, get_id, env);
if (!stat_config.aggr_map) {
pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
return -1;
}
stat_config.aggr_get_id = aggr_mode__get_id_file(stat_config.aggr_mode);
return 0;
}
@ -1750,14 +1761,12 @@ static int add_default_attributes(void)
(PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
(PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
};
struct parse_events_error errinfo;
/* Set attrs if no event is selected and !null_run: */
if (stat_config.null_run)
return 0;
bzero(&errinfo, sizeof(errinfo));
if (transaction_run) {
struct parse_events_error errinfo;
/* Handle -T as -M transaction. Once platform specific metrics
* support has been added to the json files, all architectures
* will use this approach. To determine transaction support
@ -1772,6 +1781,7 @@ static int add_default_attributes(void)
&stat_config.metric_events);
}
parse_events_error__init(&errinfo);
if (pmu_have_event("cpu", "cycles-ct") &&
pmu_have_event("cpu", "el-start"))
err = parse_events(evsel_list, transaction_attrs,
@ -1782,13 +1792,14 @@ static int add_default_attributes(void)
&errinfo);
if (err) {
fprintf(stderr, "Cannot set up transaction events\n");
parse_events_print_error(&errinfo, transaction_attrs);
return -1;
parse_events_error__print(&errinfo, transaction_attrs);
}
return 0;
parse_events_error__exit(&errinfo);
return err ? -1 : 0;
}
if (smi_cost) {
struct parse_events_error errinfo;
int smi;
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
@ -1804,23 +1815,23 @@ static int add_default_attributes(void)
smi_reset = true;
}
if (pmu_have_event("msr", "aperf") &&
pmu_have_event("msr", "smi")) {
if (!force_metric_only)
stat_config.metric_only = true;
err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
} else {
if (!pmu_have_event("msr", "aperf") ||
!pmu_have_event("msr", "smi")) {
fprintf(stderr, "To measure SMI cost, it needs "
"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
parse_events_print_error(&errinfo, smi_cost_attrs);
return -1;
}
if (!force_metric_only)
stat_config.metric_only = true;
parse_events_error__init(&errinfo);
err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
if (err) {
parse_events_print_error(&errinfo, smi_cost_attrs);
parse_events_error__print(&errinfo, smi_cost_attrs);
fprintf(stderr, "Cannot set up SMI cost events\n");
return -1;
}
return 0;
parse_events_error__exit(&errinfo);
return err ? -1 : 0;
}
if (topdown_run) {
@ -1875,18 +1886,22 @@ static int add_default_attributes(void)
return -1;
}
if (topdown_attrs[0] && str) {
struct parse_events_error errinfo;
if (warn)
arch_topdown_group_warn();
setup_metrics:
parse_events_error__init(&errinfo);
err = parse_events(evsel_list, str, &errinfo);
if (err) {
fprintf(stderr,
"Cannot set up top down events %s: %d\n",
str, err);
parse_events_print_error(&errinfo, str);
parse_events_error__print(&errinfo, str);
parse_events_error__exit(&errinfo);
free(str);
return -1;
}
parse_events_error__exit(&errinfo);
} else {
fprintf(stderr, "System does not support topdown\n");
return -1;
@ -1896,6 +1911,7 @@ static int add_default_attributes(void)
if (!evsel_list->core.nr_entries) {
if (perf_pmu__has_hybrid()) {
struct parse_events_error errinfo;
const char *hybrid_str = "cycles,instructions,branches,branch-misses";
if (target__has_cpu(&target))
@ -1906,15 +1922,16 @@ static int add_default_attributes(void)
return -1;
}
parse_events_error__init(&errinfo);
err = parse_events(evsel_list, hybrid_str, &errinfo);
if (err) {
fprintf(stderr,
"Cannot set up hybrid events %s: %d\n",
hybrid_str, err);
parse_events_print_error(&errinfo, hybrid_str);
return -1;
parse_events_error__print(&errinfo, hybrid_str);
}
return err;
parse_events_error__exit(&errinfo);
return err ? -1 : 0;
}
if (target__has_cpu(&target))


@ -1271,7 +1271,7 @@ static int __cmd_top(struct perf_top *top)
pr_debug("Couldn't synthesize cgroup events.\n");
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->core.threads, false,
top->evlist->core.threads, true, false,
top->nr_threads_synthesize);
if (top->nr_threads_synthesize > 1)
@ -1618,6 +1618,10 @@ int cmd_top(int argc, const char **argv)
if (argc)
usage_with_options(top_usage, options);
status = symbol__validate_sym_arguments();
if (status)
goto out_delete_evlist;
if (annotate_check_args(&top.annotation_opts) < 0)
goto out_delete_evlist;


@ -979,6 +979,8 @@ static struct syscall_fmt syscall_fmts[] = {
.arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
{ .name = "getrlimit",
.arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
{ .name = "getsockopt",
.arg = { [1] = STRARRAY(level, socket_level), }, },
{ .name = "gettid", .errpid = true, },
{ .name = "ioctl",
.arg = {
@ -1121,6 +1123,8 @@ static struct syscall_fmt syscall_fmts[] = {
.arg = { [0] = STRARRAY(which, itimers), }, },
{ .name = "setrlimit",
.arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
{ .name = "setsockopt",
.arg = { [1] = STRARRAY(level, socket_level), }, },
{ .name = "socket",
.arg = { [0] = STRARRAY(family, socket_families),
[1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
@ -1532,13 +1536,20 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
return fprintf(fp, " ? ");
}
static pid_t workload_pid = -1;
static bool done = false;
static bool interrupted = false;
static void sig_handler(int sig)
static void sighandler_interrupt(int sig __maybe_unused)
{
done = true;
interrupted = sig == SIGINT;
done = interrupted = true;
}
static void sighandler_chld(int sig __maybe_unused, siginfo_t *info,
void *context __maybe_unused)
{
if (info->si_pid == workload_pid)
done = true;
}
static size_t trace__fprintf_comm_tid(struct trace *trace, struct thread *thread, FILE *fp)
@ -1628,8 +1639,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
goto out;
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
evlist->core.threads, trace__tool_process, false,
1);
evlist->core.threads, trace__tool_process,
true, false, 1);
out:
if (err)
symbol__exit();
@ -2722,6 +2733,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
offset = format_field__intval(field, sample, evsel->needs_swap);
syscall_arg.len = offset >> 16;
offset &= 0xffff;
if (field->flags & TEP_FIELD_IS_RELATIVE)
offset += field->offset + field->size;
}
val = (uintptr_t)(sample->raw_data + offset);
@ -3063,15 +3076,11 @@ static bool evlist__add_vfs_getname(struct evlist *evlist)
struct parse_events_error err;
int ret;
bzero(&err, sizeof(err));
parse_events_error__init(&err);
ret = parse_events(evlist, "probe:vfs_getname*", &err);
if (ret) {
free(err.str);
free(err.help);
free(err.first_str);
free(err.first_help);
parse_events_error__exit(&err);
if (ret)
return false;
}
evlist__for_each_entry_safe(evlist, evsel, tmp) {
if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
@ -3257,10 +3266,21 @@ static void trace__set_bpf_map_syscalls(struct trace *trace)
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
{
struct bpf_program *pos, *prog = NULL;
const char *sec_name;
if (trace->bpf_obj == NULL)
return NULL;
return bpf_object__find_program_by_title(trace->bpf_obj, name);
bpf_object__for_each_program(pos, trace->bpf_obj) {
sec_name = bpf_program__section_name(pos);
if (sec_name && !strcmp(sec_name, name)) {
prog = pos;
break;
}
}
return prog;
}
static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
@ -3950,6 +3970,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__add(evlist, pgfault_min);
}
/* Enable ignoring missing threads when -u/-p option is defined. */
trace->opts.ignore_missing_thread = trace->opts.target.uid != UINT_MAX || trace->opts.target.pid;
if (trace->sched &&
evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
@ -4001,6 +4024,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
fprintf(trace->output, "Couldn't run the workload!\n");
goto out_delete_evlist;
}
workload_pid = evlist->workload.pid;
}
err = evlist__open(evlist);
@ -4870,11 +4894,16 @@ int cmd_trace(int argc, const char **argv)
const char * const trace_subcommands[] = { "record", NULL };
int err = -1;
char bf[BUFSIZ];
struct sigaction sigchld_act;
signal(SIGSEGV, sighandler_dump_stack);
signal(SIGFPE, sighandler_dump_stack);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
signal(SIGINT, sighandler_interrupt);
memset(&sigchld_act, 0, sizeof(sigchld_act));
sigchld_act.sa_flags = SA_SIGINFO;
sigchld_act.sa_sigaction = sighandler_chld;
sigaction(SIGCHLD, &sigchld_act, NULL);
trace.evlist = evlist__new();
trace.sctbl = syscalltbl__new();
@ -4925,12 +4954,13 @@ int cmd_trace(int argc, const char **argv)
if (trace.perfconfig_events != NULL) {
struct parse_events_error parse_err;
bzero(&parse_err, sizeof(parse_err));
parse_events_error__init(&parse_err);
err = parse_events(trace.evlist, trace.perfconfig_events, &parse_err);
if (err) {
parse_events_print_error(&parse_err, trace.perfconfig_events);
if (err)
parse_events_error__print(&parse_err, trace.perfconfig_events);
parse_events_error__exit(&parse_err);
if (err)
goto out;
}
}
if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {


@ -26,6 +26,7 @@ include/vdso/bits.h
include/linux/const.h
include/vdso/const.h
include/linux/hash.h
include/linux/list-sort.h
include/uapi/linux/hw_breakpoint.h
arch/x86/include/asm/disabled-features.h
arch/x86/include/asm/required-features.h
@ -150,6 +151,7 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
check include/linux/ctype.h '-I "isdigit("'
check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
check lib/list_sort.c '-I "^#include <linux/bug.h>"'
# diff non-symmetric files
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl


@ -106,6 +106,9 @@ enum perf_hw_id {
PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
PERF_COUNT_HW_BRANCH_MISSES = 5,
PERF_COUNT_HW_BUS_CYCLES = 6,
PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
PERF_COUNT_HW_REF_CPU_CYCLES = 9,
};
These are standardized types of events that work relatively uniformly
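As a rough illustration only, and not taken from the sources above: counting one of these generalized hardware events for the calling process boils down to filling a struct perf_event_attr and calling perf_event_open(2). A minimal sketch, with error handling kept short, might look like this (glibc has no wrapper for the syscall, so a small local wrapper named perf_event_open is used here for convenience):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

/* glibc provides no wrapper for this syscall, so go through syscall(2). */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_REF_CPU_CYCLES;	/* any id from the enum above */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0 /* this process */, -1 /* any cpu */, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the code to be measured ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("ref-cycles: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}

Whether a given id is backed by an actual counter depends on the CPU and its PMU driver; ids the hardware cannot provide typically make perf_event_open() fail with ENOENT.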


@ -11,7 +11,7 @@
static int get_cpu_set_size(void)
{
int sz = cpu__max_cpu() + 8 - 1;
int sz = cpu__max_cpu().cpu + 8 - 1;
/*
* sched_getaffinity doesn't like masks smaller than the kernel.
* Hopefully that's big enough.
@ -62,7 +62,7 @@ void affinity__set(struct affinity *a, int cpu)
clear_bit(cpu, a->sched_cpus);
}
void affinity__cleanup(struct affinity *a)
static void __affinity__cleanup(struct affinity *a)
{
int cpu_set_size = get_cpu_set_size();
@ -71,3 +71,9 @@ void affinity__cleanup(struct affinity *a)
zfree(&a->sched_cpus);
zfree(&a->orig_cpus);
}
void affinity__cleanup(struct affinity *a)
{
if (a != NULL)
__affinity__cleanup(a);
}


@ -15,6 +15,7 @@
#include <linux/list.h>
#include <linux/perf_event.h>
#include <linux/types.h>
#include <internal/cpumap.h>
#include <asm/bitsperlong.h>
#include <asm/barrier.h>
@ -59,6 +60,7 @@ enum itrace_period_type {
#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a'))
#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a'))
#define AUXTRACE_LOG_FLG_USE_STDOUT (1 << ('o' - 'a'))
/**
* struct itrace_synth_opts - AUX area tracing synthesis options.
@ -84,6 +86,7 @@ enum itrace_period_type {
* @thread_stack: feed branches to the thread_stack
* @last_branch: add branch context to 'instruction' events
* @add_last_branch: add branch context to existing event records
* @approx_ipc: approximate IPC
* @flc: whether to synthesize first level cache events
* @llc: whether to synthesize last level cache events
* @tlb: whether to synthesize TLB events
@ -127,6 +130,7 @@ struct itrace_synth_opts {
bool thread_stack;
bool last_branch;
bool add_last_branch;
bool approx_ipc;
bool flc;
bool llc;
bool tlb;
@ -237,7 +241,7 @@ struct auxtrace_buffer {
size_t size;
pid_t pid;
pid_t tid;
int cpu;
struct perf_cpu cpu;
void *data;
off_t data_offset;
void *mmap_addr;
@ -347,7 +351,7 @@ struct auxtrace_mmap_params {
int prot;
int idx;
pid_t tid;
int cpu;
struct perf_cpu cpu;
};
/**
@ -639,6 +643,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
" d[flags]: create a debug log\n" \
" each flag must be preceded by + or -\n" \
" log flags are: a (all perf events)\n" \
" o (output to stdout)\n" \
" f: synthesize first level cache events\n" \
" m: synthesize last level cache events\n" \
" t: synthesize TLB events\n" \
@ -649,6 +654,8 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
" L[len]: synthesize last branch entries on existing event records\n" \
" sNUMBER: skip initial number of events\n" \
" q: quicker (less detailed) decoding\n" \
" A: approximate IPC\n" \
" Z: prefer to ignore timestamps (so-called \"timeless\" decoding)\n" \
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
" concatenate multiple options. Default is ibxwpe or cewp\n"


@ -10,6 +10,7 @@
#include <internal/lib.h>
#include <symbol/kallsyms.h>
#include "bpf-event.h"
#include "bpf-utils.h"
#include "debug.h"
#include "dso.h"
#include "symbol.h"
@ -32,7 +33,32 @@ struct btf * __weak btf__load_from_kernel_by_id(__u32 id)
return err ? ERR_PTR(err) : btf;
}
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
struct bpf_program * __weak
bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return bpf_program__next(prev, obj);
#pragma GCC diagnostic pop
}
struct bpf_map * __weak
bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return bpf_map__next(prev, obj);
#pragma GCC diagnostic pop
}
const void * __weak
btf__raw_data(const struct btf *btf_ro, __u32 *size)
{
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return btf__get_raw_data(btf_ro, size);
#pragma GCC diagnostic pop
}
static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len)
{
@ -48,9 +74,9 @@ static int machine__process_bpf_event_load(struct machine *machine,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
{
struct bpf_prog_info_linear *info_linear;
struct bpf_prog_info_node *info_node;
struct perf_env *env = machine->env;
struct perf_bpil *info_linear;
int id = event->bpf.id;
unsigned int i;
@ -110,7 +136,7 @@ static int perf_env__fetch_btf(struct perf_env *env,
u32 data_size;
const void *data;
data = btf__get_raw_data(btf, &data_size);
data = btf__raw_data(btf, &data_size);
node = malloc(data_size + sizeof(struct btf_node));
if (!node)
@ -179,9 +205,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
{
struct perf_record_ksymbol *ksymbol_event = &event->ksymbol;
struct perf_record_bpf_event *bpf_event = &event->bpf;
struct bpf_prog_info_linear *info_linear;
struct perf_tool *tool = session->tool;
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
struct bpf_prog_info *info;
struct btf *btf = NULL;
struct perf_env *env;
@ -195,15 +221,15 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
*/
env = session->data ? &session->header.env : &perf_env;
arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
arrays = 1UL << PERF_BPIL_JITED_KSYMS;
arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
arrays |= 1UL << PERF_BPIL_FUNC_INFO;
arrays |= 1UL << PERF_BPIL_PROG_TAGS;
arrays |= 1UL << PERF_BPIL_JITED_INSNS;
arrays |= 1UL << PERF_BPIL_LINE_INFO;
arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
info_linear = bpf_program__get_prog_info_linear(fd, arrays);
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
info_linear = NULL;
pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
@ -456,8 +482,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
{
struct bpf_prog_info_linear *info_linear;
struct bpf_prog_info_node *info_node;
struct perf_bpil *info_linear;
struct btf *btf = NULL;
u64 arrays;
u32 btf_id;
@ -467,15 +493,15 @@ static void perf_env__add_bpf_info(struct perf_env *env, u32 id)
if (fd < 0)
return;
arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS;
arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS;
arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO;
arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS;
arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS;
arrays |= 1UL << BPF_PROG_INFO_LINE_INFO;
arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO;
arrays = 1UL << PERF_BPIL_JITED_KSYMS;
arrays |= 1UL << PERF_BPIL_JITED_FUNC_LENS;
arrays |= 1UL << PERF_BPIL_FUNC_INFO;
arrays |= 1UL << PERF_BPIL_PROG_TAGS;
arrays |= 1UL << PERF_BPIL_JITED_INSNS;
arrays |= 1UL << PERF_BPIL_LINE_INFO;
arrays |= 1UL << PERF_BPIL_JITED_LINE_INFO;
info_linear = bpf_program__get_prog_info_linear(fd, arrays);
info_linear = get_bpf_prog_info_linear(fd, arrays);
if (IS_ERR_OR_NULL(info_linear)) {
pr_debug("%s: failed to get BPF program info. aborting\n", __func__);
goto out;


@ -29,6 +29,9 @@
#include <internal/xyarray.h>
/* temporarily disable libbpf deprecation warnings */
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)),
const char *fmt, va_list args)
{
@ -421,7 +424,7 @@ preproc_gen_prologue(struct bpf_program *prog, int n,
size_t prologue_cnt = 0;
int i, err;
if (IS_ERR(priv) || !priv || priv->is_tp)
if (IS_ERR_OR_NULL(priv) || priv->is_tp)
goto errout;
pev = &priv->pev;
@ -570,7 +573,7 @@ static int hook_load_preprocessor(struct bpf_program *prog)
bool need_prologue = false;
int err, i;
if (IS_ERR(priv) || !priv) {
if (IS_ERR_OR_NULL(priv)) {
pr_debug("Internal error when hook preprocessor\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}
@ -642,8 +645,11 @@ int bpf__probe(struct bpf_object *obj)
goto out;
priv = bpf_program__priv(prog);
if (IS_ERR(priv) || !priv) {
err = PTR_ERR(priv);
if (IS_ERR_OR_NULL(priv)) {
if (!priv)
err = -BPF_LOADER_ERRNO__INTERNAL;
else
err = PTR_ERR(priv);
goto out;
}
@ -693,7 +699,7 @@ int bpf__unprobe(struct bpf_object *obj)
struct bpf_prog_priv *priv = bpf_program__priv(prog);
int i;
if (IS_ERR(priv) || !priv || priv->is_tp)
if (IS_ERR_OR_NULL(priv) || priv->is_tp)
continue;
for (i = 0; i < priv->pev.ntevs; i++) {
@ -751,7 +757,7 @@ int bpf__foreach_event(struct bpf_object *obj,
struct perf_probe_event *pev;
int i, fd;
if (IS_ERR(priv) || !priv) {
if (IS_ERR_OR_NULL(priv)) {
pr_debug("bpf: failed to get private field\n");
return -BPF_LOADER_ERRNO__INTERNAL;
}


@ -16,7 +16,7 @@ typedef int (*bpf_counter_evsel_op)(struct evsel *evsel);
typedef int (*bpf_counter_evsel_target_op)(struct evsel *evsel,
struct target *target);
typedef int (*bpf_counter_evsel_install_pe_op)(struct evsel *evsel,
int cpu,
int cpu_map_idx,
int fd);
struct bpf_counter_ops {
@ -40,7 +40,7 @@ int bpf_counter__enable(struct evsel *evsel);
int bpf_counter__disable(struct evsel *evsel);
int bpf_counter__read(struct evsel *evsel);
void bpf_counter__destroy(struct evsel *evsel);
int bpf_counter__install_pe(struct evsel *evsel, int cpu, int fd);
int bpf_counter__install_pe(struct evsel *evsel, int cpu_map_idx, int fd);
#else /* HAVE_BPF_SKEL */


@ -48,7 +48,7 @@ static int bperf_load_program(struct evlist *evlist)
struct cgroup *cgrp, *leader_cgrp;
__u32 i, cpu;
__u32 nr_cpus = evlist->core.all_cpus->nr;
int total_cpus = cpu__max_cpu();
int total_cpus = cpu__max_cpu().cpu;
int map_size, map_fd;
int prog_fd, err;
@ -65,14 +65,14 @@ static int bperf_load_program(struct evlist *evlist)
/* we need one copy of events per cpu for reading */
map_size = total_cpus * evlist->core.nr_entries / nr_cgroups;
bpf_map__resize(skel->maps.events, map_size);
bpf_map__resize(skel->maps.cgrp_idx, nr_cgroups);
bpf_map__set_max_entries(skel->maps.events, map_size);
bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups);
/* previous result is saved in a per-cpu array */
map_size = evlist->core.nr_entries / nr_cgroups;
bpf_map__resize(skel->maps.prev_readings, map_size);
bpf_map__set_max_entries(skel->maps.prev_readings, map_size);
/* cgroup result needs all events (per-cpu) */
map_size = evlist->core.nr_entries;
bpf_map__resize(skel->maps.cgrp_readings, map_size);
bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size);
set_max_rlimit();
@ -125,7 +125,7 @@ static int bperf_load_program(struct evlist *evlist)
for (cpu = 0; cpu < nr_cpus; cpu++) {
int fd = FD(evsel, cpu);
__u32 idx = evsel->core.idx * total_cpus +
evlist->core.all_cpus->map[cpu];
evlist->core.all_cpus->map[cpu].cpu;
err = bpf_map_update_elem(map_fd, &idx, &fd,
BPF_ANY);
@ -212,7 +212,7 @@ static int bperf_cgrp__sync_counters(struct evlist *evlist)
int prog_fd = bpf_program__fd(skel->progs.trigger_read);
for (i = 0; i < nr_cpus; i++) {
cpu = evlist->core.all_cpus->map[i];
cpu = evlist->core.all_cpus->map[i].cpu;
bperf_trigger_reading(prog_fd, cpu);
}
@ -245,7 +245,7 @@ static int bperf_cgrp__read(struct evsel *evsel)
{
struct evlist *evlist = evsel->evlist;
int i, cpu, nr_cpus = evlist->core.all_cpus->nr;
int total_cpus = cpu__max_cpu();
int total_cpus = cpu__max_cpu().cpu;
struct perf_counts_values *counts;
struct bpf_perf_event_value *values;
int reading_map_fd, err = 0;
@ -266,13 +266,13 @@ static int bperf_cgrp__read(struct evsel *evsel)
idx = evsel->core.idx;
err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
if (err) {
pr_err("bpf map lookup falied: idx=%u, event=%s, cgrp=%s\n",
pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
idx, evsel__name(evsel), evsel->cgrp->name);
goto out;
}
for (i = 0; i < nr_cpus; i++) {
cpu = evlist->core.all_cpus->map[i];
cpu = evlist->core.all_cpus->map[i].cpu;
counts = perf_counts(evsel->counts, i, 0);
counts->val = values[cpu].counter;


@ -280,6 +280,8 @@ static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
}
#endif
void arch__add_leaf_frame_record_opts(struct record_opts *opts);
char *callchain_list__sym_name(struct callchain_list *cl,
char *bf, size_t bfsize, bool show_dso);
char *callchain_node__scnprintf_value(struct callchain_node *node,
@ -298,7 +300,7 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
void callchain_param_setup(u64 sample_type);
void callchain_param_setup(u64 sample_type, const char *arch);
bool callchain_cnode_matched(struct callchain_node *base_cnode,
struct callchain_node *pair_cnode);


@ -18,21 +18,21 @@ struct perf_counts {
static inline struct perf_counts_values*
perf_counts(struct perf_counts *counts, int cpu, int thread)
perf_counts(struct perf_counts *counts, int cpu_map_idx, int thread)
{
return xyarray__entry(counts->values, cpu, thread);
return xyarray__entry(counts->values, cpu_map_idx, thread);
}
static inline bool
perf_counts__is_loaded(struct perf_counts *counts, int cpu, int thread)
perf_counts__is_loaded(struct perf_counts *counts, int cpu_map_idx, int thread)
{
return *((bool *) xyarray__entry(counts->loaded, cpu, thread));
return *((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread));
}
static inline void
perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool loaded)
perf_counts__set_loaded(struct perf_counts *counts, int cpu_map_idx, int thread, bool loaded)
{
*((bool *) xyarray__entry(counts->loaded, cpu, thread)) = loaded;
*((bool *) xyarray__entry(counts->loaded, cpu_map_idx, thread)) = loaded;
}
struct perf_counts *perf_counts__new(int ncpus, int nthreads);
@ -40,7 +40,7 @@ void perf_counts__delete(struct perf_counts *counts);
void perf_counts__reset(struct perf_counts *counts);
void evsel__reset_counts(struct evsel *evsel);
int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
int evsel__alloc_counts(struct evsel *evsel);
void evsel__free_counts(struct evsel *evsel);
#endif /* __PERF_COUNTS_H */


@ -13,9 +13,13 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
static int max_cpu_num;
static int max_present_cpu_num;
static struct perf_cpu max_cpu_num;
static struct perf_cpu max_present_cpu_num;
static int max_node_num;
/**
* The numa node X as read from /sys/devices/system/node/nodeX indexed by the
* CPU number.
*/
static int *cpunode_map;
static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
@ -33,9 +37,9 @@ static struct perf_cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
* otherwise it would become 65535.
*/
if (cpus->cpu[i] == (u16) -1)
map->map[i] = -1;
map->map[i].cpu = -1;
else
map->map[i] = (int) cpus->cpu[i];
map->map[i].cpu = (int) cpus->cpu[i];
}
}
@ -54,7 +58,7 @@ static struct perf_cpu_map *cpu_map__from_mask(struct perf_record_record_cpu_map
int cpu, i = 0;
for_each_set_bit(cpu, mask->mask, nbits)
map->map[i++] = cpu;
map->map[i++].cpu = cpu;
}
return map;
@ -87,7 +91,7 @@ struct perf_cpu_map *perf_cpu_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
cpus->map[i] = -1;
cpus->map[i].cpu = -1;
refcount_set(&cpus->refcnt, 1);
}
@ -104,7 +108,7 @@ struct cpu_aggr_map *cpu_aggr_map__empty_new(int nr)
cpus->nr = nr;
for (i = 0; i < nr; i++)
cpus->map[i] = cpu_map__empty_aggr_cpu_id();
cpus->map[i] = aggr_cpu_id__empty();
refcount_set(&cpus->refcnt, 1);
}
@ -122,28 +126,21 @@ static int cpu__get_topology_int(int cpu, const char *name, int *value)
return sysfs__read_int(path, value);
}
int cpu_map__get_socket_id(int cpu)
int cpu__get_socket_id(struct perf_cpu cpu)
{
int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value);
int value, ret = cpu__get_topology_int(cpu.cpu, "physical_package_id", &value);
return ret ?: value;
}
struct aggr_cpu_id cpu_map__get_socket(struct perf_cpu_map *map, int idx,
void *data __maybe_unused)
struct aggr_cpu_id aggr_cpu_id__socket(struct perf_cpu cpu, void *data __maybe_unused)
{
int cpu;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id = aggr_cpu_id__empty();
if (idx > map->nr)
return id;
cpu = map->map[idx];
id.socket = cpu_map__get_socket_id(cpu);
id.socket = cpu__get_socket_id(cpu);
return id;
}
static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
{
struct aggr_cpu_id *a = (struct aggr_cpu_id *)a_pointer;
struct aggr_cpu_id *b = (struct aggr_cpu_id *)b_pointer;
@ -160,57 +157,64 @@ static int cmp_aggr_cpu_id(const void *a_pointer, const void *b_pointer)
return a->thread - b->thread;
}
int cpu_map__build_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **res,
struct aggr_cpu_id (*f)(struct perf_cpu_map *map, int cpu, void *data),
void *data)
struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
aggr_cpu_id_get_t get_id,
void *data)
{
int nr = cpus->nr;
struct cpu_aggr_map *c = cpu_aggr_map__empty_new(nr);
int cpu, s2;
struct aggr_cpu_id s1;
int idx;
struct perf_cpu cpu;
struct cpu_aggr_map *c = cpu_aggr_map__empty_new(cpus->nr);
if (!c)
return -1;
return NULL;
/* Reset size as it may only be partially filled */
c->nr = 0;
for (cpu = 0; cpu < nr; cpu++) {
s1 = f(cpus, cpu, data);
for (s2 = 0; s2 < c->nr; s2++) {
if (cpu_map__compare_aggr_cpu_id(s1, c->map[s2]))
perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
bool duplicate = false;
struct aggr_cpu_id cpu_id = get_id(cpu, data);
for (int j = 0; j < c->nr; j++) {
if (aggr_cpu_id__equal(&cpu_id, &c->map[j])) {
duplicate = true;
break;
}
}
if (s2 == c->nr) {
c->map[c->nr] = s1;
if (!duplicate) {
c->map[c->nr] = cpu_id;
c->nr++;
}
}
/* ensure we process id in increasing order */
qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), cmp_aggr_cpu_id);
/* Trim. */
if (c->nr != cpus->nr) {
struct cpu_aggr_map *trimmed_c =
realloc(c,
sizeof(struct cpu_aggr_map) + sizeof(struct aggr_cpu_id) * c->nr);
if (trimmed_c)
c = trimmed_c;
}
/* ensure we process id in increasing order */
qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
return c;
*res = c;
return 0;
}
int cpu_map__get_die_id(int cpu)
int cpu__get_die_id(struct perf_cpu cpu)
{
int value, ret = cpu__get_topology_int(cpu, "die_id", &value);
int value, ret = cpu__get_topology_int(cpu.cpu, "die_id", &value);
return ret ?: value;
}
struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *data)
struct aggr_cpu_id aggr_cpu_id__die(struct perf_cpu cpu, void *data)
{
int cpu, die;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id;
int die;
if (idx > map->nr)
return id;
cpu = map->map[idx];
die = cpu_map__get_die_id(cpu);
die = cpu__get_die_id(cpu);
/* There is no die_id on legacy system. */
if (die == -1)
die = 0;
@ -220,79 +224,59 @@ struct aggr_cpu_id cpu_map__get_die(struct perf_cpu_map *map, int idx, void *dat
* with the socket ID and then add die to
* make a unique ID.
*/
id = cpu_map__get_socket(map, idx, data);
if (cpu_map__aggr_cpu_id_is_empty(id))
id = aggr_cpu_id__socket(cpu, data);
if (aggr_cpu_id__is_empty(&id))
return id;
id.die = die;
return id;
}
int cpu_map__get_core_id(int cpu)
int cpu__get_core_id(struct perf_cpu cpu)
{
int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
int value, ret = cpu__get_topology_int(cpu.cpu, "core_id", &value);
return ret ?: value;
}
int cpu_map__get_node_id(int cpu)
struct aggr_cpu_id aggr_cpu_id__core(struct perf_cpu cpu, void *data)
{
return cpu__get_node(cpu);
}
struct aggr_cpu_id id;
int core = cpu__get_core_id(cpu);
struct aggr_cpu_id cpu_map__get_core(struct perf_cpu_map *map, int idx, void *data)
{
int cpu;
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
if (idx > map->nr)
return id;
cpu = map->map[idx];
cpu = cpu_map__get_core_id(cpu);
/* cpu_map__get_die returns a struct with socket and die set*/
id = cpu_map__get_die(map, idx, data);
if (cpu_map__aggr_cpu_id_is_empty(id))
/* aggr_cpu_id__die returns a struct with socket and die set. */
id = aggr_cpu_id__die(cpu, data);
if (aggr_cpu_id__is_empty(&id))
return id;
/*
* core_id is relative to socket and die, we need a global id.
* So we combine the result from cpu_map__get_die with the core id
*/
id.core = cpu;
id.core = core;
return id;
}
struct aggr_cpu_id cpu_map__get_node(struct perf_cpu_map *map, int idx, void *data __maybe_unused)
struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data)
{
struct aggr_cpu_id id = cpu_map__empty_aggr_cpu_id();
struct aggr_cpu_id id;
if (idx < 0 || idx >= map->nr)
/* aggr_cpu_id__core returns a struct with socket, die and core set. */
id = aggr_cpu_id__core(cpu, data);
if (aggr_cpu_id__is_empty(&id))
return id;
id.node = cpu_map__get_node_id(map->map[idx]);
id.cpu = cpu;
return id;
}
int cpu_map__build_socket_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **sockp)
struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unused)
{
return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
}
struct aggr_cpu_id id = aggr_cpu_id__empty();
int cpu_map__build_die_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **diep)
{
return cpu_map__build_map(cpus, diep, cpu_map__get_die, NULL);
}
int cpu_map__build_core_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **corep)
{
return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
}
int cpu_map__build_node_map(struct perf_cpu_map *cpus, struct cpu_aggr_map **numap)
{
return cpu_map__build_map(cpus, numap, cpu_map__get_node, NULL);
id.node = cpu__get_node(cpu);
return id;
}
/* setup simple routines to easily access node numbers given a cpu number */
@ -335,8 +319,8 @@ static void set_max_cpu_num(void)
int ret = -1;
/* set up default */
max_cpu_num = 4096;
max_present_cpu_num = 4096;
max_cpu_num.cpu = 4096;
max_present_cpu_num.cpu = 4096;
mnt = sysfs__mountpoint();
if (!mnt)
@ -349,7 +333,7 @@ static void set_max_cpu_num(void)
goto out;
}
ret = get_max_num(path, &max_cpu_num);
ret = get_max_num(path, &max_cpu_num.cpu);
if (ret)
goto out;
@ -360,11 +344,11 @@ static void set_max_cpu_num(void)
goto out;
}
ret = get_max_num(path, &max_present_cpu_num);
ret = get_max_num(path, &max_present_cpu_num.cpu);
out:
if (ret)
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num);
pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num.cpu);
}
/* Determine highest possible node in the system for sparse allocation */
@ -403,31 +387,31 @@ int cpu__max_node(void)
return max_node_num;
}
int cpu__max_cpu(void)
struct perf_cpu cpu__max_cpu(void)
{
if (unlikely(!max_cpu_num))
if (unlikely(!max_cpu_num.cpu))
set_max_cpu_num();
return max_cpu_num;
}
int cpu__max_present_cpu(void)
struct perf_cpu cpu__max_present_cpu(void)
{
if (unlikely(!max_present_cpu_num))
if (unlikely(!max_present_cpu_num.cpu))
set_max_cpu_num();
return max_present_cpu_num;
}
int cpu__get_node(int cpu)
int cpu__get_node(struct perf_cpu cpu)
{
if (unlikely(cpunode_map == NULL)) {
pr_debug("cpu_map not initialized\n");
return -1;
}
return cpunode_map[cpu];
return cpunode_map[cpu.cpu];
}
static int init_cpunode_map(void)
@ -437,13 +421,13 @@ static int init_cpunode_map(void)
set_max_cpu_num();
set_max_node_num();
cpunode_map = calloc(max_cpu_num, sizeof(int));
cpunode_map = calloc(max_cpu_num.cpu, sizeof(int));
if (!cpunode_map) {
pr_err("%s: calloc failed\n", __func__);
return -1;
}
for (i = 0; i < max_cpu_num; i++)
for (i = 0; i < max_cpu_num.cpu; i++)
cpunode_map[i] = -1;
return 0;
@ -502,47 +486,39 @@ int cpu__setup_cpunode_map(void)
return 0;
}
bool cpu_map__has(struct perf_cpu_map *cpus, int cpu)
{
return perf_cpu_map__idx(cpus, cpu) != -1;
}
int cpu_map__cpu(struct perf_cpu_map *cpus, int idx)
{
return cpus->map[idx];
}
size_t cpu_map__snprint(struct perf_cpu_map *map, char *buf, size_t size)
{
int i, cpu, start = -1;
int i, start = -1;
bool first = true;
size_t ret = 0;
#define COMMA first ? "" : ","
for (i = 0; i < map->nr + 1; i++) {
struct perf_cpu cpu = { .cpu = INT_MAX };
bool last = i == map->nr;
cpu = last ? INT_MAX : map->map[i];
if (!last)
cpu = map->map[i];
if (start == -1) {
start = i;
if (last) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
map->map[i]);
map->map[i].cpu);
}
} else if (((i - start) != (cpu - map->map[start])) || last) {
} else if (((i - start) != (cpu.cpu - map->map[start].cpu)) || last) {
int end = i - 1;
if (start == end) {
ret += snprintf(buf + ret, size - ret,
"%s%d", COMMA,
map->map[start]);
map->map[start].cpu);
} else {
ret += snprintf(buf + ret, size - ret,
"%s%d-%d", COMMA,
map->map[start], map->map[end]);
map->map[start].cpu, map->map[end].cpu);
}
first = false;
start = i;
@ -569,23 +545,23 @@ size_t cpu_map__snprint_mask(struct perf_cpu_map *map, char *buf, size_t size)
int i, cpu;
char *ptr = buf;
unsigned char *bitmap;
int last_cpu = cpu_map__cpu(map, map->nr - 1);
struct perf_cpu last_cpu = perf_cpu_map__cpu(map, map->nr - 1);
if (buf == NULL)
return 0;
bitmap = zalloc(last_cpu / 8 + 1);
bitmap = zalloc(last_cpu.cpu / 8 + 1);
if (bitmap == NULL) {
buf[0] = '\0';
return 0;
}
for (i = 0; i < map->nr; i++) {
cpu = cpu_map__cpu(map, i);
cpu = perf_cpu_map__cpu(map, i).cpu;
bitmap[cpu / 8] |= 1 << (cpu % 8);
}
for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) {
for (cpu = last_cpu.cpu / 4 * 4; cpu >= 0; cpu -= 4) {
unsigned char bits = bitmap[cpu / 8];
if (cpu % 8)
@ -614,32 +590,35 @@ const struct perf_cpu_map *cpu_map__online(void) /* thread unsafe */
return online;
}
bool cpu_map__compare_aggr_cpu_id(struct aggr_cpu_id a, struct aggr_cpu_id b)
bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b)
{
return a.thread == b.thread &&
a.node == b.node &&
a.socket == b.socket &&
a.die == b.die &&
a.core == b.core;
return a->thread == b->thread &&
a->node == b->node &&
a->socket == b->socket &&
a->die == b->die &&
a->core == b->core &&
a->cpu.cpu == b->cpu.cpu;
}
bool cpu_map__aggr_cpu_id_is_empty(struct aggr_cpu_id a)
bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a)
{
return a.thread == -1 &&
a.node == -1 &&
a.socket == -1 &&
a.die == -1 &&
a.core == -1;
return a->thread == -1 &&
a->node == -1 &&
a->socket == -1 &&
a->die == -1 &&
a->core == -1 &&
a->cpu.cpu == -1;
}
struct aggr_cpu_id cpu_map__empty_aggr_cpu_id(void)
struct aggr_cpu_id aggr_cpu_id__empty(void)
{
struct aggr_cpu_id ret = {
.thread = -1,
.node = -1,
.socket = -1,
.die = -1,
.core = -1
.core = -1,
.cpu = (struct perf_cpu){ .cpu = -1 },
};
return ret;
}


@ -14,14 +14,16 @@
#include "env.h"
#include "pmu-hybrid.h"
#define CORE_SIB_FMT \
#define PACKAGE_CPUS_FMT \
"%s/devices/system/cpu/cpu%d/topology/package_cpus_list"
#define PACKAGE_CPUS_FMT_OLD \
"%s/devices/system/cpu/cpu%d/topology/core_siblings_list"
#define DIE_SIB_FMT \
#define DIE_CPUS_FMT \
"%s/devices/system/cpu/cpu%d/topology/die_cpus_list"
#define THRD_SIB_FMT \
"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
#define THRD_SIB_FMT_NEW \
#define CORE_CPUS_FMT \
"%s/devices/system/cpu/cpu%d/topology/core_cpus_list"
#define CORE_CPUS_FMT_OLD \
"%s/devices/system/cpu/cpu%d/topology/thread_siblings_list"
#define NODE_ONLINE_FMT \
"%s/devices/system/node/online"
#define NODE_MEMINFO_FMT \
@ -39,8 +41,12 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
u32 i = 0;
int ret = -1;
scnprintf(filename, MAXPATHLEN, CORE_SIB_FMT,
scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT,
sysfs__mountpoint(), cpu);
if (access(filename, F_OK) == -1) {
scnprintf(filename, MAXPATHLEN, PACKAGE_CPUS_FMT_OLD,
sysfs__mountpoint(), cpu);
}
fp = fopen(filename, "r");
if (!fp)
goto try_dies;
@ -54,23 +60,23 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
if (p)
*p = '\0';
for (i = 0; i < tp->core_sib; i++) {
if (!strcmp(buf, tp->core_siblings[i]))
for (i = 0; i < tp->package_cpus_lists; i++) {
if (!strcmp(buf, tp->package_cpus_list[i]))
break;
}
if (i == tp->core_sib) {
tp->core_siblings[i] = buf;
tp->core_sib++;
if (i == tp->package_cpus_lists) {
tp->package_cpus_list[i] = buf;
tp->package_cpus_lists++;
buf = NULL;
len = 0;
}
ret = 0;
try_dies:
if (!tp->die_siblings)
if (!tp->die_cpus_list)
goto try_threads;
scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
sysfs__mountpoint(), cpu);
fp = fopen(filename, "r");
if (!fp)
@ -85,23 +91,23 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
if (p)
*p = '\0';
for (i = 0; i < tp->die_sib; i++) {
if (!strcmp(buf, tp->die_siblings[i]))
for (i = 0; i < tp->die_cpus_lists; i++) {
if (!strcmp(buf, tp->die_cpus_list[i]))
break;
}
if (i == tp->die_sib) {
tp->die_siblings[i] = buf;
tp->die_sib++;
if (i == tp->die_cpus_lists) {
tp->die_cpus_list[i] = buf;
tp->die_cpus_lists++;
buf = NULL;
len = 0;
}
ret = 0;
try_threads:
scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT_NEW,
scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT,
sysfs__mountpoint(), cpu);
if (access(filename, F_OK) == -1) {
scnprintf(filename, MAXPATHLEN, THRD_SIB_FMT,
scnprintf(filename, MAXPATHLEN, CORE_CPUS_FMT_OLD,
sysfs__mountpoint(), cpu);
}
fp = fopen(filename, "r");
@ -115,13 +121,13 @@ static int build_cpu_topology(struct cpu_topology *tp, int cpu)
if (p)
*p = '\0';
for (i = 0; i < tp->thread_sib; i++) {
if (!strcmp(buf, tp->thread_siblings[i]))
for (i = 0; i < tp->core_cpus_lists; i++) {
if (!strcmp(buf, tp->core_cpus_list[i]))
break;
}
if (i == tp->thread_sib) {
tp->thread_siblings[i] = buf;
tp->thread_sib++;
if (i == tp->core_cpus_lists) {
tp->core_cpus_list[i] = buf;
tp->core_cpus_lists++;
buf = NULL;
}
ret = 0;
@ -139,16 +145,14 @@ void cpu_topology__delete(struct cpu_topology *tp)
if (!tp)
return;
for (i = 0 ; i < tp->core_sib; i++)
zfree(&tp->core_siblings[i]);
for (i = 0 ; i < tp->package_cpus_lists; i++)
zfree(&tp->package_cpus_list[i]);
if (tp->die_sib) {
for (i = 0 ; i < tp->die_sib; i++)
zfree(&tp->die_siblings[i]);
}
for (i = 0 ; i < tp->die_cpus_lists; i++)
zfree(&tp->die_cpus_list[i]);
for (i = 0 ; i < tp->thread_sib; i++)
zfree(&tp->thread_siblings[i]);
for (i = 0 ; i < tp->core_cpus_lists; i++)
zfree(&tp->core_cpus_list[i]);
free(tp);
}
@ -161,10 +165,11 @@ static bool has_die_topology(void)
if (uname(&uts) < 0)
return false;
if (strncmp(uts.machine, "x86_64", 6))
if (strncmp(uts.machine, "x86_64", 6) &&
strncmp(uts.machine, "s390x", 5))
return false;
scnprintf(filename, MAXPATHLEN, DIE_SIB_FMT,
scnprintf(filename, MAXPATHLEN, DIE_CPUS_FMT,
sysfs__mountpoint(), 0);
if (access(filename, F_OK) == -1)
return false;
@ -183,7 +188,7 @@ struct cpu_topology *cpu_topology__new(void)
struct perf_cpu_map *map;
bool has_die = has_die_topology();
ncpus = cpu__max_present_cpu();
ncpus = cpu__max_present_cpu().cpu;
/* build online CPU map */
map = perf_cpu_map__new(NULL);
@ -205,16 +210,16 @@ struct cpu_topology *cpu_topology__new(void)
tp = addr;
addr += sizeof(*tp);
tp->core_siblings = addr;
tp->package_cpus_list = addr;
addr += sz;
if (has_die) {
tp->die_siblings = addr;
tp->die_cpus_list = addr;
addr += sz;
}
tp->thread_siblings = addr;
tp->core_cpus_list = addr;
for (i = 0; i < nr; i++) {
if (!cpu_map__has(map, i))
if (!perf_cpu_map__has(map, (struct perf_cpu){ .cpu = i }))
continue;
ret = build_cpu_topology(tp, i);
@ -320,7 +325,7 @@ struct numa_topology *numa_topology__new(void)
if (!node_map)
goto out;
nr = (u32) node_map->nr;
nr = (u32) perf_cpu_map__nr(node_map);
tp = zalloc(sizeof(*tp) + sizeof(tp->nodes[0])*nr);
if (!tp)
@ -329,7 +334,7 @@ struct numa_topology *numa_topology__new(void)
tp->nr = nr;
for (i = 0; i < nr; i++) {
if (load_numa_node(&tp->nodes[i], node_map->map[i])) {
if (load_numa_node(&tp->nodes[i], perf_cpu_map__cpu(node_map, i).cpu)) {
numa_topology__delete(tp);
tp = NULL;
break;


@ -5,12 +5,33 @@
#include <linux/types.h>
struct cpu_topology {
u32 core_sib;
u32 die_sib;
u32 thread_sib;
char **core_siblings;
char **die_siblings;
char **thread_siblings;
/* The number of unique package_cpus_lists below. */
u32 package_cpus_lists;
/* The number of unique die_cpus_lists below. */
u32 die_cpus_lists;
/* The number of unique core_cpus_lists below. */
u32 core_cpus_lists;
/*
* An array of strings where each string is unique and read from
* /sys/devices/system/cpu/cpuX/topology/package_cpus_list. From the ABI
* each of these is a human-readable list of CPUs sharing the same
* physical_package_id. The format is like 0-3, 8-11, 14,17.
*/
const char **package_cpus_list;
/*
* An array of strings where each string is unique and read from
* /sys/devices/system/cpu/cpuX/topology/die_cpus_list. From the ABI
* each of these is a human-readable list of CPUs within the same die.
* The format is like 0-3, 8-11, 14,17.
*/
const char **die_cpus_list;
/*
* An array of strings where each string is unique and read from
* /sys/devices/system/cpu/cpuX/topology/core_cpus_list. From the ABI
* each of these is a human-readable list of CPUs within the same
* core. The format is like 0-3, 8-11, 14,17.
*/
const char **core_cpus_list;
};
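The topology lists described in the comments above come straight from sysfs, so they can be inspected outside perf. A minimal standalone sketch (not perf code) that prints the package_cpus_list string for cpu0, assuming the sysfs file named in the comment exists on the running system:
#include <stdio.h>
int main(void)
{
	/* Same file the package_cpus_list comment above refers to. */
	FILE *fp = fopen("/sys/devices/system/cpu/cpu0/topology/package_cpus_list", "r");
	char buf[256];
	if (!fp)
		return 1;
	if (fgets(buf, sizeof(buf), fp))
		printf("cpu0 package_cpus_list: %s", buf);
	fclose(fp);
	return 0;
}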
struct numa_topology_node {

View File

@ -318,6 +318,8 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
offset = tmp_val;
len = offset >> 16;
offset &= 0xffff;
if (flags & TEP_FIELD_IS_RELATIVE)
offset += fmtf->offset + fmtf->size;
}
if (flags & TEP_FIELD_IS_ARRAY) {
@ -1437,7 +1439,7 @@ static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
goto err;
#if __BYTE_ORDER == __BIG_ENDIAN
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
#else
bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);

View File

@ -14,6 +14,7 @@
#ifdef HAVE_LIBBPF_SUPPORT
#include <bpf/libbpf.h>
#include "bpf-event.h"
#include "bpf-utils.h"
#endif
#include "compress.h"
#include "env.h"

View File

@ -193,7 +193,7 @@ struct dso {
int fd;
int status;
u32 status_seen;
size_t file_size;
u64 file_size;
struct list_head open_entry;
u64 debug_frame_offset;
u64 eh_frame_hdr_offset;

View File

@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/rbtree.h>
#include "cpumap.h"
#include "rwsem.h"
struct perf_cpu_map;
@ -170,5 +171,5 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env,
bool perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node);
struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id);
int perf_env__numa_node(struct perf_env *env, int cpu);
int perf_env__numa_node(struct perf_env *env, struct perf_cpu cpu);
#endif /* __PERF_ENV_H */

View File

@ -342,36 +342,71 @@ static int evlist__nr_threads(struct evlist *evlist, struct evsel *evsel)
return perf_thread_map__nr(evlist->core.threads);
}
void evlist__cpu_iter_start(struct evlist *evlist)
struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
{
struct evsel *pos;
struct evlist_cpu_iterator itr = {
.container = evlist,
.evsel = NULL,
.cpu_map_idx = 0,
.evlist_cpu_map_idx = 0,
.evlist_cpu_map_nr = perf_cpu_map__nr(evlist->core.all_cpus),
.cpu = (struct perf_cpu){ .cpu = -1},
.affinity = affinity,
};
/*
* Reset the per evsel cpu_iter. This is needed because
* each evsel's cpumap may have a different index space,
* and some operations need the index to modify
* the FD xyarray (e.g. open, close)
*/
evlist__for_each_entry(evlist, pos)
pos->cpu_iter = 0;
}
bool evsel__cpu_iter_skip_no_inc(struct evsel *ev, int cpu)
{
if (ev->cpu_iter >= ev->core.cpus->nr)
return true;
if (cpu >= 0 && ev->core.cpus->map[ev->cpu_iter] != cpu)
return true;
return false;
}
bool evsel__cpu_iter_skip(struct evsel *ev, int cpu)
{
if (!evsel__cpu_iter_skip_no_inc(ev, cpu)) {
ev->cpu_iter++;
return false;
if (evlist__empty(evlist)) {
/* Ensure the empty list doesn't iterate. */
itr.evlist_cpu_map_idx = itr.evlist_cpu_map_nr;
} else {
itr.evsel = evlist__first(evlist);
if (itr.affinity) {
itr.cpu = perf_cpu_map__cpu(evlist->core.all_cpus, 0);
affinity__set(itr.affinity, itr.cpu.cpu);
itr.cpu_map_idx = perf_cpu_map__idx(itr.evsel->core.cpus, itr.cpu);
/*
* If this CPU isn't in the evsel's cpu map then advance
* through the list.
*/
if (itr.cpu_map_idx == -1)
evlist_cpu_iterator__next(&itr);
}
}
return true;
return itr;
}
void evlist_cpu_iterator__next(struct evlist_cpu_iterator *evlist_cpu_itr)
{
while (evlist_cpu_itr->evsel != evlist__last(evlist_cpu_itr->container)) {
evlist_cpu_itr->evsel = evsel__next(evlist_cpu_itr->evsel);
evlist_cpu_itr->cpu_map_idx =
perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
evlist_cpu_itr->cpu);
if (evlist_cpu_itr->cpu_map_idx != -1)
return;
}
evlist_cpu_itr->evlist_cpu_map_idx++;
if (evlist_cpu_itr->evlist_cpu_map_idx < evlist_cpu_itr->evlist_cpu_map_nr) {
evlist_cpu_itr->evsel = evlist__first(evlist_cpu_itr->container);
evlist_cpu_itr->cpu =
perf_cpu_map__cpu(evlist_cpu_itr->container->core.all_cpus,
evlist_cpu_itr->evlist_cpu_map_idx);
if (evlist_cpu_itr->affinity)
affinity__set(evlist_cpu_itr->affinity, evlist_cpu_itr->cpu.cpu);
evlist_cpu_itr->cpu_map_idx =
perf_cpu_map__idx(evlist_cpu_itr->evsel->core.cpus,
evlist_cpu_itr->cpu);
/*
* If this CPU isn't in the evsel's cpu map then advance through
* the list.
*/
if (evlist_cpu_itr->cpu_map_idx == -1)
evlist_cpu_iterator__next(evlist_cpu_itr);
}
}
bool evlist_cpu_iterator__end(const struct evlist_cpu_iterator *evlist_cpu_itr)
{
return evlist_cpu_itr->evlist_cpu_map_idx >= evlist_cpu_itr->evlist_cpu_map_nr;
}
static int evsel__strcmp(struct evsel *pos, char *evsel_name)
@ -400,37 +435,36 @@ static int evlist__is_enabled(struct evlist *evlist)
static void __evlist__disable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
struct affinity affinity;
int cpu, i, imm = 0;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity = NULL;
bool has_imm = false;
if (affinity__setup(&affinity) < 0)
return;
// See explanation in evlist__close()
if (!cpu_map__is_dummy(evlist->core.cpus)) {
if (affinity__setup(&saved_affinity) < 0)
return;
affinity = &saved_affinity;
}
/* Disable 'immediate' events last */
for (imm = 0; imm <= 1; imm++) {
evlist__for_each_cpu(evlist, i, cpu) {
affinity__set(&affinity, cpu);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
if (evsel__cpu_iter_skip(pos, cpu))
continue;
if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
continue;
if (pos->immediate)
has_imm = true;
if (pos->immediate != imm)
continue;
evsel__disable_cpu(pos, pos->cpu_iter - 1);
}
for (int imm = 0; imm <= 1; imm++) {
evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
pos = evlist_cpu_itr.evsel;
if (evsel__strcmp(pos, evsel_name))
continue;
if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
continue;
if (pos->immediate)
has_imm = true;
if (pos->immediate != imm)
continue;
evsel__disable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
if (!has_imm)
break;
}
affinity__cleanup(&affinity);
affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@ -462,26 +496,25 @@ void evlist__disable_evsel(struct evlist *evlist, char *evsel_name)
static void __evlist__enable(struct evlist *evlist, char *evsel_name)
{
struct evsel *pos;
struct affinity affinity;
int cpu, i;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity = NULL;
if (affinity__setup(&affinity) < 0)
return;
evlist__for_each_cpu(evlist, i, cpu) {
affinity__set(&affinity, cpu);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
if (evsel__cpu_iter_skip(pos, cpu))
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__enable_cpu(pos, pos->cpu_iter - 1);
}
// See explanation in evlist__close()
if (!cpu_map__is_dummy(evlist->core.cpus)) {
if (affinity__setup(&saved_affinity) < 0)
return;
affinity = &saved_affinity;
}
affinity__cleanup(&affinity);
evlist__for_each_cpu(evlist_cpu_itr, evlist, affinity) {
pos = evlist_cpu_itr.evsel;
if (evsel__strcmp(pos, evsel_name))
continue;
if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__enable_cpu(pos, evlist_cpu_itr.cpu_map_idx);
}
affinity__cleanup(affinity);
evlist__for_each_entry(evlist, pos) {
if (evsel__strcmp(pos, evsel_name))
continue;
@ -800,7 +833,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *_evlist, bool overwrite, int idx)
static int
perf_evlist__mmap_cb_mmap(struct perf_mmap *_map, struct perf_mmap_param *_mp,
int output, int cpu)
int output, struct perf_cpu cpu)
{
struct mmap *map = container_of(_map, struct mmap, core);
struct mmap_params *mp = container_of(_mp, struct mmap_params, core);
@ -1264,14 +1297,14 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel)
void evlist__close(struct evlist *evlist)
{
struct evsel *evsel;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity affinity;
int cpu, i;
/*
* With perf record core.cpus is usually NULL.
* Use the old method to handle this for now.
*/
if (!evlist->core.cpus) {
if (!evlist->core.cpus || cpu_map__is_dummy(evlist->core.cpus)) {
evlist__for_each_entry_reverse(evlist, evsel)
evsel__close(evsel);
return;
@ -1279,15 +1312,12 @@ void evlist__close(struct evlist *evlist)
if (affinity__setup(&affinity) < 0)
return;
evlist__for_each_cpu(evlist, i, cpu) {
affinity__set(&affinity, cpu);
evlist__for_each_entry_reverse(evlist, evsel) {
if (evsel__cpu_iter_skip(evsel, cpu))
continue;
perf_evsel__close_cpu(&evsel->core, evsel->cpu_iter - 1);
}
evlist__for_each_cpu(evlist_cpu_itr, evlist, &affinity) {
perf_evsel__close_cpu(&evlist_cpu_itr.evsel->core,
evlist_cpu_itr.cpu_map_idx);
}
affinity__cleanup(&affinity);
evlist__for_each_entry_reverse(evlist, evsel) {
perf_evsel__free_fd(&evsel->core);

View File

@ -11,6 +11,7 @@
#include <perf/evsel.h>
#include "symbol_conf.h"
#include <internal/cpumap.h>
#include <perf/cpumap.h>
struct bpf_object;
struct cgroup;
@ -22,6 +23,7 @@ struct target;
struct hashmap;
struct bperf_leader_bpf;
struct bperf_follower_bpf;
struct perf_pmu;
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
@ -68,6 +70,7 @@ struct evsel {
double scale;
const char *unit;
struct cgroup *cgrp;
const char *metric_id;
enum perf_tool_event tool_event;
/* parse modifier helper */
int exclude_GH;
@ -119,7 +122,6 @@ struct evsel {
bool errored;
struct hashmap *per_pkg_mask;
int err;
int cpu_iter;
struct {
evsel__sb_cb_t *cb;
void *data;
@ -152,6 +154,9 @@ struct evsel {
};
unsigned long open_flags;
int precise_ip_original;
/* for missing_features */
struct perf_pmu *pmu;
};
struct perf_missing_features {
@ -187,12 +192,9 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
static inline int evsel__nr_cpus(struct evsel *evsel)
{
return evsel__cpus(evsel)->nr;
return perf_cpu_map__nr(evsel__cpus(evsel));
}
void perf_counts_values__scale(struct perf_counts_values *count,
bool scale, s8 *pscaled);
void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
struct perf_counts_values *count);
@ -261,6 +263,7 @@ bool evsel__match_bpf_counter_events(const char *name);
int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
const char *evsel__name(struct evsel *evsel);
const char *evsel__metric_id(const struct evsel *evsel);
const char *evsel__group_name(struct evsel *evsel);
int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
@ -277,16 +280,17 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma
void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
void arch_evsel__set_sample_weight(struct evsel *evsel);
void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
int evsel__set_filter(struct evsel *evsel, const char *filter);
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable_cpu(struct evsel *evsel, int cpu_map_idx);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
int evsel__disable_cpu(struct evsel *evsel, int cpu);
int evsel__disable_cpu(struct evsel *evsel, int cpu_map_idx);
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu_map_idx);
int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
@ -298,10 +302,6 @@ bool evsel__detect_missing_features(struct evsel *evsel);
enum rlimit_action { NO_CHANGE, SET_TO_MAX, INCREASED_MAX };
bool evsel__increase_rlimit(enum rlimit_action *set_rlimit);
bool evsel__ignore_missing_thread(struct evsel *evsel,
int nr_cpus, int cpu,
struct perf_thread_map *threads,
int thread, int err);
bool evsel__precise_ip_fallback(struct evsel *evsel);
struct perf_sample;
@ -330,32 +330,32 @@ static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
(e1->core.attr.config == e2->core.attr.config);
}
int evsel__read_counter(struct evsel *evsel, int cpu, int thread);
int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread);
int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale);
int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale);
/**
* evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @cpu_map_idx - CPU of interest
* @thread - thread of interest
*/
static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread)
static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread)
{
return __evsel__read_on_cpu(evsel, cpu, thread, false);
return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, false);
}
/**
* evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @cpu_map_idx - CPU of interest
* @thread - thread of interest
*/
static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread)
static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx, int thread)
{
return __evsel__read_on_cpu(evsel, cpu, thread, true);
return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true);
}
int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
@ -482,4 +482,18 @@ struct evsel *evsel__leader(struct evsel *evsel);
bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
bool evsel__is_leader(struct evsel *evsel);
void evsel__set_leader(struct evsel *evsel, struct evsel *leader);
int evsel__source_count(const struct evsel *evsel);
/*
* Macro to swap the bit-field position and size.
* Used when:
* - when we don't need to swap the entire u64 &&
* - when u64 has variable bit-field sizes &&
* - when presented in a host endian which is different
* than the source endian of the perf.data file
*/
#define bitfield_swap(src, pos, size) \
((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
u64 evsel__bitfield_swap_branch_flags(u64 value);
#endif /* __PERF_EVSEL_H */
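As a worked example of the bitfield_swap() macro above: a 4-bit field at bits 3..6 is extracted and mirrored to bits 57..60 of the result. A standalone sketch, repeating the macro definition only so it compiles on its own:
#include <stdio.h>
/* Copy of the macro above, repeated here so the sketch is self-contained. */
#define bitfield_swap(src, pos, size) \
	((((src) >> (pos)) & ((1ull << (size)) - 1)) << (63 - ((pos) + (size) - 1)))
int main(void)
{
	/* 0x78 carries the 4-bit field value 0xf at bit position 3 (bits 3..6). */
	unsigned long long v = bitfield_swap(0x78ull, 3, 4);
	printf("0x%llx\n", v); /* 0x1e00000000000000, i.e. 0xf placed at bits 57..60 */
	return 0;
}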

View File

@ -5,13 +5,18 @@
#include <stdlib.h>
#include <string.h>
#include "metricgroup.h"
#include "cpumap.h"
#include "cputopo.h"
#include "debug.h"
#include "expr.h"
#include "expr-bison.h"
#include "expr-flex.h"
#include "smt.h"
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
#include <ctype.h>
#include <math.h>
#ifdef PARSER_DEBUG
extern int expr_debug;
@ -19,13 +24,15 @@ extern int expr_debug;
struct expr_id_data {
union {
double val;
struct {
double val;
int source_count;
} val;
struct {
double val;
const char *metric_name;
const char *metric_expr;
} ref;
struct expr_id *parent;
};
enum {
@ -35,8 +42,6 @@ struct expr_id_data {
EXPR_ID_DATA__REF,
/* A reference but the value has been computed. */
EXPR_ID_DATA__REF_VALUE,
/* A parent is remembered for the recursion check. */
EXPR_ID_DATA__PARENT,
} kind;
};
@ -59,21 +64,39 @@ static bool key_equal(const void *key1, const void *key2,
return !strcmp((const char *)key1, (const char *)key2);
}
/* Caller must make sure id is allocated */
int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
struct hashmap *ids__new(void)
{
struct hashmap *hash;
hash = hashmap__new(key_hash, key_equal, NULL);
if (IS_ERR(hash))
return NULL;
return hash;
}
void ids__free(struct hashmap *ids)
{
struct hashmap_entry *cur;
size_t bkt;
if (ids == NULL)
return;
hashmap__for_each_entry(ids, cur, bkt) {
free((char *)cur->key);
free(cur->value);
}
hashmap__free(ids);
}
int ids__insert(struct hashmap *ids, const char *id)
{
struct expr_id_data *data_ptr = NULL, *old_data = NULL;
char *old_key = NULL;
int ret;
data_ptr = malloc(sizeof(*data_ptr));
if (!data_ptr)
return -ENOMEM;
data_ptr->parent = ctx->parent;
data_ptr->kind = EXPR_ID_DATA__PARENT;
ret = hashmap__set(&ctx->ids, id, data_ptr,
ret = hashmap__set(ids, id, data_ptr,
(const void **)&old_key, (void **)&old_data);
if (ret)
free(data_ptr);
@ -82,8 +105,57 @@ int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
return ret;
}
struct hashmap *ids__union(struct hashmap *ids1, struct hashmap *ids2)
{
size_t bkt;
struct hashmap_entry *cur;
int ret;
struct expr_id_data *old_data = NULL;
char *old_key = NULL;
if (!ids1)
return ids2;
if (!ids2)
return ids1;
if (hashmap__size(ids1) < hashmap__size(ids2)) {
struct hashmap *tmp = ids1;
ids1 = ids2;
ids2 = tmp;
}
hashmap__for_each_entry(ids2, cur, bkt) {
ret = hashmap__set(ids1, cur->key, cur->value,
(const void **)&old_key, (void **)&old_data);
free(old_key);
free(old_data);
if (ret) {
hashmap__free(ids1);
hashmap__free(ids2);
return NULL;
}
}
hashmap__free(ids2);
return ids1;
}
/* Caller must make sure id is allocated */
int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
{
return ids__insert(ctx->ids, id);
}
/* Caller must make sure id is allocated */
int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
{
return expr__add_id_val_source_count(ctx, id, val, /*source_count=*/1);
}
/* Caller must make sure id is allocated */
int expr__add_id_val_source_count(struct expr_parse_ctx *ctx, const char *id,
double val, int source_count)
{
struct expr_id_data *data_ptr = NULL, *old_data = NULL;
char *old_key = NULL;
@ -92,10 +164,11 @@ int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
data_ptr = malloc(sizeof(*data_ptr));
if (!data_ptr)
return -ENOMEM;
data_ptr->val = val;
data_ptr->val.val = val;
data_ptr->val.source_count = source_count;
data_ptr->kind = EXPR_ID_DATA__VALUE;
ret = hashmap__set(&ctx->ids, id, data_ptr,
ret = hashmap__set(ctx->ids, id, data_ptr,
(const void **)&old_key, (void **)&old_data);
if (ret)
free(data_ptr);
@ -140,7 +213,7 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
data_ptr->ref.metric_expr = ref->metric_expr;
data_ptr->kind = EXPR_ID_DATA__REF;
ret = hashmap__set(&ctx->ids, name, data_ptr,
ret = hashmap__set(ctx->ids, name, data_ptr,
(const void **)&old_key, (void **)&old_data);
if (ret)
free(data_ptr);
@ -156,9 +229,24 @@ int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
struct expr_id_data **data)
{
return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1;
return hashmap__find(ctx->ids, id, (void **)data) ? 0 : -1;
}
bool expr__subset_of_ids(struct expr_parse_ctx *haystack,
struct expr_parse_ctx *needles)
{
struct hashmap_entry *cur;
size_t bkt;
struct expr_id_data *data;
hashmap__for_each_entry(needles->ids, cur, bkt) {
if (expr__get_id(haystack, cur->key, &data))
return false;
}
return true;
}
int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
struct expr_id_data **datap)
{
@ -173,21 +261,18 @@ int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
switch (data->kind) {
case EXPR_ID_DATA__VALUE:
pr_debug2("lookup(%s): val %f\n", id, data->val);
break;
case EXPR_ID_DATA__PARENT:
pr_debug2("lookup(%s): parent %s\n", id, data->parent->id);
pr_debug2("lookup(%s): val %f\n", id, data->val.val);
break;
case EXPR_ID_DATA__REF:
pr_debug2("lookup(%s): ref metric name %s\n", id,
data->ref.metric_name);
pr_debug("processing metric: %s ENTRY\n", id);
data->kind = EXPR_ID_DATA__REF_VALUE;
if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr, 1)) {
if (expr__parse(&data->ref.val, ctx, data->ref.metric_expr)) {
pr_debug("%s failed to count\n", id);
return -1;
}
pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
pr_debug("processing metric: %s EXIT: %f\n", id, data->ref.val);
break;
case EXPR_ID_DATA__REF_VALUE:
pr_debug2("lookup(%s): ref val %f metric name %s\n", id,
@ -205,15 +290,28 @@ void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
struct expr_id_data *old_val = NULL;
char *old_key = NULL;
hashmap__delete(&ctx->ids, id,
hashmap__delete(ctx->ids, id,
(const void **)&old_key, (void **)&old_val);
free(old_key);
free(old_val);
}
void expr__ctx_init(struct expr_parse_ctx *ctx)
struct expr_parse_ctx *expr__ctx_new(void)
{
hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
struct expr_parse_ctx *ctx;
ctx = malloc(sizeof(struct expr_parse_ctx));
if (!ctx)
return NULL;
ctx->ids = hashmap__new(key_hash, key_equal, NULL);
if (IS_ERR(ctx->ids)) {
free(ctx);
return NULL;
}
ctx->runtime = 0;
return ctx;
}
void expr__ctx_clear(struct expr_parse_ctx *ctx)
@ -221,20 +319,32 @@ void expr__ctx_clear(struct expr_parse_ctx *ctx)
struct hashmap_entry *cur;
size_t bkt;
hashmap__for_each_entry((&ctx->ids), cur, bkt) {
hashmap__for_each_entry(ctx->ids, cur, bkt) {
free((char *)cur->key);
free(cur->value);
}
hashmap__clear(&ctx->ids);
hashmap__clear(ctx->ids);
}
void expr__ctx_free(struct expr_parse_ctx *ctx)
{
struct hashmap_entry *cur;
size_t bkt;
hashmap__for_each_entry(ctx->ids, cur, bkt) {
free((char *)cur->key);
free(cur->value);
}
hashmap__free(ctx->ids);
free(ctx);
}
static int
__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
int start, int runtime)
bool compute_ids)
{
struct expr_scanner_ctx scanner_ctx = {
.start_token = start,
.runtime = runtime,
.runtime = ctx->runtime,
};
YY_BUFFER_STATE buffer;
void *scanner;
@ -253,7 +363,7 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
expr_set_debug(1, scanner);
#endif
ret = expr_parse(val, ctx, scanner);
ret = expr_parse(val, ctx, compute_ids, scanner);
expr__flush_buffer(buffer, scanner);
expr__delete_buffer(buffer, scanner);
@ -262,15 +372,15 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
}
int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
const char *expr, int runtime)
const char *expr)
{
return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0;
return __expr__parse(final_val, ctx, expr, /*compute_ids=*/false) ? -1 : 0;
}
int expr__find_other(const char *expr, const char *one,
struct expr_parse_ctx *ctx, int runtime)
int expr__find_ids(const char *expr, const char *one,
struct expr_parse_ctx *ctx)
{
int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
int ret = __expr__parse(NULL, ctx, expr, /*compute_ids=*/true);
if (one)
expr__del_id(ctx, one);
@ -281,13 +391,60 @@ int expr__find_other(const char *expr, const char *one,
double expr_id_data__value(const struct expr_id_data *data)
{
if (data->kind == EXPR_ID_DATA__VALUE)
return data->val;
return data->val.val;
assert(data->kind == EXPR_ID_DATA__REF_VALUE);
return data->ref.val;
}
struct expr_id *expr_id_data__parent(struct expr_id_data *data)
double expr_id_data__source_count(const struct expr_id_data *data)
{
assert(data->kind == EXPR_ID_DATA__PARENT);
return data->parent;
assert(data->kind == EXPR_ID_DATA__VALUE);
return data->val.source_count;
}
double expr__get_literal(const char *literal)
{
static struct cpu_topology *topology;
double result = NAN;
if (!strcasecmp("#smt_on", literal)) {
result = smt_on() > 0 ? 1.0 : 0.0;
goto out;
}
if (!strcmp("#num_cpus", literal)) {
result = cpu__max_present_cpu().cpu;
goto out;
}
/*
* Assume that topology strings are consistent, e.g. CPUs "0-1" would not
* also be listed as "0,1", so after deduplication the number of
* these strings gives an indication of the number of packages, dies,
* etc.
*/
if (!topology) {
topology = cpu_topology__new();
if (!topology) {
pr_err("Error creating CPU topology");
goto out;
}
}
if (!strcmp("#num_packages", literal)) {
result = topology->package_cpus_lists;
goto out;
}
if (!strcmp("#num_dies", literal)) {
result = topology->die_cpus_lists;
goto out;
}
if (!strcmp("#num_cores", literal)) {
result = topology->core_cpus_lists;
goto out;
}
pr_err("Unrecognized literal '%s'", literal);
out:
pr_debug2("literal: %s = %f\n", literal, result);
return result;
}
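The deduplication idea described in the comment above can be shown outside perf: with one package_cpus_list string per CPU, the number of unique strings is the number of packages. A minimal sketch with made-up topology data (the values are invented, not read from a real machine):
#include <stdio.h>
#include <string.h>
int main(void)
{
	/* One topology string per CPU; illustrative values only. */
	const char *lists[] = { "0-3", "0-3", "0-3", "0-3", "4-7", "4-7", "4-7", "4-7" };
	const char *uniq[8];
	int n = 0;
	for (int i = 0; i < 8; i++) {
		int j;
		for (j = 0; j < n; j++)
			if (!strcmp(lists[i], uniq[j]))
				break;
		if (j == n)
			uniq[n++] = lists[i];
	}
	printf("#num_packages = %d\n", n); /* prints 2 */
	return 0;
}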

View File

@ -6,6 +6,7 @@
#include <linux/compiler.h>
#include "expr.h"
#include "expr-bison.h"
#include <math.h>
char *expr_get_text(yyscan_t yyscanner);
YYSTYPE *expr_get_lval(yyscan_t yyscanner);
@ -41,11 +42,9 @@ static char *normalize(char *str, int runtime)
char *dst = str;
while (*str) {
if (*str == '@')
*dst++ = '/';
else if (*str == '\\')
if (*str == '\\')
*dst++ = *++str;
else if (*str == '?') {
else if (*str == '?') {
char *paramval;
int i = 0;
int size = asprintf(&paramval, "%d", runtime);
@ -79,6 +78,17 @@ static int str(yyscan_t scanner, int token, int runtime)
yylval->str = normalize(yylval->str, runtime);
return token;
}
static int literal(yyscan_t scanner)
{
YYSTYPE *yylval = expr_get_lval(scanner);
yylval->num = expr__get_literal(expr_get_text(scanner));
if (isnan(yylval->num))
return EXPR_ERROR;
return LITERAL;
}
%}
number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
@ -87,25 +97,18 @@ sch [-,=]
spec \\{sch}
sym [0-9a-zA-Z_\.:@?]+
symbol ({spec}|{sym})+
literal #[0-9a-zA-Z_\.\-]+
%%
struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
{
int start_token = sctx->start_token;
if (sctx->start_token) {
sctx->start_token = 0;
return start_token;
}
}
d_ratio { return D_RATIO; }
max { return MAX; }
min { return MIN; }
if { return IF; }
else { return ELSE; }
#smt_on { return SMT_ON; }
source_count { return SOURCE_COUNT; }
{literal} { return literal(yyscanner); }
{number} { return value(yyscanner); }
{symbol} { return str(yyscanner, ID, sctx->runtime); }
"|" { return '|'; }

View File

@ -1,42 +1,43 @@
/* Simple expression parser */
%{
#define YYDEBUG 1
#include <stdio.h>
#include "util.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include "util/debug.h"
#include <stdlib.h> // strtod()
#define IN_EXPR_Y 1
#include "expr.h"
#include "smt.h"
#include <string.h>
static double d_ratio(double val0, double val1)
{
if (val1 == 0) {
return 0;
}
return val0 / val1;
}
%}
%define api.pure full
%parse-param { double *final_val }
%parse-param { struct expr_parse_ctx *ctx }
%parse-param { bool compute_ids }
%parse-param {void *scanner}
%lex-param {void* scanner}
%union {
double num;
char *str;
struct ids {
/*
* When creating ids, holds the working set of event ids. NULL
* implies the set is empty.
*/
struct hashmap *ids;
/*
* The metric value. When not creating ids this is the value
* read from a counter, a constant or some computed value. When
* creating ids the value is either a constant or BOTTOM. NAN is
* used as the special BOTTOM value, representing a "set of all
* values" case.
*/
double val;
} ids;
}
%token EXPR_PARSE EXPR_OTHER EXPR_ERROR
%token <num> NUMBER
%token <str> ID
%destructor { free ($$); } <str>
%token MIN MAX IF ELSE SMT_ON D_RATIO
%token ID NUMBER MIN MAX IF ELSE LITERAL D_RATIO SOURCE_COUNT EXPR_ERROR
%left MIN MAX IF
%left '|'
%left '^'
@ -45,83 +46,256 @@ static double d_ratio(double val0, double val1)
%left '-' '+'
%left '*' '/' '%'
%left NEG NOT
%type <num> expr if_expr
%type <num> NUMBER LITERAL
%type <str> ID
%destructor { free ($$); } <str>
%type <ids> expr if_expr
%destructor { ids__free($$.ids); } <ids>
%{
static void expr_error(double *final_val __maybe_unused,
struct expr_parse_ctx *ctx __maybe_unused,
bool compute_ids __maybe_unused,
void *scanner,
const char *s)
{
pr_debug("%s\n", s);
}
/*
* During compute ids, the special "bottom" value uses NAN to represent the set
* of all values. NAN is selected as it isn't a useful constant value.
*/
#define BOTTOM NAN
/* During computing ids, does val represent a constant (non-BOTTOM) value? */
static bool is_const(double val)
{
return isfinite(val);
}
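The BOTTOM convention above can be seen with plain libm: NAN stands for "any value possible", so only finite values are treated as foldable constants. A small standalone illustration, not parser code:
#include <math.h>
#include <stdio.h>
int main(void)
{
	double bottom = NAN;   /* stands for an ID whose value must come from an event */
	double constant = 2.0; /* a literal constant in the expression */
	printf("constant + 1 is foldable: %d\n", isfinite(constant + 1.0) != 0); /* 1 */
	printf("bottom + 1 is foldable:   %d\n", isfinite(bottom + 1.0) != 0);   /* 0 */
	return 0;
}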
static struct ids union_expr(struct ids ids1, struct ids ids2)
{
struct ids result = {
.val = BOTTOM,
.ids = ids__union(ids1.ids, ids2.ids),
};
return result;
}
static struct ids handle_id(struct expr_parse_ctx *ctx, char *id,
bool compute_ids, bool source_count)
{
struct ids result;
if (!compute_ids) {
/*
* Compute the event's value from ID. If the ID isn't known then
* it isn't used to compute the formula so set to NAN.
*/
struct expr_id_data *data;
result.val = NAN;
if (expr__resolve_id(ctx, id, &data) == 0) {
result.val = source_count
? expr_id_data__source_count(data)
: expr_id_data__value(data);
}
result.ids = NULL;
free(id);
} else {
/*
* Set the value to BOTTOM to show that any value is possible
* when the event is computed. Create a set of just the ID.
*/
result.val = BOTTOM;
result.ids = ids__new();
if (!result.ids || ids__insert(result.ids, id)) {
pr_err("Error creating IDs for '%s'", id);
free(id);
}
}
return result;
}
/*
* If we're not computing ids or $1 and $3 are constants, compute the new
* constant value using OP. It is an invariant that there are no ids. If computing
* ids for non-constants union the set of IDs that must be computed.
*/
#define BINARY_LONG_OP(RESULT, OP, LHS, RHS) \
if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \
assert(LHS.ids == NULL); \
assert(RHS.ids == NULL); \
RESULT.val = (long)LHS.val OP (long)RHS.val; \
RESULT.ids = NULL; \
} else { \
RESULT = union_expr(LHS, RHS); \
}
#define BINARY_OP(RESULT, OP, LHS, RHS) \
if (!compute_ids || (is_const(LHS.val) && is_const(RHS.val))) { \
assert(LHS.ids == NULL); \
assert(RHS.ids == NULL); \
RESULT.val = LHS.val OP RHS.val; \
RESULT.ids = NULL; \
} else { \
RESULT = union_expr(LHS, RHS); \
}
%}
%%
start:
EXPR_PARSE all_expr
|
EXPR_OTHER all_other
all_other: all_other other
|
other: ID
start: if_expr
{
expr__add_id(ctx, $1);
if (compute_ids)
ctx->ids = ids__union($1.ids, ctx->ids);
if (final_val)
*final_val = $1.val;
}
|
MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
|
'<' | '>' | D_RATIO
;
all_expr: if_expr { *final_val = $1; }
;
if_expr: expr IF expr ELSE expr
{
if (fpclassify($3.val) == FP_ZERO) {
/*
* The IF expression evaluated to 0 so treat as false, take the
* ELSE and discard everything else.
*/
$$.val = $5.val;
$$.ids = $5.ids;
ids__free($1.ids);
ids__free($3.ids);
} else if (!compute_ids || is_const($3.val)) {
/*
* If ids aren't computed then treat the expression as true. If
* ids are being computed and the IF expr is a non-zero
* constant, then also evaluate the true case.
*/
$$.val = $1.val;
$$.ids = $1.ids;
ids__free($3.ids);
ids__free($5.ids);
} else if ($1.val == $5.val) {
/*
* LHS == RHS, so both are an identical constant. No need to
* evaluate any events.
*/
$$.val = $1.val;
$$.ids = NULL;
ids__free($1.ids);
ids__free($3.ids);
ids__free($5.ids);
} else {
/*
* Value is either the LHS or RHS and we need the IF expression
* to compute it.
*/
$$ = union_expr($1, union_expr($3, $5));
}
}
| expr
;
if_expr:
expr IF expr ELSE expr { $$ = $3 ? $1 : $5; }
| expr
;
expr: NUMBER
| ID {
struct expr_id_data *data;
if (expr__resolve_id(ctx, $1, &data)) {
free($1);
YYABORT;
}
$$ = expr_id_data__value(data);
free($1);
}
| expr '|' expr { $$ = (long)$1 | (long)$3; }
| expr '&' expr { $$ = (long)$1 & (long)$3; }
| expr '^' expr { $$ = (long)$1 ^ (long)$3; }
| expr '<' expr { $$ = $1 < $3; }
| expr '>' expr { $$ = $1 > $3; }
| expr '+' expr { $$ = $1 + $3; }
| expr '-' expr { $$ = $1 - $3; }
| expr '*' expr { $$ = $1 * $3; }
| expr '/' expr { if ($3 == 0) {
pr_debug("division by zero\n");
YYABORT;
}
$$ = $1 / $3;
}
| expr '%' expr { if ((long)$3 == 0) {
pr_debug("division by zero\n");
YYABORT;
}
$$ = (long)$1 % (long)$3;
}
| '-' expr %prec NEG { $$ = -$2; }
| '(' if_expr ')' { $$ = $2; }
| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
| MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; }
| SMT_ON { $$ = smt_on() > 0; }
| D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); }
;
expr: NUMBER
{
$$.val = $1;
$$.ids = NULL;
}
| ID { $$ = handle_id(ctx, $1, compute_ids, /*source_count=*/false); }
| SOURCE_COUNT '(' ID ')' { $$ = handle_id(ctx, $3, compute_ids, /*source_count=*/true); }
| expr '|' expr { BINARY_LONG_OP($$, |, $1, $3); }
| expr '&' expr { BINARY_LONG_OP($$, &, $1, $3); }
| expr '^' expr { BINARY_LONG_OP($$, ^, $1, $3); }
| expr '<' expr { BINARY_OP($$, <, $1, $3); }
| expr '>' expr { BINARY_OP($$, >, $1, $3); }
| expr '+' expr { BINARY_OP($$, +, $1, $3); }
| expr '-' expr { BINARY_OP($$, -, $1, $3); }
| expr '*' expr { BINARY_OP($$, *, $1, $3); }
| expr '/' expr
{
if (fpclassify($3.val) == FP_ZERO) {
pr_debug("division by zero\n");
YYABORT;
} else if (!compute_ids || (is_const($1.val) && is_const($3.val))) {
assert($1.ids == NULL);
assert($3.ids == NULL);
$$.val = $1.val / $3.val;
$$.ids = NULL;
} else {
/* LHS and/or RHS need computing from event IDs so union. */
$$ = union_expr($1, $3);
}
}
| expr '%' expr
{
if (fpclassify($3.val) == FP_ZERO) {
pr_debug("division by zero\n");
YYABORT;
} else if (!compute_ids || (is_const($1.val) && is_const($3.val))) {
assert($1.ids == NULL);
assert($3.ids == NULL);
$$.val = (long)$1.val % (long)$3.val;
$$.ids = NULL;
} else {
/* LHS and/or RHS need computing from event IDs so union. */
$$ = union_expr($1, $3);
}
}
| D_RATIO '(' expr ',' expr ')'
{
if (fpclassify($5.val) == FP_ZERO) {
/*
* Division by constant zero always yields zero and no events
* are necessary.
*/
assert($5.ids == NULL);
$$.val = 0.0;
$$.ids = NULL;
ids__free($3.ids);
} else if (!compute_ids || (is_const($3.val) && is_const($5.val))) {
assert($3.ids == NULL);
assert($5.ids == NULL);
$$.val = $3.val / $5.val;
$$.ids = NULL;
} else {
/* LHS and/or RHS need computing from event IDs so union. */
$$ = union_expr($3, $5);
}
}
| '-' expr %prec NEG
{
$$.val = -$2.val;
$$.ids = $2.ids;
}
| '(' if_expr ')'
{
$$ = $2;
}
| MIN '(' expr ',' expr ')'
{
if (!compute_ids) {
$$.val = $3.val < $5.val ? $3.val : $5.val;
$$.ids = NULL;
} else {
$$ = union_expr($3, $5);
}
}
| MAX '(' expr ',' expr ')'
{
if (!compute_ids) {
$$.val = $3.val > $5.val ? $3.val : $5.val;
$$.ids = NULL;
} else {
$$ = union_expr($3, $5);
}
}
| LITERAL
{
$$.val = $1;
$$.ids = NULL;
}
;
%%

View File

@ -42,7 +42,7 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
#error "unsupported architecture"
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define GEN_ELF_ENDIAN ELFDATA2MSB
#else
#define GEN_ELF_ENDIAN ELFDATA2LSB

View File

@ -35,7 +35,7 @@
#define INTEL_BTS_ERR_NOINSN 5
#define INTEL_BTS_ERR_LOST 9
#if __BYTE_ORDER == __BIG_ENDIAN
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define le64_to_cpu bswap_64
#else
#define le64_to_cpu

View File

@ -16,6 +16,7 @@
#include "map_symbol.h"
#include "branch.h"
#include "mem-events.h"
#include "path.h"
#include "srcline.h"
#include "symbol.h"
#include "sort.h"
@ -34,6 +35,7 @@
#include "bpf-event.h"
#include <internal/lib.h> // page_size
#include "cgroup.h"
#include "arm64-frame-pointer-unwind-support.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@ -755,6 +757,14 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
return 0;
}
int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
if (dump_trace)
perf_event__fprintf_aux_output_hw_id(event, stdout);
return 0;
}
int machine__process_switch_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
@ -1407,7 +1417,7 @@ static int maps__set_modules_path_dir(struct maps *maps, const char *dir_name, i
struct stat st;
/*sshfs might return bad dent->d_type, so we have to stat*/
snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
path__join(path, sizeof(path), dir_name, dent->d_name);
if (stat(path, &st))
continue;
@ -2028,6 +2038,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_bpf(machine, event, sample); break;
case PERF_RECORD_TEXT_POKE:
ret = machine__process_text_poke(machine, event, sample); break;
case PERF_RECORD_AUX_OUTPUT_HW_ID:
ret = machine__process_aux_output_hw_id_event(machine, event); break;
default:
ret = -1;
break;
@ -2061,6 +2073,7 @@ static void ip__resolve_ams(struct thread *thread,
ams->addr = ip;
ams->al_addr = al.addr;
ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;
@ -2080,6 +2093,7 @@ static void ip__resolve_data(struct thread *thread,
ams->addr = addr;
ams->al_addr = al.addr;
ams->al_level = al.level;
ams->ms.maps = al.maps;
ams->ms.sym = al.sym;
ams->ms.map = al.map;
@ -2700,6 +2714,15 @@ static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
return err;
}
static u64 get_leaf_frame_caller(struct perf_sample *sample,
struct thread *thread, int usr_idx)
{
if (machine__normalized_is(thread->maps->machine, "arm64"))
return get_leaf_frame_caller_aarch64(sample, thread, usr_idx);
else
return 0;
}
static int thread__resolve_callchain_sample(struct thread *thread,
struct callchain_cursor *cursor,
struct evsel *evsel,
@ -2713,9 +2736,10 @@ static int thread__resolve_callchain_sample(struct thread *thread,
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
int i, j, err, nr_entries;
int i, j, err, nr_entries, usr_idx;
int skip_idx = -1;
int first_call = 0;
u64 leaf_frame_caller;
if (chain)
chain_nr = chain->nr;
@ -2840,6 +2864,34 @@ static int thread__resolve_callchain_sample(struct thread *thread,
continue;
}
/*
* PERF_CONTEXT_USER allows us to locate where the user stack ends.
* Depending on callchain_param.order and the position of PERF_CONTEXT_USER,
* the index will be different in order to add the missing frame
* at the right place.
*/
usr_idx = callchain_param.order == ORDER_CALLEE ? j-2 : j-1;
if (usr_idx >= 0 && chain->ips[usr_idx] == PERF_CONTEXT_USER) {
leaf_frame_caller = get_leaf_frame_caller(sample, thread, usr_idx);
/*
* check if leaf_frame_caller != ip to not add the same
* value twice.
*/
if (leaf_frame_caller && leaf_frame_caller != ip) {
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, leaf_frame_caller,
false, NULL, NULL, 0);
if (err)
return (err < 0) ? err : 0;
}
}
err = add_callchain_ip(thread, cursor, parent,
root_al, &cpumode, ip,
false, NULL, NULL, 0);
@ -3069,14 +3121,19 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
}
/*
* Compares the raw arch string. N.B. see instead perf_env__arch() if a
* normalized arch is needed.
* Compares the raw arch string. N.B. see instead perf_env__arch() or
* machine__normalized_is() if a normalized arch is needed.
*/
bool machine__is(struct machine *machine, const char *arch)
{
return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
}
bool machine__normalized_is(struct machine *machine, const char *arch)
{
return machine && !strcmp(perf_env__arch(machine->env), arch);
}
int machine__nr_cpus_avail(struct machine *machine)
{
return machine ? perf_env__nr_cpus_avail(machine->env) : 0;

View File

@ -124,6 +124,8 @@ int machine__process_aux_event(struct machine *machine,
union perf_event *event);
int machine__process_itrace_start_event(struct machine *machine,
union perf_event *event);
int machine__process_aux_output_hw_id_event(struct machine *machine,
union perf_event *event);
int machine__process_switch_event(struct machine *machine,
union perf_event *event);
int machine__process_namespaces_event(struct machine *machine,
@ -206,6 +208,7 @@ static inline bool machine__is_host(struct machine *machine)
}
bool machine__is(struct machine *machine, const char *arch);
bool machine__normalized_is(struct machine *machine, const char *arch);
int machine__nr_cpus_avail(struct machine *machine);
struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);

View File

@ -18,6 +18,7 @@ struct addr_map_symbol {
struct map_symbol ms;
u64 addr;
u64 al_addr;
char al_level;
u64 phys_addr;
u64 data_page_size;
};

View File

@ -301,12 +301,25 @@ static const char * const mem_lvlnum[] = {
[PERF_MEM_LVLNUM_NA] = "N/A",
};
static const char * const mem_hops[] = {
"N/A",
/*
* While printing, 'Remote' will be added to represent
* 'Remote core, same node' accesses, as the remote field needs to
* be set along with the mem_hops field.
*/
"core, same node",
"node, same socket",
"socket, same board",
"board",
};
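As the comment above notes, the "Remote " prefix added by perf_mem__lvl_scnprintf() combines with these strings, so a remote access with mem_hops == 1 is shown as "Remote core, same node". A standalone sketch of that combination (illustrative only, not perf's output path):
#include <stdio.h>
int main(void)
{
	static const char * const mem_hops[] = {
		"N/A",
		"core, same node",
		"node, same socket",
		"socket, same board",
		"board",
	};
	int remote = 1; /* data_src.mem_remote */
	int hops = 1;   /* data_src.mem_hops */
	printf("%s%s\n", remote ? "Remote " : "", mem_hops[hops]); /* "Remote core, same node" */
	return 0;
}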
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
{
size_t i, l = 0;
u64 m = PERF_MEM_LVL_NA;
u64 hit, miss;
int printed;
int printed = 0;
if (mem_info)
m = mem_info->data_src.mem_lvl;
@ -320,21 +333,27 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
/* already taken care of */
m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
if (mem_info && mem_info->data_src.mem_remote) {
strcat(out, "Remote ");
l += 7;
}
printed = 0;
for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
if (!(m & 0x1))
continue;
if (printed++) {
strcat(out, " or ");
l += 4;
/*
* In case the mem_hops field is set, we can skip printing the data
* source via the PERF_MEM_LVL namespace.
*/
if (mem_info && mem_info->data_src.mem_hops) {
l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]);
} else {
for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
if (!(m & 0x1))
continue;
if (printed++) {
strcat(out, " or ");
l += 4;
}
l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
l += scnprintf(out + l, sz - l, mem_lvl[i]);
}
if (mem_info && mem_info->data_src.mem_lvl_num) {
@ -472,8 +491,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
/*
* Skylake might report unknown remote level via this
* bit, consider it when evaluating remote HITMs.
*
* In case of Power, the remote field can also be used to denote cache
* accesses from another core of the same node. Hence, set mrem only
* when the remote field is set and HOPS is zero.
*/
bool mrem = data_src->mem_remote;
bool mrem = (data_src->mem_remote && !data_src->mem_hops);
int err = 0;
#define HITM_INC(__f) \

File diff suppressed because it is too large

View File

@ -14,24 +14,51 @@ struct rblist;
struct pmu_events_map;
struct cgroup;
/**
* A node in a rblist keyed by the evsel. The global rblist of metric events
* generally exists in perf_stat_config. The evsel is looked up in the rblist
* yielding a list of metric_expr.
*/
struct metric_event {
struct rb_node nd;
struct evsel *evsel;
struct list_head head; /* list of metric_expr */
};
/**
* A metric referenced by a metric_expr. When parsing a metric expression IDs
* will be looked up, matching either a value (from metric_events) or a
* metric_ref. A metric_ref will then be parsed recursively. The metric_refs and
* metric_events need to be known before parsing so that their values may be
* placed in the parse context for lookup.
*/
struct metric_ref {
const char *metric_name;
const char *metric_expr;
};
/**
* One in a list of metric_expr associated with an evsel. The data is used to
* generate a metric value during stat output.
*/
struct metric_expr {
struct list_head nd;
/** The expression to parse, for example, "instructions/cycles". */
const char *metric_expr;
/** The name of the metric such as "IPC". */
const char *metric_name;
/**
* The "ScaleUnit" that scales and adds a unit to the metric during
* output. For example, "6.4e-05MiB" means to scale the resulting metric
* by 6.4e-05 (typically converting a unit like cache lines to something
* more human intelligible) and then add "MiB" afterward when displayed.
*/
const char *metric_unit;
/** Null terminated array of events used by the metric. */
struct evsel **metric_events;
/** Null terminated array of referenced metrics. */
struct metric_ref *metric_refs;
/** A value substituted for '?' during parsing. */
int runtime;
};
@ -43,19 +70,19 @@ int metricgroup__parse_groups(const struct option *opt,
bool metric_no_group,
bool metric_no_merge,
struct rblist *metric_events);
struct pmu_event *metricgroup__find_metric(const char *metric,
struct pmu_events_map *map);
const struct pmu_event *metricgroup__find_metric(const char *metric,
const struct pmu_events_map *map);
int metricgroup__parse_groups_test(struct evlist *evlist,
struct pmu_events_map *map,
const struct pmu_events_map *map,
const char *str,
bool metric_no_group,
bool metric_no_merge,
struct rblist *metric_events);
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool raw, bool details, const char *pmu_name);
bool metricgroup__has_metric(const char *metric);
int arch_get_runtimeparam(struct pmu_event *pe __maybe_unused);
int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused);
void metricgroup__rblist_exit(struct rblist *metric_events);
int metricgroup__copy_metric_events(struct evlist *evlist, struct cgroup *cgrp,

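The "ScaleUnit" behaviour described in the metric_expr comments above amounts to multiplying the raw value by the parsed factor and appending the unit string when printing. A standalone sketch using the example factor from the comment ("6.4e-05MiB"); the numbers and names here are illustrative, not perf's API:
#include <stdio.h>
int main(void)
{
	double raw = 123456.0;  /* e.g. a raw count of cache lines */
	double scale = 6.4e-05; /* factor parsed from "6.4e-05MiB" */
	const char *unit = "MiB";
	printf("%.3f %s\n", raw * scale, unit); /* 7.901 MiB */
	return 0;
}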
View File

@ -7,6 +7,7 @@
#include <linux/types.h>
#include <linux/ring_buffer.h>
#include <linux/bitops.h>
#include <perf/cpumap.h>
#include <stdbool.h>
#include <pthread.h> // for cpu_set_t
#ifdef HAVE_AIO_SUPPORT
@ -52,7 +53,7 @@ struct mmap_params {
struct auxtrace_mmap_params auxtrace_mp;
};
int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, int cpu);
int mmap__mmap(struct mmap *map, struct mmap_params *mp, int fd, struct perf_cpu cpu);
void mmap__munmap(struct mmap *map);
union perf_event *perf_mmap__read_forward(struct mmap *map);
@ -64,4 +65,7 @@ size_t mmap__mmap_len(struct mmap *map);
void mmap_cpu_mask__scnprintf(struct mmap_cpu_mask *mask, const char *tag);
int mmap_cpu_mask__duplicate(struct mmap_cpu_mask *original,
struct mmap_cpu_mask *clone);
#endif /*__PERF_MMAP_H */

Some files were not shown because too many files have changed in this diff