mirror of
https://github.com/Qortal/Brooklyn.git
synced 2025-01-30 23:02:18 +00:00
638 lines
14 KiB
C
638 lines
14 KiB
C
/*
|
|
* raspberrypi_axi_monitor.c
|
|
*
|
|
* Author: james.hughes@raspberrypi.org
|
|
*
|
|
* Raspberry Pi AXI performance counters.
|
|
*
|
|
* Copyright (C) 2017 Raspberry Pi Trading Ltd.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*/
|
|
|
|
#include <linux/debugfs.h>
|
|
#include <linux/devcoredump.h>
|
|
#include <linux/device.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/module.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/of.h>
|
|
#include <linux/platform_device.h>
|
|
|
|
#include <soc/bcm2835/raspberrypi-firmware.h>
|
|
|
|
/* Number of AXI monitors handled by this driver, and the watchers in each */
#define NUM_MONITORS			2
#define NUM_BUS_WATCHERS_PER_MONITOR	3

/* Indices into the per-monitor state array */
#define SYSTEM_MONITOR			0
#define VPU_MONITOR			1

/* Number of buses tracked per monitor (one bit each in the enable mask) */
#define MAX_BUSES			16
/* Default sample period, passed to msleep() */
#define DEFAULT_SAMPLE_TIME		100

/* Number of u32 counters read back from one bus watcher */
#define NUM_BUS_WATCHER_RESULTS		9
|
|
|
|
struct bus_watcher_data {
|
|
union {
|
|
u32 results[NUM_BUS_WATCHER_RESULTS];
|
|
struct {
|
|
u32 atrans;
|
|
u32 atwait;
|
|
u32 amax;
|
|
u32 wtrans;
|
|
u32 wtwait;
|
|
u32 wmax;
|
|
u32 rtrans;
|
|
u32 rtwait;
|
|
u32 rmax;
|
|
};
|
|
};
|
|
};
|
|
|
|
|
|
struct rpi_axiperf {
|
|
struct platform_device *dev;
|
|
struct dentry *root_folder;
|
|
|
|
struct task_struct *monitor_thread;
|
|
struct mutex lock;
|
|
|
|
struct rpi_firmware *firmware;
|
|
|
|
/* Sample time spent on for each bus */
|
|
int sample_time;
|
|
|
|
/* Now storage for the per monitor settings and the resulting
|
|
* performance figures
|
|
*/
|
|
struct {
|
|
/* Bit field of buses we want to monitor */
|
|
int bus_enabled;
|
|
/* Bit field of buses to filter by */
|
|
int bus_filter;
|
|
/* The current buses being monitored on this monitor */
|
|
int current_bus[NUM_BUS_WATCHERS_PER_MONITOR];
|
|
/* The last bus monitored on this monitor */
|
|
int last_monitored;
|
|
|
|
/* Set true if this mailbox must use the mailbox interface
|
|
* rather than access registers directly.
|
|
*/
|
|
int use_mailbox_interface;
|
|
|
|
/* Current result values */
|
|
struct bus_watcher_data results[MAX_BUSES];
|
|
|
|
struct dentry *debugfs_entry;
|
|
void __iomem *base_address;
|
|
|
|
} monitor[NUM_MONITORS];
|
|
|
|
};
|
|
|
|
static struct rpi_axiperf *state;
|
|
|
|
/* Two monitors, System and VPU, each with the following register sets.
 * Each monitor has three bus watchers and each watcher can only watch one
 * bus at a time, so we time share them, giving each set of buses 100ms
 * (default, settable via debugfs) of time on its associated monitor.
 * Record the results from the three bus watchers per monitor and make them
 * available through debugfs.
 */
|
|
|
|
/* general registers */
|
|
const int GEN_CTRL;
|
|
|
|
const int GEN_CTL_ENABLE_BIT = BIT(0);
|
|
const int GEN_CTL_RESET_BIT = BIT(1);
|
|
|
|
/* Bus watcher registers */
|
|
const int BW_PITCH = 0x40;
|
|
|
|
const int BW0_CTRL = 0x40;
|
|
const int BW1_CTRL = 0x80;
|
|
const int BW2_CTRL = 0xc0;
|
|
|
|
const int BW_ATRANS_OFFSET = 0x04;
|
|
const int BW_ATWAIT_OFFSET = 0x08;
|
|
const int BW_AMAX_OFFSET = 0x0c;
|
|
const int BW_WTRANS_OFFSET = 0x10;
|
|
const int BW_WTWAIT_OFFSET = 0x14;
|
|
const int BW_WMAX_OFFSET = 0x18;
|
|
const int BW_RTRANS_OFFSET = 0x1c;
|
|
const int BW_RTWAIT_OFFSET = 0x20;
|
|
const int BW_RMAX_OFFSET = 0x24;
|
|
|
|
const int BW_CTRL_RESET_BIT = BIT(31);
|
|
const int BW_CTRL_ENABLE_BIT = BIT(30);
|
|
const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29);
|
|
const int BW_CTRL_LIMIT_HALT_BIT = BIT(28);
|
|
|
|
const int BW_CTRL_SOURCE_SHIFT = 8;
|
|
const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits
|
|
const int BW_CTRL_BUS_WATCH_SHIFT;
|
|
const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits
|
|
const int BW_CTRL_BUS_FILTER_SHIFT = 8;
|
|
|
|
/*
 * Display names for the filter setting, indexed by the low five bits of the
 * per-monitor filter value.  Entry 0 is deliberately empty.
 */
static const char *bus_filter_strings[] = {
	[0]  = "",
	[1]  = "CORE0_V",
	[2]  = "ICACHE0",
	[3]  = "DCACHE0",
	[4]  = "CORE1_V",
	[5]  = "ICACHE1",
	[6]  = "DCACHE1",
	[7]  = "L2_MAIN",
	[8]  = "HOST_PORT",
	[9]  = "HOST_PORT2",
	[10] = "HVS",
	[11] = "ISP",
	[12] = "VIDEO_DCT",
	[13] = "VIDEO_SD2AXI",
	[14] = "CAM0",
	[15] = "CAM1",
	[16] = "DMA0",
	[17] = "DMA1",
	[18] = "DMA2_VPU",
	[19] = "JPEG",
	[20] = "VIDEO_CME",
	[21] = "TRANSPOSER",
	[22] = "VIDEO_FME",
	[23] = "CCP2TX",
	[24] = "USB",
	[25] = "V3D0",
	[26] = "V3D1",
	[27] = "V3D2",
	[28] = "AVE",
	[29] = "DEBUG",
	[30] = "CPU",
	[31] = "M30",
};
|
|
|
|
/* Number of entries in bus_filter_strings; only used inside this file */
static const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
|
|
|
|
/*
 * Display names of the buses on the System monitor, indexed by bus number
 * (the bit position in that monitor's enable mask).
 */
static const char *system_bus_string[] = {
	[0]  = "DMA_L2",
	[1]  = "TRANS",
	[2]  = "JPEG",
	[3]  = "SYSTEM_UC",
	[4]  = "DMA_UC",
	[5]  = "SYSTEM_L2",
	[6]  = "CCP2TX",
	[7]  = "MPHI_RX",
	[8]  = "MPHI_TX",
	[9]  = "HVS",
	[10] = "H264",
	[11] = "ISP",
	[12] = "V3D",
	[13] = "PERIPHERAL",
	[14] = "CPU_UC",
	[15] = "CPU_L2",
};
|
|
|
|
/* Number of entries in system_bus_string; only used inside this file */
static const int num_system_buses = ARRAY_SIZE(system_bus_string);
|
|
|
|
/*
 * Display names of the buses on the VPU monitor, indexed by bus number
 * (the bit position in that monitor's enable mask).
 */
static const char *vpu_bus_string[] = {
	[0]  = "VPU1_D_L2",
	[1]  = "VPU0_D_L2",
	[2]  = "VPU1_I_L2",
	[3]  = "VPU0_I_L2",
	[4]  = "SYSTEM_L2",
	[5]  = "L2_FLUSH",
	[6]  = "DMA_L2",
	[7]  = "VPU1_D_UC",
	[8]  = "VPU0_D_UC",
	[9]  = "VPU1_I_UC",
	[10] = "VPU0_I_UC",
	[11] = "SYSTEM_UC",
	[12] = "L2_OUT",
	[13] = "DMA_UC",
	[14] = "SDRAM",
	[15] = "L2_IN",
};
|
|
|
|
/* Number of entries in vpu_bus_string; only used inside this file */
static const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
|
|
|
|
/* debugfs directory name of each monitor, indexed by monitor number */
static const char *monitor_name[] = {
	[0] = "System",
	[1] = "VPU",
};
|
|
|
|
/*
 * Write @value to the register located @reg bytes past the base address of
 * monitor number @monitor, using a direct MMIO access.
 */
static inline void write_reg(int monitor, int reg, u32 value)
{
	void __iomem *target = state->monitor[monitor].base_address + reg;

	writel(value, target);
}
|
|
|
|
/*
 * Return the contents of the register located @reg bytes past the base
 * address of monitor number @monitor, using a direct MMIO access.
 */
static inline u32 read_reg(int monitor, u32 reg)
{
	void __iomem *source = state->monitor[monitor].base_address + reg;

	return readl(source);
}
|
|
|
|
/*
 * Fetch the NUM_BUS_WATCHER_RESULTS counters of one bus watcher.
 *
 * @monitor: index of the monitor the watcher belongs to
 * @watcher: offset of the watcher's control register from the monitor base
 * @results: destination for NUM_BUS_WATCHER_RESULTS u32 values
 *
 * Monitors flagged use_mailbox_interface are read through a firmware
 * property call, all others by reading the registers directly.  If the
 * firmware call fails, @results is left untouched and an error is logged
 * once.
 */
static void read_bus_watcher(int monitor, int watcher, u32 *results)
{
	/* Address of the first result register of this watcher */
	void __iomem *first = state->monitor[monitor].base_address
			      + watcher + BW_ATRANS_OFFSET;
	/* Start address, length, then room for the nine results */
	u32 msg[11];
	int err;
	int n;

	if (!state->monitor[monitor].use_mailbox_interface) {
		/* The result registers are consecutive 32-bit words */
		for (n = 0; n < NUM_BUS_WATCHER_RESULTS; n++)
			results[n] = readl(first + 4 * n);
		return;
	}

	msg[0] = (u32)(uintptr_t)first;
	msg[1] = NUM_BUS_WATCHER_RESULTS;

	err = rpi_firmware_property(state->firmware,
				    RPI_FIRMWARE_GET_PERIPH_REG,
				    msg, sizeof(msg));

	if (err < 0 || msg[1] != NUM_BUS_WATCHER_RESULTS) {
		dev_err_once(&state->dev->dev,
			     "Failed to read bus watcher");
		return;
	}

	memcpy(results, &msg[2], NUM_BUS_WATCHER_RESULTS * sizeof(u32));
}
|
|
|
|
/*
 * Write @set to the general control register (GEN_CTRL) of monitor number
 * @monitor, either through the firmware mailbox or directly, depending on
 * the monitor's use_mailbox_interface flag.  A failed firmware call is
 * logged once and otherwise ignored.
 */
static void set_monitor_control(int monitor, u32 set)
{
	u32 msg[3];
	int err;

	if (!state->monitor[monitor].use_mailbox_interface) {
		write_reg(monitor, GEN_CTRL, set);
		return;
	}

	/* Register address, a count of one, and the value to write */
	msg[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address +
				  GEN_CTRL);
	msg[1] = 1;
	msg[2] = set;

	err = rpi_firmware_property(state->firmware,
				    RPI_FIRMWARE_SET_PERIPH_REG,
				    msg, sizeof(msg));

	if (err < 0 || msg[1] != 1)
		dev_err_once(&state->dev->dev,
			     "Failed to set monitor control");
}
|
|
|
|
/*
 * Write @set to the control register of one bus watcher.
 *
 * @monitor: index of the monitor the watcher belongs to
 * @watcher: offset of the watcher's control register from the monitor base
 * @set:     value to write
 *
 * The write goes through the firmware mailbox or directly to the register,
 * depending on the monitor's use_mailbox_interface flag.  A failed firmware
 * call is logged once and otherwise ignored.
 */
static void set_bus_watcher_control(int monitor, int watcher, u32 set)
{
	u32 msg[3];
	int err;

	if (!state->monitor[monitor].use_mailbox_interface) {
		write_reg(monitor, watcher, set);
		return;
	}

	/* Register address, a count of one, and the value to write */
	msg[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address +
				  watcher);
	msg[1] = 1;
	msg[2] = set;

	err = rpi_firmware_property(state->firmware,
				    RPI_FIRMWARE_SET_PERIPH_REG,
				    msg, sizeof(msg));

	if (err < 0 || msg[1] != 1)
		dev_err_once(&state->dev->dev,
			     "Failed to set bus watcher control");
}
|
|
|
|
static void monitor(struct rpi_axiperf *state)
|
|
{
|
|
int monitor, num_buses[NUM_MONITORS];
|
|
|
|
mutex_lock(&state->lock);
|
|
|
|
for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
|
|
typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
|
|
|
|
/* Anything enabled? */
|
|
if (mon->bus_enabled == 0) {
|
|
/* No, disable all monitoring for this monitor */
|
|
set_monitor_control(monitor, GEN_CTL_RESET_BIT);
|
|
} else {
|
|
int i;
|
|
|
|
/* Find out how many busses we want to monitor, and
|
|
* spread our 3 actual monitors over them
|
|
*/
|
|
num_buses[monitor] = hweight32(mon->bus_enabled);
|
|
num_buses[monitor] = min(num_buses[monitor],
|
|
NUM_BUS_WATCHERS_PER_MONITOR);
|
|
|
|
for (i = 0; i < num_buses[monitor]; i++) {
|
|
int bus_control;
|
|
|
|
do {
|
|
mon->last_monitored++;
|
|
mon->last_monitored &= 0xf;
|
|
} while ((mon->bus_enabled &
|
|
(1 << mon->last_monitored)) == 0);
|
|
|
|
mon->current_bus[i] = mon->last_monitored;
|
|
|
|
/* Reset the counters */
|
|
set_bus_watcher_control(monitor,
|
|
BW0_CTRL +
|
|
i*BW_PITCH,
|
|
BW_CTRL_RESET_BIT);
|
|
|
|
bus_control = BW_CTRL_ENABLE_BIT |
|
|
mon->current_bus[i];
|
|
|
|
if (mon->bus_filter) {
|
|
bus_control |=
|
|
BW_CTRL_ENABLE_ID_FILTER_BIT;
|
|
bus_control |=
|
|
((mon->bus_filter & 0x1f)
|
|
<< BW_CTRL_BUS_FILTER_SHIFT);
|
|
}
|
|
|
|
// Start capture
|
|
set_bus_watcher_control(monitor,
|
|
BW0_CTRL + i*BW_PITCH,
|
|
bus_control);
|
|
}
|
|
}
|
|
|
|
/* start monitoring */
|
|
set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
|
|
}
|
|
|
|
mutex_unlock(&state->lock);
|
|
|
|
msleep(state->sample_time);
|
|
|
|
/* Now read the results */
|
|
|
|
mutex_lock(&state->lock);
|
|
for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
|
|
typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
|
|
|
|
/* Anything enabled? */
|
|
if (mon->bus_enabled == 0) {
|
|
/* No, disable all monitoring for this monitor */
|
|
set_monitor_control(monitor, 0);
|
|
} else {
|
|
int i;
|
|
|
|
for (i = 0; i < num_buses[monitor]; i++) {
|
|
int bus = mon->current_bus[i];
|
|
|
|
read_bus_watcher(monitor,
|
|
BW0_CTRL + i*BW_PITCH,
|
|
(u32 *)&mon->results[bus].results);
|
|
}
|
|
}
|
|
}
|
|
mutex_unlock(&state->lock);
|
|
}
|
|
|
|
/*
 * Body of the sampling kthread.  @data is the driver state.  One sampling
 * cycle is always run before the stop request is checked; the thread exits
 * with 0 once kthread_should_stop() reports true.
 */
static int monitor_thread(void *data)
{
	struct rpi_axiperf *perf = data;

	do {
		monitor(perf);
	} while (!kthread_should_stop());

	return 0;
}
|
|
|
|
/*
 * debugfs read handler for a monitor's "data" file.
 *
 * Formats the current results of every enabled bus of the monitor into a
 * temporary buffer and hands the requested window of it to user space.  The
 * monitor index is taken from the file's private data; an out-of-range
 * index falls back to monitor 0.
 *
 * Output that does not fit in INIT_BUFF_SIZE bytes is truncated.
 * scnprintf() returns the number of characters actually stored, so the
 * running length can never pass the end of the buffer and no separate
 * truncation path (with its own unlock and termination) is needed.
 *
 * Returns the number of bytes copied, or a negative errno (-ENOMEM if the
 * temporary buffer cannot be allocated).
 */
static ssize_t myreader(struct file *fp, char __user *user_buffer,
			size_t count, loff_t *position)
{
#define INIT_BUFF_SIZE 2048
#define DIVIDER (1024)

	int idx = (int)(uintptr_t)(fp->private_data);
	typeof(state->monitor[0]) *mon;
	const char * const *bus_names;
	int num_buses, i;
	size_t len = 0;
	char *string_buffer;
	ssize_t ret;

	/* Validate the index before it is used to pick a monitor */
	if (idx < 0 || idx >= NUM_MONITORS)
		idx = 0;

	mon = &state->monitor[idx];

	if (idx == SYSTEM_MONITOR) {
		bus_names = system_bus_string;
		num_buses = num_system_buses;
	} else {
		bus_names = vpu_bus_string;
		num_buses = num_vpu_buses;
	}

	string_buffer = kmalloc(INIT_BUFF_SIZE, GFP_KERNEL);
	if (!string_buffer) {
		dev_err(&state->dev->dev,
			"Failed temporary string allocation\n");
		return -ENOMEM;
	}

	mutex_lock(&state->lock);

	if (mon->bus_filter) {
		/* Clamp to the last valid table entry */
		int filt = min(mon->bus_filter & 0x1f, num_bus_filters - 1);

		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
				 "\nMonitoring transactions from %s only\n",
				 bus_filter_strings[filt]);
	}

	len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
			 " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
			 "======================================================================================================\n");

	for (i = 0; i < num_buses; i++) {
		typeof(mon->results[0]) *res = &mon->results[i];

		if (!(mon->bus_enabled & BIT(i)))
			continue;

		/* Counters are shown in units of DIVIDER (1024) */
		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
				 "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
				 bus_names[i],
				 res->atrans / DIVIDER,
				 res->atwait / DIVIDER,
				 res->amax / DIVIDER,
				 res->wtrans / DIVIDER,
				 res->wtwait / DIVIDER,
				 res->wmax / DIVIDER,
				 res->rtrans / DIVIDER,
				 res->rtwait / DIVIDER,
				 res->rmax / DIVIDER);
	}

	mutex_unlock(&state->lock);

	ret = simple_read_from_buffer(user_buffer, count, position,
				      string_buffer, len);

	kfree(string_buffer);

	return ret;
}
|
|
|
|
/*
 * debugfs write handler for a monitor's "data" file.
 *
 * Nothing is done with the written data yet; every write is accepted and
 * reported as fully consumed.  Returns @count.
 */
static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
			size_t count, loff_t *position)
{
	int idx = (int)(uintptr_t)(fp->private_data);

	/* Valid monitor indices are 0..NUM_MONITORS-1 */
	if (idx < 0 || idx >= NUM_MONITORS)
		idx = 0;

	/* At the moment, this does nothing, but in the future it could be
	 * used to reset counters etc
	 */
	return count;
}
|
|
|
|
static const struct file_operations fops_debug = {
|
|
.read = myreader,
|
|
.write = mywriter,
|
|
.open = simple_open
|
|
};
|
|
|
|
static int rpi_axiperf_probe(struct platform_device *pdev)
|
|
{
|
|
int ret = 0, i;
|
|
struct device *dev = &pdev->dev;
|
|
struct device_node *np = dev->of_node;
|
|
struct device_node *fw_node;
|
|
|
|
state = kzalloc(sizeof(struct rpi_axiperf), GFP_KERNEL);
|
|
if (!state)
|
|
return -ENOMEM;
|
|
|
|
/* Get the firmware handle for future rpi-firmware-xxx calls */
|
|
fw_node = of_parse_phandle(np, "firmware", 0);
|
|
if (!fw_node) {
|
|
dev_err(dev, "Missing firmware node\n");
|
|
return -ENOENT;
|
|
}
|
|
|
|
state->firmware = rpi_firmware_get(fw_node);
|
|
if (!state->firmware)
|
|
return -EPROBE_DEFER;
|
|
|
|
/* Special case for the VPU monitor, we must use the mailbox interface
|
|
* as it is not accessible from the ARM address space.
|
|
*/
|
|
state->monitor[VPU_MONITOR].use_mailbox_interface = 1;
|
|
state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0;
|
|
|
|
for (i = 0; i < NUM_MONITORS; i++) {
|
|
if (state->monitor[i].use_mailbox_interface) {
|
|
of_property_read_u32_index(np, "reg", i*2,
|
|
(u32 *)(&state->monitor[i].base_address));
|
|
} else {
|
|
struct resource *resource =
|
|
platform_get_resource(pdev, IORESOURCE_MEM, i);
|
|
|
|
state->monitor[i].base_address =
|
|
devm_ioremap_resource(&pdev->dev, resource);
|
|
}
|
|
|
|
if (IS_ERR(state->monitor[i].base_address))
|
|
return PTR_ERR(state->monitor[i].base_address);
|
|
|
|
/* Enable all buses by default */
|
|
state->monitor[i].bus_enabled = 0xffff;
|
|
}
|
|
|
|
state->dev = pdev;
|
|
platform_set_drvdata(pdev, state);
|
|
|
|
state->sample_time = DEFAULT_SAMPLE_TIME;
|
|
|
|
/* Set up all the debugfs stuff */
|
|
state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
|
|
|
|
for (i = 0; i < NUM_MONITORS; i++) {
|
|
state->monitor[i].debugfs_entry =
|
|
debugfs_create_dir(monitor_name[i], state->root_folder);
|
|
if (IS_ERR(state->monitor[i].debugfs_entry))
|
|
state->monitor[i].debugfs_entry = NULL;
|
|
|
|
debugfs_create_file("data", 0444,
|
|
state->monitor[i].debugfs_entry,
|
|
(void *)(uintptr_t)i, &fops_debug);
|
|
debugfs_create_u32("enable", 0644,
|
|
state->monitor[i].debugfs_entry,
|
|
&state->monitor[i].bus_enabled);
|
|
debugfs_create_u32("filter", 0644,
|
|
state->monitor[i].debugfs_entry,
|
|
&state->monitor[i].bus_filter);
|
|
debugfs_create_u32("sample_time", 0644,
|
|
state->monitor[i].debugfs_entry,
|
|
&state->sample_time);
|
|
}
|
|
|
|
mutex_init(&state->lock);
|
|
|
|
state->monitor_thread = kthread_run(monitor_thread, state,
|
|
"rpi-axiperfmon");
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
static int rpi_axiperf_remove(struct platform_device *dev)
|
|
{
|
|
int ret = 0;
|
|
|
|
kthread_stop(state->monitor_thread);
|
|
|
|
debugfs_remove_recursive(state->root_folder);
|
|
state->root_folder = NULL;
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Device tree compatible strings this driver binds to */
static const struct of_device_id rpi_axiperf_match[] = {
	{ .compatible = "brcm,bcm2835-axiperf" },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
|
|
|
|
static struct platform_driver rpi_axiperf_driver = {
|
|
.probe = rpi_axiperf_probe,
|
|
.remove = rpi_axiperf_remove,
|
|
.driver = {
|
|
.name = "rpi-bcm2835-axiperf",
|
|
.of_match_table = of_match_ptr(rpi_axiperf_match),
|
|
},
|
|
};
|
|
|
|
module_platform_driver(rpi_axiperf_driver);
|
|
|
|
/* Module information */
|
|
MODULE_AUTHOR("James Hughes <james.hughes@raspberrypi.org>");
|
|
MODULE_DESCRIPTION("RPI AXI Performance monitor driver");
|
|
MODULE_LICENSE("GPL");
|
|
|