Re: [PATCH v9 3/4] drivers/perf: add DesignWare PCIe PMU driver

From: Baolin Wang
Date: Sun Oct 22 2023 - 22:05:47 EST




On 10/22/2023 3:47 PM, Shuai Xue wrote:
Hi, Baolin,

I droped your Revivewed-by tag due to that I made significant changes to this
patch previously, please explicty give me Revivewed-by tag again if you are
happy with the changes.

Yes, I am happy with this version (just some nits as below), and thanks for the review from other guys. Please feel free to add:

Reviewed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>

On 2023/10/20 21:42, Shuai Xue wrote:
This commit adds the PCIe Performance Monitoring Unit (PMU) driver support
for T-Head Yitian SoC chip. Yitian is based on the Synopsys PCI Express
Core controller IP which provides statistics feature. The PMU is a PCIe
configuration space register block provided by each PCIe Root Port in a
Vendor-Specific Extended Capability named RAS D.E.S (Debug, Error
injection, and Statistics).

To facilitate collection of statistics the controller provides the
following two features for each Root Port:

- one 64-bit counter for Time Based Analysis (RX/TX data throughput and
time spent in each low-power LTSSM state) and
- one 32-bit counter for Event Counting (error and non-error events for
a specified lane)

Note: There is no interrupt for counter overflow.

This driver adds PMU devices for each PCIe Root Port. And the PMU device is
named based the BDF of Root Port. For example,

30:03.0 PCI bridge: Device 1ded:8000 (rev 01)

the PMU device name for this Root Port is dwc_rootport_3018.

Example usage of counting PCIe RX TLP data payload (Units of bytes)::

$# perf stat -a -e dwc_rootport_3018/Rx_PCIe_TLP_Data_Payload/

average RX bandwidth can be calculated like this:

PCIe TX Bandwidth = Rx_PCIe_TLP_Data_Payload / Measure_Time_Window

Signed-off-by: Shuai Xue <xueshuai@xxxxxxxxxxxxxxxxx>
---

[snip]

+static u64 dwc_pcie_pmu_read_time_based_counter(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ struct pci_dev *pdev = pcie_pmu->pdev;
+ int event_id = DWC_PCIE_EVENT_ID(event);
+ u16 ras_des_offset = pcie_pmu->ras_des_offset;
+ u32 lo, hi, ss;
+
+ /*
+ * The 64-bit value of the data counter is spread across two
+ * registers that are not synchronized. In order to read them
+ * atomically, ensure that the high 32 bits match before and after
+ * reading the low 32 bits.
+ */
+ pci_read_config_dword(pdev, ras_des_offset +
+ DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH, &hi);
+ do {
+ /* snapshot the high 32 bits */
+ ss = hi;
+
+ pci_read_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_LOW,
+ &lo);
+ pci_read_config_dword(
+ pdev, ras_des_offset + DWC_PCIE_TIME_BASED_ANAL_DATA_REG_HIGH,
+ &hi);
+ } while (hi != ss);
+
+ /*
+ * The Group#1 event measures the amount of data processed in 16-byte
+ * units. Simplify the end-user interface by multiplying the counter
+ * at the point of read.
+ */
+ if (event_id >= 0x20 && event_id <= 0x23)
+ return (((u64)hi << 32) | lo) << 4;
+ else

You can drop the 'else'.

+ return (((u64)hi << 32) | lo);
+}
+
+static void dwc_pcie_pmu_event_update(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ u64 delta, prev, now;
+
+ do {
+ prev = local64_read(&hwc->prev_count);
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ now = dwc_pcie_pmu_read_lane_event_counter(event);
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ now = dwc_pcie_pmu_read_time_based_counter(event);
+
+ } while (local64_cmpxchg(&hwc->prev_count, prev, now) != prev);
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ delta = (now - prev) & DWC_PCIE_LANE_EVENT_MAX_PERIOD;
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ delta = (now - prev) & DWC_PCIE_TIME_BASED_EVENT_MAX_PERIOD;
+
+ local64_add(delta, &event->count);
+}
+
+static int dwc_pcie_pmu_event_init(struct perf_event *event)
+{
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+ struct perf_event *sibling;
+ u32 lane;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* We don't support sampling */
+ if (is_sampling_event(event))
+ return -EINVAL;
+
+ /* We cannot support task bound events */
+ if (event->cpu < 0 || event->attach_state & PERF_ATTACH_TASK)
+ return -EINVAL;
+
+ if (event->group_leader != event &&
+ !is_software_event(event->group_leader))
+ return -EINVAL;
+
+ for_each_sibling_event(sibling, event->group_leader) {
+ if (sibling->pmu != event->pmu && !is_software_event(sibling))
+ return -EINVAL;
+ }
+
+ if (type == DWC_PCIE_LANE_EVENT) {
+ lane = DWC_PCIE_EVENT_LANE(event);
+ if (lane < 0 || lane >= pcie_pmu->nr_lanes)
+ return -EINVAL;
+ }
+
+ event->cpu = pcie_pmu->on_cpu;
+
+ return 0;
+}
+
+static void dwc_pcie_pmu_set_period(struct hw_perf_event *hwc)
+{
+ local64_set(&hwc->prev_count, 0);
+}

Only dwc_pcie_pmu_event_start() will call this small function, why just remove this function and move local64_set() into dwc_pcie_pmu_event_start()?

+
+static void dwc_pcie_pmu_event_start(struct perf_event *event, int flags)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct dwc_pcie_pmu *pcie_pmu = to_dwc_pcie_pmu(event->pmu);
+ enum dwc_pcie_event_type type = DWC_PCIE_EVENT_TYPE(event);
+
+ hwc->state = 0;
+ dwc_pcie_pmu_set_period(hwc);
+
+ if (type == DWC_PCIE_LANE_EVENT)
+ dwc_pcie_pmu_lane_event_enable(pcie_pmu, true);
+ else if (type == DWC_PCIE_TIME_BASE_EVENT)
+ dwc_pcie_pmu_time_based_event_enable(pcie_pmu, true);
+}
+