Re: [RFC PATCH v7 12/12] memory: RAS2: Add memory RAS2 driver

From: Daniel Ferguson
Date: Thu Mar 28 2024 - 19:41:59 EST


+/*
+ * The below functions are exposed to OSPM, to query, configure and
+ * initiate memory patrol scrub.
+ */
+static int ras2_is_patrol_scrub_support(struct ras2_context *ras2_ctx)
+{
+ int ret;
+ struct acpi_ras2_shared_memory __iomem *generic_comm_base;
+
+ if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
+ return -EFAULT;
+
+ generic_comm_base = ras2_ctx->pcc_comm_addr;
+ guard(spinlock_irqsave)(&ras2_ctx->spinlock);
+ generic_comm_base->set_capabilities[0] = 0;
+
+ /* send command for reading RAS2 capabilities */
+ ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
+ if (ret) {
+ dev_err(ras2_ctx->dev,
+ "%s: ras2_send_pcc_cmd failed\n", __func__);
+ return ret;
+ }
+
+ return generic_comm_base->features[0] & RAS2_SUPPORT_HW_PARTOL_SCRUB;

Since firmware populates the feature bitmask on initialization, it would seem
that we do not need to send a PCC CMD EXEC to read RAS2 capabilities.
+}
+
+static int ras2_get_patrol_scrub_params(struct ras2_context *ras2_ctx,
+ struct ras2_scrub_params *params)
+{
+ int ret = 0;
+ u8 min_supp_scrub_rate, max_supp_scrub_rate;
+ struct acpi_ras2_shared_memory __iomem *generic_comm_base;
+ struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
+
+ if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
+ return -EFAULT;
+
+ generic_comm_base = ras2_ctx->pcc_comm_addr;
+ patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
+
+ guard(spinlock_irqsave)(&ras2_ctx->spinlock);
+ generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
+ /* send command for reading RAS2 capabilities */
+ ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
+ if (ret) {
+ dev_err(ras2_ctx->dev,
+ "%s: ras2_send_pcc_cmd failed\n", __func__);
+ return ret;
+ }

Similarly, since firmware populates the feature bitmask on
initialization, it would seem that we do not need to send
a PCC CMD EXEC to read RAS2 capabilities.
+
+ if (!(generic_comm_base->features[0] & RAS2_SUPPORT_HW_PARTOL_SCRUB) ||
+ !(generic_comm_base->num_parameter_blocks)) {
+ dev_err(ras2_ctx->dev,
+ "%s: Platform does not support HW Patrol Scrubber\n", __func__);
+ return -EOPNOTSUPP;
+ }
+
+ if (!patrol_scrub_params->requested_address_range[1]) {
+ dev_err(ras2_ctx->dev,
+ "%s: Invalid requested address range, \
+ requested_address_range[0]=0x%llx \
+ requested_address_range[1]=0x%llx\n",
+ __func__,
+ patrol_scrub_params->requested_address_range[0],
+ patrol_scrub_params->requested_address_range[1]);
+ return -EOPNOTSUPP;
+ }
+
+ generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
+ patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
+ patrol_scrub_params->patrol_scrub_command = RAS2_GET_PATROL_PARAMETERS;
+
+ /* send command for reading the HW patrol scrub parameters */
+ ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
+ if (ret) {
+ dev_err(ras2_ctx->dev,
+ "%s: failed to read HW patrol scrub parameters\n",
+ __func__);
+ return ret;
+ }
+
+ /* copy output scrub parameters */
+ params->addr_base = patrol_scrub_params->actual_address_range[0];
+ params->addr_size = patrol_scrub_params->actual_address_range[1];
+ params->flags = patrol_scrub_params->flags;
+ params->rate = FIELD_GET(RAS2_PATROL_SCRUB_RATE_OUT_MASK,
+ patrol_scrub_params->scrub_params_out);
+ min_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MIN_RATE_OUT_MASK,
+ patrol_scrub_params->scrub_params_out);
+ max_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MAX_RATE_OUT_MASK,
+ patrol_scrub_params->scrub_params_out);
+ snprintf(params->rate_avail, RAS2_MAX_RATE_RANGE_LENGTH,
+ "%d-%d", min_supp_scrub_rate, max_supp_scrub_rate);
+
+ return 0;
+}
+
+static int ras2_enable_patrol_scrub(struct ras2_context *ras2_ctx, bool enable)
+{
+ int ret = 0;
+ struct ras2_scrub_params params;
+ struct acpi_ras2_shared_memory __iomem *generic_comm_base;
+ u8 scrub_rate_to_set, min_supp_scrub_rate, max_supp_scrub_rate;
+ struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
+
+ if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
+ return -EFAULT;
+
+ generic_comm_base = ras2_ctx->pcc_comm_addr;
+ patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
+
+ if (enable) {
+ ret = ras2_get_patrol_scrub_params(ras2_ctx, &params);
+ if (ret)
+ return ret;
+ }
+
+ guard(spinlock_irqsave)(&ras2_ctx->spinlock);
+ generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
+ patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
+
+ if (enable) {
+ patrol_scrub_params->patrol_scrub_command = RAS2_START_PATROL_SCRUBBER;
+ patrol_scrub_params->requested_address_range[0] = params.addr_base;
+ patrol_scrub_params->requested_address_range[1] = params.addr_size;
+
+ scrub_rate_to_set = FIELD_GET(RAS2_PATROL_SCRUB_RATE_IN_MASK,
+ patrol_scrub_params->scrub_params_in);
+ min_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MIN_RATE_OUT_MASK,
+ patrol_scrub_params->scrub_params_out);
+ max_supp_scrub_rate = FIELD_GET(RAS2_PATROL_SCRUB_MAX_RATE_OUT_MASK,
+ patrol_scrub_params->scrub_params_out);
+ if (scrub_rate_to_set < min_supp_scrub_rate ||
+ scrub_rate_to_set > max_supp_scrub_rate) {
+ dev_warn(ras2_ctx->dev,
+ "patrol scrub rate to set is out of the supported range\n");
+ dev_warn(ras2_ctx->dev,
+ "min_supp_scrub_rate=%d max_supp_scrub_rate=%d\n",
+ min_supp_scrub_rate, max_supp_scrub_rate);
+ return -EINVAL;
+ }
+ } else {
+ patrol_scrub_params->patrol_scrub_command = RAS2_STOP_PATROL_SCRUBBER;
+ }
+
+ /* send command for enable/disable HW patrol scrub */
+ ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
+ if (ret) {
+ pr_err("%s: failed to enable/disable the HW patrol scrub\n", __func__);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int ras2_enable_background_scrub(struct ras2_context *ras2_ctx, bool enable)
+{
+ int ret;
+ struct acpi_ras2_shared_memory __iomem *generic_comm_base;
+ struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
+
+ if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
+ return -EFAULT;
+
+ generic_comm_base = ras2_ctx->pcc_comm_addr;
+ patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
+
+ guard(spinlock_irqsave)(&ras2_ctx->spinlock);
+ generic_comm_base->set_capabilities[0] = RAS2_SUPPORT_HW_PARTOL_SCRUB;
+ patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;

header.type should already be populated by firmware. Is assigning it
here necessary?
+ patrol_scrub_params->patrol_scrub_command = RAS2_START_PATROL_SCRUBBER;
+
+ patrol_scrub_params->scrub_params_in &= ~RAS2_PATROL_SCRUB_EN_BACKGROUND;
+ patrol_scrub_params->scrub_params_in |= FIELD_PREP(RAS2_PATROL_SCRUB_EN_BACKGROUND,
+ enable);
+
+ /* send command for enable/disable HW patrol scrub */
+ ret = ras2_send_pcc_cmd(ras2_ctx, RAS2_PCC_CMD_EXEC);
+ if (ret) {
+ dev_err(ras2_ctx->dev,
+ "%s: failed to enable/disable background patrol scrubbing\n",
+ __func__);
+ return ret;
+ }
+
+ return 0;
+}
+static int ras2_set_patrol_scrub_params(struct ras2_context *ras2_ctx,
+ struct ras2_scrub_params *params, u8 param_type)
+{
+ struct acpi_ras2_shared_memory __iomem *generic_comm_base;
+ struct acpi_ras2_patrol_scrub_parameter __iomem *patrol_scrub_params;
+
+ if (!ras2_ctx || !ras2_ctx->pcc_comm_addr)
+ return -EFAULT;
+
+ generic_comm_base = ras2_ctx->pcc_comm_addr;
+ patrol_scrub_params = ras2_ctx->pcc_comm_addr + sizeof(*generic_comm_base);
+
+ guard(spinlock_irqsave)(&ras2_ctx->spinlock);
+ patrol_scrub_params->header.type = RAS2_TYPE_PATROL_SCRUB;
+ if (param_type == RAS2_MEM_SCRUB_PARAM_ADDR_BASE && params->addr_base) {
+ patrol_scrub_params->requested_address_range[0] = params->addr_base;
+ } else if (param_type == RAS2_MEM_SCRUB_PARAM_ADDR_SIZE && params->addr_size) {
+ patrol_scrub_params->requested_address_range[1] = params->addr_size;
+ } else if (param_type == RAS2_MEM_SCRUB_PARAM_RATE) {
+ patrol_scrub_params->scrub_params_in &= ~RAS2_PATROL_SCRUB_RATE_IN_MASK;
+ patrol_scrub_params->scrub_params_in |= FIELD_PREP(RAS2_PATROL_SCRUB_RATE_IN_MASK,
+ params->rate);
+ } else {
+ dev_err(ras2_ctx->dev, "Invalid patrol scrub parameter to set\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static const struct ras2_hw_scrub_ops ras2_hw_ops = {
+ .enable_scrub = ras2_enable_patrol_scrub,
+ .enable_background_scrub = ras2_enable_background_scrub,
+ .get_scrub_params = ras2_get_patrol_scrub_params,
+ .set_scrub_params = ras2_set_patrol_scrub_params,
+};
+
+static const struct scrub_ops ras2_scrub_ops = {
+ .is_visible = ras2_hw_scrub_is_visible,
+ .read = ras2_hw_scrub_read,
+ .write = ras2_hw_scrub_write,
+ .read_string = ras2_hw_scrub_read_strings,
+};
+
+static DEFINE_IDA(ras2_ida);
+
+static void devm_ras2_release(void *ctx)
+{
+ struct ras2_context *ras2_ctx = ctx;
+
+ ida_free(&ras2_ida, ras2_ctx->id);
+ ras2_unregister_pcc_channel(ras2_ctx);
+}
+
+static int ras2_probe(struct platform_device *pdev)
+{
+ int ret, id;
+ struct mbox_client *cl;
+ struct device *hw_scrub_dev;
+ struct ras2_context *ras2_ctx;
+ char scrub_name[RAS2_MAX_NAME_LENGTH];
+
+ ras2_ctx = devm_kzalloc(&pdev->dev, sizeof(*ras2_ctx), GFP_KERNEL);
+ if (!ras2_ctx)
+ return -ENOMEM;
+
+ ras2_ctx->dev = &pdev->dev;
+ ras2_ctx->ops = &ras2_hw_ops;
+ spin_lock_init(&ras2_ctx->spinlock);
+ platform_set_drvdata(pdev, ras2_ctx);
+
+ cl = &ras2_ctx->mbox_client;
+ /* Request mailbox channel */
+ cl->dev = &pdev->dev;
+ cl->tx_done = ras2_tx_done;
+ cl->knows_txdone = true;
+ ras2_ctx->pcc_subspace_idx = *((int *)pdev->dev.platform_data);
+ dev_dbg(&pdev->dev, "pcc-subspace-id=%d\n", ras2_ctx->pcc_subspace_idx);
+ ret = ras2_register_pcc_channel(ras2_ctx);

In our enabling activities, we have found a challenge here.
Our hardware has a single PCC channel corresponding to a single
platform-wide scrub interface. This driver, following the ACPI spec,
will create a new scrub node for each NUMA node. However, for us,
this means that each scrub device will try to map the same PCC channel,
and this causes an error.
+ if (ret < 0)
+ return ret;
+
+ ret = devm_add_action_or_reset(&pdev->dev, devm_ras2_release, ras2_ctx);
+ if (ret < 0)
+ return ret;
+
+ if (ras2_is_patrol_scrub_support(ras2_ctx)) {
+ id = ida_alloc(&ras2_ida, GFP_KERNEL);
+ if (id < 0)
+ return id;
+ ras2_ctx->id = id;
+ snprintf(scrub_name, sizeof(scrub_name), "%s%d", RAS2_SCRUB, id);
+ dev_set_name(&pdev->dev, RAS2_ID_FORMAT, id);
+ hw_scrub_dev = devm_scrub_device_register(&pdev->dev, scrub_name,
+ ras2_ctx, &ras2_scrub_ops,
+ 0, NULL);
+ if (PTR_ERR_OR_ZERO(hw_scrub_dev))
+ return PTR_ERR_OR_ZERO(hw_scrub_dev);
+ }
+ ras2_ctx->scrub_dev = hw_scrub_dev;
+
+ return 0;
+}