Re: [PATCH] ath10k: Don't always treat modem stop events as crashes

From: Steev Klimaszewski
Date: Sun Sep 05 2021 - 20:43:53 EST



On 9/5/21 4:04 PM, Stephen Boyd wrote:
> When rebooting on sc7180 Trogdor devices I see the following crash from
> the wifi driver.
>
> ath10k_snoc 18800000.wifi: firmware crashed! (guid 83493570-29a2-4e98-a83e-70048c47669c)
>
> This is because a modem stop event looks just like a firmware crash to
> the driver, the qmi connection is closed in both cases. Use the qcom ssr
> notifier block to stop treating the qmi connection close event as a
> firmware crash signal when the modem hasn't actually crashed. See
> ath10k_qmi_event_server_exit() for more details.
>
> This silences the crash message seen during every reboot.
>
> Fixes: 3f14b73c3843 ("ath10k: Enable MSA region dump support for WCN3990")
> Cc: Govind Singh <govinds@xxxxxxxxxxxxxx>
> Cc: Youghandhar Chintala <youghand@xxxxxxxxxxxxxx>
> Cc: Abhishek Kumar <kuabhs@xxxxxxxxxxxx>
> Signed-off-by: Stephen Boyd <swboyd@xxxxxxxxxxxx>
> ---
> drivers/net/wireless/ath/ath10k/snoc.c | 75 ++++++++++++++++++++++++++
> drivers/net/wireless/ath/ath10k/snoc.h | 4 ++
> 2 files changed, 79 insertions(+)
>
> diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
> index ea00fbb15601..fc4970e063f8 100644
> --- a/drivers/net/wireless/ath/ath10k/snoc.c
> +++ b/drivers/net/wireless/ath/ath10k/snoc.c
> @@ -12,6 +12,7 @@
> #include <linux/platform_device.h>
> #include <linux/property.h>
> #include <linux/regulator/consumer.h>
> +#include <linux/remoteproc/qcom_rproc.h>
> #include <linux/of_address.h>
> #include <linux/iommu.h>
>
> @@ -1477,6 +1478,70 @@ void ath10k_snoc_fw_crashed_dump(struct ath10k *ar)
> mutex_unlock(&ar->dump_mutex);
> }
>
> +static int ath10k_snoc_modem_notify(struct notifier_block *nb, unsigned long action,
> + void *data)
> +{
> + struct ath10k_snoc *ar_snoc = container_of(nb, struct ath10k_snoc, nb);
> + struct ath10k *ar = ar_snoc->ar;
> + struct qcom_ssr_notify_data *notify_data = data;
> +
> + switch (action) {
> + case QCOM_SSR_BEFORE_POWERUP:
> + ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem starting event\n");
> + clear_bit(ATH10K_SNOC_FLAG_UNREGISTERING, &ar_snoc->flags);
> + break;
> +
> + case QCOM_SSR_AFTER_POWERUP:
> + ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem running event\n");
> + break;
> +
> + case QCOM_SSR_BEFORE_SHUTDOWN:
> + ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem %s event\n",
> + notify_data->crashed ? "crashed" : "stopping");
> + if (!notify_data->crashed)
> + set_bit(ATH10K_SNOC_FLAG_UNREGISTERING, &ar_snoc->flags);
> + else
> + clear_bit(ATH10K_SNOC_FLAG_UNREGISTERING, &ar_snoc->flags);
> + break;
> +
> + case QCOM_SSR_AFTER_SHUTDOWN:
> + ath10k_dbg(ar, ATH10K_DBG_SNOC, "received modem offline event\n");
> + break;
> +
> + default:
> + ath10k_err(ar, "received unrecognized event %lu\n", action);
> + break;
> + }
> +
> + return NOTIFY_OK;
> +}
> +
> +static int ath10k_modem_init(struct ath10k *ar)
> +{
> + struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
> + void *notifier;
> +
> + ar_snoc->nb.notifier_call = ath10k_snoc_modem_notify;
> +
> + notifier = qcom_register_ssr_notifier("mpss", &ar_snoc->nb);
> + if (IS_ERR(notifier))
> + return PTR_ERR(notifier);
> +
> + ar_snoc->notifier = notifier;
> +
> + return 0;
> +}
> +
> +static void ath10k_modem_deinit(struct ath10k *ar)
> +{
> + int ret;
> + struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
> +
> + ret = qcom_unregister_ssr_notifier(ar_snoc->notifier, &ar_snoc->nb);
> + if (ret)
> + ath10k_err(ar, "error %d unregistering notifier\n", ret);
> +}
> +
> static int ath10k_setup_msa_resources(struct ath10k *ar, u32 msa_size)
> {
> struct device *dev = ar->dev;
> @@ -1740,10 +1805,19 @@ static int ath10k_snoc_probe(struct platform_device *pdev)
> goto err_fw_deinit;
> }
>
> + ret = ath10k_modem_init(ar);
> + if (ret) {
> + ath10k_err(ar, "failed to initialize modem notifier: %d\n", ret);
> + goto err_qmi_deinit;
> + }
> +
> ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc probe\n");
>
> return 0;
>
> +err_qmi_deinit:
> + ath10k_qmi_deinit(ar);
> +
> err_fw_deinit:
> ath10k_fw_deinit(ar);
>
> @@ -1771,6 +1845,7 @@ static int ath10k_snoc_free_resources(struct ath10k *ar)
> ath10k_fw_deinit(ar);
> ath10k_snoc_free_irq(ar);
> ath10k_snoc_release_resource(ar);
> + ath10k_modem_deinit(ar);
> ath10k_qmi_deinit(ar);
> ath10k_core_destroy(ar);
>
> diff --git a/drivers/net/wireless/ath/ath10k/snoc.h b/drivers/net/wireless/ath/ath10k/snoc.h
> index 5095d1893681..d986edc772f8 100644
> --- a/drivers/net/wireless/ath/ath10k/snoc.h
> +++ b/drivers/net/wireless/ath/ath10k/snoc.h
> @@ -6,6 +6,8 @@
> #ifndef _SNOC_H_
> #define _SNOC_H_
>
> +#include <linux/notifier.h>
> +
> #include "hw.h"
> #include "ce.h"
> #include "qmi.h"
> @@ -75,6 +77,8 @@ struct ath10k_snoc {
> struct clk_bulk_data *clks;
> size_t num_clks;
> struct ath10k_qmi *qmi;
> + struct notifier_block nb;
> + void *notifier;
> unsigned long flags;
> bool xo_cal_supported;
> u32 xo_cal_data;
>
> base-commit: 7d2a07b769330c34b4deabeed939325c77a7ec2f


I was also seeing this on the Lenovo Yoga C630, so I pulled the patch in
to test here and after testing 20 reboots, I have not seen the message once.

Tested-By: Steev Klimaszewski <steev@xxxxxxxx>