Re: [PATCH] wifi: iwlwifi: mvm: enable thermal zone only when firmware is loaded

From: Zhang, Rui
Date: Sun Jul 16 2023 - 09:45:21 EST


On Thu, 2023-07-13 at 11:41 +0200, Daniel Lezcano wrote:
> On 10/07/2023 12:47, Harry Austen wrote:
> > In iwl_mvm_thermal_zone_register(), when registering a thermal
> > zone, the
> > thermal subsystem will evaluate its temperature.
> > But iwl_mvm_tzone_get_temp() fails at this time because
> > iwl_mvm_firmware_running() returns false.
> > And that's why many users report that they see
> > "thermal thermal_zoneX: failed to read out thermal zone (-61)"
> > message during wifi driver probing.
> >
> > This patch attempts to fix this by delaying enabling of the thermal
> > zone
> > until after the firmware has been loaded/initialized. It also gets
> > disabled when going into suspend.
>
> Thanks for taking care of this bug.
>
> The thermal zone will be accessible from userspace

Currently, I see that the mode is checked only in
__thermal_zone_device_update().

should we limit the userspace access for disabled thermal zone? For
example, return failure when reading 'temp' sysfs attribute.

> and can be enabled
> manually.

maybe we should have a .change_mode() callback and return failure when
enabling the thermal zone with wifi firmware unloaded.

> The resulting temperature read error will be the same in this
> case.
>
> IMO, it is cleaner to actually [un]register the thermal zone when the
> firmware is [un]loaded

Austen,
may I know how often is this wifi firmware loaded/unloaded?

thanks,
rui
>
> > Signed-off-by: Harry Austen <hpausten@xxxxxxxxxxxxxx>
> > ---
> >   .../net/wireless/intel/iwlwifi/mvm/mac80211.c  | 18
> > ++++++++++++++++++
> >   drivers/net/wireless/intel/iwlwifi/mvm/tt.c    |  9 +--------
> >   2 files changed, 19 insertions(+), 8 deletions(-)
> >
> > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > index ce7905faa08f..a47d29a64dd4 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
> > @@ -1187,6 +1187,17 @@ int iwl_mvm_mac_start(struct ieee80211_hw
> > *hw)
> >
> >         mutex_unlock(&mvm->mutex);
> >
> > +#ifdef CONFIG_THERMAL
> > +       /* Needs to be done outside of mutex guarded section to
> > prevent deadlock, since enabling
> > +        * the thermal zone calls the .get_temp() callback, which
> > attempts to acquire the mutex.
> > +        */
> > +       if (!ret) {
> > +               ret = thermal_zone_device_enable(mvm-
> > >tz_device.tzone);
> > +               if (ret)
> > +                       IWL_DEBUG_TEMP(mvm, "Failed to enable
> > thermal zone (err = %d)\n", ret);
> > +       }
> > +#endif
> > +
> >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> >
> >         return ret;
> > @@ -1282,6 +1293,7 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
> >   void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
> >   {
> >         struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
> > +       int ret;
> >
> >         flush_work(&mvm->async_handlers_wk);
> >         flush_work(&mvm->add_stream_wk);
> > @@ -1307,6 +1319,12 @@ void iwl_mvm_mac_stop(struct ieee80211_hw
> > *hw)
> >
> >         iwl_mvm_mei_set_sw_rfkill_state(mvm);
> >
> > +#ifdef CONFIG_THERMAL
> > +       ret = thermal_zone_device_disable(mvm->tz_device.tzone);
> > +       if (ret)
> > +               IWL_DEBUG_TEMP(mvm, "Failed to disable thermal zone
> > (err = %d)\n", ret);
> > +#endif
> > +
> >         mutex_lock(&mvm->mutex);
> >         __iwl_mvm_mac_stop(mvm);
> >         mutex_unlock(&mvm->mutex);
> > diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > index 157e96fa23c1..964d2d011c6b 100644
> > --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c
> > @@ -680,7 +680,7 @@ static  struct thermal_zone_device_ops
> > tzone_ops = {
> >
> >   static void iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> >   {
> > -       int i, ret;
> > +       int i;
> >         char name[16];
> >         static atomic_t counter = ATOMIC_INIT(0);
> >
> > @@ -707,13 +707,6 @@ static void
> > iwl_mvm_thermal_zone_register(struct iwl_mvm *mvm)
> >                 return;
> >         }
> >
> > -       ret = thermal_zone_device_enable(mvm->tz_device.tzone);
> > -       if (ret) {
> > -               IWL_DEBUG_TEMP(mvm, "Failed to enable thermal
> > zone\n");
> > -               thermal_zone_device_unregister(mvm-
> > >tz_device.tzone);
> > -               return;
> > -       }
> > -
> >         /* 0 is a valid temperature,
> >          * so initialize the array with S16_MIN which invalid
> > temperature
> >          */
> > --
> > 2.41.0
> >
> >
>