Re: [PATCH 12/12] habanalabs: enable gaudi2 code in driver

From: Greg KH
Date: Tue Jun 28 2022 - 02:29:20 EST


On Mon, Jun 27, 2022 at 11:26:20PM +0300, Oded Gabbay wrote:
> Enable the Gaudi2 ASIC code in the pci probe callback of the driver so
> the driver will handle Gaudi2 ASICs.
>
> Add the PCI ID to the PCI table and add the ASIC enum value to all
> relevant places.
>
> Fixup the device parameters initialization for Gaudi2.
>
> Signed-off-by: Oded Gabbay <ogabbay@xxxxxxxxxx>
> ---
> drivers/misc/habanalabs/common/device.c | 8 +++
> drivers/misc/habanalabs/common/habanalabs.h | 13 ++--
> .../misc/habanalabs/common/habanalabs_drv.c | 67 +++++++++++++++----
> drivers/misc/habanalabs/common/sysfs.c | 6 ++
> 4 files changed, 77 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
> index 38cf2f1659ee..a4656eac495e 100644
> --- a/drivers/misc/habanalabs/common/device.c
> +++ b/drivers/misc/habanalabs/common/device.c
> @@ -645,6 +645,14 @@ static int device_early_init(struct hl_device *hdev)
> gaudi_set_asic_funcs(hdev);
> strscpy(hdev->asic_name, "GAUDI SEC", sizeof(hdev->asic_name));
> break;
> + case ASIC_GAUDI2:
> + gaudi2_set_asic_funcs(hdev);
> + strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
> + break;
> + case ASIC_GAUDI2_SEC:
> + gaudi2_set_asic_funcs(hdev);
> + strscpy(hdev->asic_name, "GAUDI2 SEC", sizeof(hdev->asic_name));
> + break;
> default:
> dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
> hdev->asic_type);
> diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
> index 4537845658f8..587b1c537115 100644
> --- a/drivers/misc/habanalabs/common/habanalabs.h
> +++ b/drivers/misc/habanalabs/common/habanalabs.h
> @@ -3033,7 +3033,6 @@ struct hl_reset_info {
> * @disabled: is device disabled.
> * @late_init_done: is late init stage was done during initialization.
> * @hwmon_initialized: is H/W monitor sensors was initialized.
> - * @heartbeat: is heartbeat sanity check towards CPU-CP enabled.

You remove the documentation for this field here, but you still keep the
field itself in the structure; you just move it down to the bottom for
some reason:

> * @reset_on_lockup: true if a reset should be done in case of stuck CS, false
> * otherwise.
> * @dram_default_page_mapping: is DRAM default page mapping enabled.
> @@ -3066,6 +3065,10 @@ struct hl_reset_info {
> * @is_compute_ctx_active: Whether there is an active compute context executing.
> * @compute_ctx_in_release: true if the current compute context is being released.
> * @supports_mmu_prefetch: true if prefetch is supported, otherwise false.
> + * @reset_upon_device_release: reset the device when the user closes the file descriptor of the
> + * device.
> + * @skip_iatu_for_unsecured_device: skip the device PCI controller initialization when working
> + * with device that runs f/w that is not secured.
> */
> struct hl_device {
> struct pci_dev *pdev;
> @@ -3175,7 +3178,6 @@ struct hl_device {
> u8 disabled;
> u8 late_init_done;
> u8 hwmon_initialized;
> - u8 heartbeat;
> u8 reset_on_lockup;
> u8 dram_default_page_mapping;
> u8 memory_scrub;
> @@ -3199,6 +3201,8 @@ struct hl_device {
> u8 is_compute_ctx_active;
> u8 compute_ctx_in_release;
> u8 supports_mmu_prefetch;
> + u8 reset_upon_device_release;
> + u8 skip_iatu_for_unsecured_device;
>
> /* Parameters for bring-up */
> u64 nic_ports_mask;
> @@ -3212,11 +3216,9 @@ struct hl_device {
> u8 dram_scrambler_enable;
> u8 hard_reset_on_fw_events;
> u8 bmc_enable;
> - u8 rl_enable;
> u8 reset_on_preboot_fail;
> - u8 reset_upon_device_release;
> - u8 skip_iatu_for_unsecured_device;
> u8 reset_if_device_not_idle;
> + u8 heartbeat;
> };

Did you mean to do that?

thanks,

greg k-h