Re: [PATCH 9/9] powerpc/pm: support deep sleep feature on T1040

From: Scott Wood
Date: Tue Mar 11 2014 - 21:11:04 EST


On Fri, 2014-03-07 at 12:58 +0800, Chenhui Zhao wrote:
> From: Zhao Chenhui <chenhui.zhao@xxxxxxxxxxxxx>
>
> T1040 supports deep sleep feature, which can switch off most parts of
> the SoC when it is in deep sleep mode. This way, it becomes more
> energy-efficient.
>
> The DDR controller will also be powered off in deep sleep. Therefore,
> the last stage (the latter part of fsl_dp_enter_low) will run without DDR
> access. This piece of code and related TLBs will be prefetched.
>
> Due to the different initialization code between 32-bit and 64-bit, they
> have seperate resume entry and precedure.
>
> The feature supports 32-bit and 64-bit kernel mode.
>
> Signed-off-by: Zhao Chenhui <chenhui.zhao@xxxxxxxxxxxxx>
> ---
> arch/powerpc/include/asm/booke_save_regs.h | 3 +
> arch/powerpc/kernel/cpu_setup_fsl_booke.S | 17 ++
> arch/powerpc/kernel/head_fsl_booke.S | 30 +++
> arch/powerpc/platforms/85xx/Makefile | 2 +-
> arch/powerpc/platforms/85xx/deepsleep.c | 201 +++++++++++++++++++
> arch/powerpc/platforms/85xx/qoriq_pm.c | 38 ++++
> arch/powerpc/platforms/85xx/sleep.S | 295 ++++++++++++++++++++++++++++
> arch/powerpc/sysdev/fsl_soc.h | 7 +
> 8 files changed, 592 insertions(+), 1 deletions(-)
> create mode 100644 arch/powerpc/platforms/85xx/deepsleep.c
> create mode 100644 arch/powerpc/platforms/85xx/sleep.S
>
> diff --git a/arch/powerpc/include/asm/booke_save_regs.h b/arch/powerpc/include/asm/booke_save_regs.h
> index 87c357a..37c1f6c 100644
> --- a/arch/powerpc/include/asm/booke_save_regs.h
> +++ b/arch/powerpc/include/asm/booke_save_regs.h
> @@ -88,6 +88,9 @@
> #define HIBERNATION_FLAG 1
> #define DEEPSLEEP_FLAG 2
>
> +#define CPLD_FLAG 1
> +#define FPGA_FLAG 2

What is this?

> #ifndef __ASSEMBLY__
> extern void booke_cpu_state_save(void *buf, int type);
> extern void *booke_cpu_state_restore(void *buf, int type);
> diff --git a/arch/powerpc/kernel/cpu_setup_fsl_booke.S b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
> index e59d6de..ea9bc28 100644
> --- a/arch/powerpc/kernel/cpu_setup_fsl_booke.S
> +++ b/arch/powerpc/kernel/cpu_setup_fsl_booke.S
> @@ -318,6 +318,23 @@ flush_backside_L2_cache:
> 2:
> blr
>
> +#define CPC_CPCCSR0 0x0
> +#define CPC_CPCCSR0_CPCFL 0x800

This is supposed to be CPU setup, not platform setup.

> +/* r3 : the base address of CPC */
> +_GLOBAL(fsl_flush_cpc_cache)
> + lwz r6, CPC_CPCCSR0(r3)
> + ori r6, r6, CPC_CPCCSR0_CPCFL
> + stw r6, CPC_CPCCSR0(r3)
> + sync
> +
> + /* Wait until completing the flush */
> +1: lwz r6, CPC_CPCCSR0(r3)
> + andi. r6, r6, CPC_CPCCSR0_CPCFL
> + bne 1b
> +
> + blr
> +
> _GLOBAL(__flush_caches_e500v2)

I'm not sure that this belongs here either.

> mflr r0
> bl flush_dcache_L1
> diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
> index 20204fe..3285752 100644
> --- a/arch/powerpc/kernel/head_fsl_booke.S
> +++ b/arch/powerpc/kernel/head_fsl_booke.S
> @@ -162,6 +162,19 @@ _ENTRY(__early_start)
> #include "fsl_booke_entry_mapping.S"
> #undef ENTRY_MAPPING_BOOT_SETUP
>
> +#if defined(CONFIG_SUSPEND) && defined(CONFIG_FSL_CORENET_RCPM)
> + /* if deep_sleep_flag != 0, jump to the deep sleep resume entry */
> + LOAD_REG_ADDR(r4, deep_sleep_flag)
> + lwz r3, 0(r4)
> + cmpwi r3, 0
> + beq 11f
> + /* clear deep_sleep_flag */
> + li r3, 0
> + stw r3, 0(r4)
> + b fsl_deepsleep_resume
> +11:
> +#endif

Why do you come in via the normal kernel entry, versus specifying a
direct entry point for deep sleep resume? How does U-Boot even know
what the normal entry is when resuming?

Be careful of the "beq set_ivor" in the CONFIG_RELOCATABLE section
above. Also you probably don't want the relocation code to run again
when resuming.

> +#if defined(CONFIG_SUSPEND) && defined(CONFIG_FSL_CORENET_RCPM)
> +_ENTRY(__entry_deep_sleep)
> +/*
> + * Bootloader will jump to here when resuming from deep sleep.
> + * After executing the init code in fsl_booke_entry_mapping.S,
> + * will jump to the real resume entry.
> + */
> + li r8, 1
> + bl 12f
> +12: mflr r9
> + addi r9, r9, (deep_sleep_flag - 12b)
> + stw r8, 0(r9)
> + b __early_start
> +deep_sleep_flag:
> + .long 0
> +#endif

It's a bit ambiguous to say "entry_deep_sleep" when it's resuming rather
than entering...

So you do have a special entry point. Why do you go to __early_start
only to quickly test a flag and branch away?

> diff --git a/arch/powerpc/platforms/85xx/Makefile b/arch/powerpc/platforms/85xx/Makefile
> index 7fae817..9a4ea86 100644
> --- a/arch/powerpc/platforms/85xx/Makefile
> +++ b/arch/powerpc/platforms/85xx/Makefile
> @@ -3,7 +3,7 @@
> #
> obj-$(CONFIG_SMP) += smp.o
> ifeq ($(CONFIG_FSL_CORENET_RCPM), y)
> -obj-$(CONFIG_SUSPEND) += qoriq_pm.o
> +obj-$(CONFIG_SUSPEND) += qoriq_pm.o deepsleep.o sleep.o
> endif
>
> obj-y += common.o
> diff --git a/arch/powerpc/platforms/85xx/deepsleep.c b/arch/powerpc/platforms/85xx/deepsleep.c
> new file mode 100644
> index 0000000..ddd7185
> --- /dev/null
> +++ b/arch/powerpc/platforms/85xx/deepsleep.c
> @@ -0,0 +1,201 @@
> +/*
> + * Support deep sleep feature

AFAICT this supports deep sleep on T1040, not on all 85xx that has it.

> + *
> + * Copyright 2014 Freescale Semiconductor Inc.
> + *
> + * Author: Chenhui Zhao <chenhui.zhao@xxxxxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation; either version 2 of the License, or (at your
> + * option) any later version.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/of_platform.h>
> +#include <linux/of_address.h>
> +#include <asm/machdep.h>
> +#include <sysdev/fsl_soc.h>
> +#include <asm/booke_save_regs.h>
> +
> +#define SIZE_1MB 0x100000
> +#define SIZE_2MB 0x200000
> +
> +#define CCSR_SCFG_DPSLPCR 0xfc000
> +#define CCSR_SCFG_DPSLPCR_WDRR_EN 0x1
> +#define CCSR_SCFG_SPARECR2 0xfc504
> +#define CCSR_SCFG_SPARECR3 0xfc508
> +
> +#define CCSR_GPIO1_GPDIR 0x130000
> +#define CCSR_GPIO1_GPODR 0x130004
> +#define CCSR_GPIO1_GPDAT 0x130008
> +#define CCSR_GPIO1_GPDIR_29 0x4
> +
> +/* 128 bytes buffer for restoring data broke by DDR training initialization */
> +#define DDR_BUF_SIZE 128
> +static u8 ddr_buff[DDR_BUF_SIZE] __aligned(64);
> +
> +static void *dcsr_base, *ccsr_base, *pld_base;
> +static int pld_flag;
> +
> +int fsl_dp_iomap(void)
> +{
> + struct device_node *np;
> + const u32 *prop;
> + int ret = 0;
> + u64 ccsr_phy_addr, dcsr_phy_addr;
> +
> + np = of_find_node_by_type(NULL, "soc");
> + if (!np) {
> + pr_err("%s: Can't find the node of \"soc\"\n", __func__);
> + ret = -EINVAL;
> + goto ccsr_err;
> + }
> + prop = of_get_property(np, "ranges", NULL);
> + if (!prop) {
> + pr_err("%s: Can't find the property of \"ranges\"\n", __func__);
> + of_node_put(np);
> + ret = -EINVAL;
> + goto ccsr_err;
> + }

Use get_immrbase(), or better use specific nodes in the device tree.

> + ccsr_phy_addr = of_translate_address(np, prop + 1);
> + ccsr_base = ioremap((phys_addr_t)ccsr_phy_addr, SIZE_2MB);
> + of_node_put(np);
> + if (!ccsr_base) {
> + ret = -ENOMEM;
> + goto ccsr_err;
> + }

Unnecessary cast.

Why 2 MiB?

> + np = of_find_compatible_node(NULL, NULL, "fsl,dcsr");
> + if (!np) {
> + pr_err("%s: Can't find the node of \"fsl,dcsr\"\n", __func__);
> + ret = -EINVAL;
> + goto dcsr_err;
> + }
> + prop = of_get_property(np, "ranges", NULL);
> + if (!prop) {
> + pr_err("%s: Can't find the property of \"ranges\"\n", __func__);
> + of_node_put(np);
> + ret = -EINVAL;
> + goto dcsr_err;
> + }
> + dcsr_phy_addr = of_translate_address(np, prop + 1);
> + dcsr_base = ioremap((phys_addr_t)dcsr_phy_addr, SIZE_1MB);
> + of_node_put(np);
> + if (!dcsr_base) {
> + ret = -ENOMEM;
> + goto dcsr_err;
> + }

If you must do this, add a helper to get the dcsr base -- but do we not
already have dcsr subnodes for what you are using?

> +
> + np = of_find_compatible_node(NULL, NULL, "fsl,fpga-qixis");
> + if (np) {
> + pld_flag = FPGA_FLAG;
> + } else {
> + np = of_find_compatible_node(NULL, NULL, "fsl,p104xrdb-cpld");
> + if (np) {
> + pld_flag = CPLD_FLAG;
> + } else {
> + pr_err("%s: Can't find the FPGA/CPLD node\n",
> + __func__);
> + ret = -EINVAL;
> + goto pld_err;
> + }
> + }

OK, so this file isn't even specific to T1040 -- it's specific to our
reference boards?

> +static void fsl_dp_ddr_save(void *ccsr_base)
> +{
> + u32 ddr_buff_addr;
> +
> + /*
> + * DDR training initialization will break 128 bytes at the beginning
> + * of DDR, therefore, save them so that the bootloader will restore
> + * them. Assume that DDR is mapped to the address space started with
> + * CONFIG_PAGE_OFFSET.
> + */
> + memcpy(ddr_buff, (void *)CONFIG_PAGE_OFFSET, DDR_BUF_SIZE);
> +
> + /* assume ddr_buff is in the physical address space of 4GB */
> + ddr_buff_addr = (u32)(__pa(ddr_buff) & 0xffffffff);

That assumption may break with a relocatable kernel.

> +
> +}
> +
> +int fsl_enter_epu_deepsleep(void)
> +{
> +
> +

No blank lines at begin/end of function.

> diff --git a/arch/powerpc/platforms/85xx/qoriq_pm.c b/arch/powerpc/platforms/85xx/qoriq_pm.c
> index 915b13b..5f2c016 100644
> --- a/arch/powerpc/platforms/85xx/qoriq_pm.c
> +++ b/arch/powerpc/platforms/85xx/qoriq_pm.c
> @@ -20,6 +20,8 @@
> #define FSL_SLEEP 0x1
> #define FSL_DEEP_SLEEP 0x2
>
> +int (*fsl_enter_deepsleep)(void);
> +
> /* specify the sleep state of the present platform */
> int sleep_pm_state;
> /* supported sleep modes by the present platform */
> @@ -28,6 +30,7 @@ static unsigned int sleep_modes;
> static int qoriq_suspend_enter(suspend_state_t state)
> {
> int ret = 0;
> + int cpu;
>
> switch (state) {
> case PM_SUSPEND_STANDBY:
> @@ -39,6 +42,17 @@ static int qoriq_suspend_enter(suspend_state_t state)
>
> break;
>
> + case PM_SUSPEND_MEM:
> +
> + cpu = smp_processor_id();
> + qoriq_pm_ops->irq_mask(cpu);
> +
> + ret = fsl_enter_deepsleep();
> +
> + qoriq_pm_ops->irq_unmask(cpu);
> +
> + break;
> +
> default:
> ret = -EINVAL;
>
> @@ -52,12 +66,30 @@ static int qoriq_suspend_valid(suspend_state_t state)
> if (state == PM_SUSPEND_STANDBY && (sleep_modes & FSL_SLEEP))
> return 1;
>
> + if (state == PM_SUSPEND_MEM && (sleep_modes & FSL_DEEP_SLEEP))
> + return 1;
> +
> return 0;
> }
>
> +static int qoriq_suspend_begin(suspend_state_t state)
> +{
> + if (state == PM_SUSPEND_MEM)
> + return fsl_dp_iomap();
> +
> + return 0;
> +}
> +
> +static void qoriq_suspend_end(void)
> +{
> + fsl_dp_iounmap();
> +}
> +
> static const struct platform_suspend_ops qoriq_suspend_ops = {
> .valid = qoriq_suspend_valid,
> .enter = qoriq_suspend_enter,
> + .begin = qoriq_suspend_begin,
> + .end = qoriq_suspend_end,
> };
>
> static int __init qoriq_suspend_init(void)
> @@ -71,6 +103,12 @@ static int __init qoriq_suspend_init(void)
> if (np)
> sleep_pm_state = PLAT_PM_LPM20;
>
> + np = of_find_compatible_node(NULL, NULL, "fsl,t1040-rcpm");
> + if (np) {
> + fsl_enter_deepsleep = fsl_enter_epu_deepsleep;
> + sleep_modes |= FSL_DEEP_SLEEP;
> + }
> +
> suspend_set_ops(&qoriq_suspend_ops);
>
> return 0;
> diff --git a/arch/powerpc/platforms/85xx/sleep.S b/arch/powerpc/platforms/85xx/sleep.S
> new file mode 100644
> index 0000000..95a5746
> --- /dev/null
> +++ b/arch/powerpc/platforms/85xx/sleep.S
> @@ -0,0 +1,295 @@
> +/*
> + * Implement the low level part of deep sleep
> + *
> + * Copyright 2014 Freescale Semiconductor Inc.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the
> + * Free Software Foundation; either version 2 of the License, or (at your
> + * option) any later version.
> + */
> +
> +#include <asm/page.h>
> +#include <asm/ppc_asm.h>
> +#include <asm/reg.h>
> +#include <asm/asm-offsets.h>
> +#include <asm/booke_save_regs.h>
> +#include <asm/mmu.h>
> +
> +#define FSLDELAY(count) \
> + li r3, (count)@l; \
> + slwi r3, r3, 10; \
> + mtctr r3; \
> +101: nop; \
> + bdnz 101b;

You don't need a namespace prefix on local macros in a non-header file.

Is the timebase stopped where you're calling this from?

> +#define FSL_DIS_ALL_IRQ \
> + mfmsr r8; \
> + rlwinm r8, r8, 0, ~MSR_CE; \
> + rlwinm r8, r8, 0, ~MSR_ME; \
> + rlwinm r8, r8, 0, ~MSR_EE; \
> + rlwinm r8, r8, 0, ~MSR_DE; \
> + mtmsr r8; \
> + isync
> +
> +
> + .section .data
> + .align 6
> +booke_regs_buffer:
> + .space REGS_BUFFER_SIZE
> +
> + .section .txt
> + .align 6
> +
> +_GLOBAL(fsl_dp_enter_low)
> +deepsleep_start:
> + LOAD_REG_ADDR(r9, buf_tmp)
> + PPC_STL r3, 0(r9)
> + PPC_STL r4, 8(r9)
> + PPC_STL r5, 16(r9)
> + PPC_STL r6, 24(r9)
> +
> + LOAD_REG_ADDR(r3, booke_regs_buffer)
> + /* save the return address */
> + mflr r5
> + PPC_STL r5, SR_LR(r3)
> + mfmsr r5
> + PPC_STL r5, SR_MSR(r3)
> + li r4, DEEPSLEEP_FLAG
> + bl booke_cpu_state_save
> +
> + LOAD_REG_ADDR(r9, buf_tmp)
> + PPC_LL r31, 0(r9)
> + PPC_LL r30, 8(r9)
> + PPC_LL r29, 16(r9)
> + PPC_LL r28, 24(r9)
> +
> + /* flush caches */
> + LOAD_REG_ADDR(r3, cur_cpu_spec)
> + PPC_LL r3, 0(r3)
> + PPC_LL r3, CPU_FLUSH_CACHES(r3)
> + PPC_LCMPI 0, r3, 0
> + beq 6f
> +#ifdef CONFIG_PPC64
> + PPC_LL r3, 0(r3)
> +#endif
> + mtctr r3
> + bctrl
> +6:
> +#define CPC_OFFSET 0x10000
> + mr r3, r31
> + addis r3, r3, CPC_OFFSET@h
> + bl fsl_flush_cpc_cache
> +
> + LOAD_REG_ADDR(r8, deepsleep_start)
> + LOAD_REG_ADDR(r9, deepsleep_end)
> +
> + /* prefecth TLB */
> +#define CCSR_GPIO1_GPDAT 0x130008
> +#define CCSR_GPIO1_GPDAT_29 0x4
> + LOAD_REG_IMMEDIATE(r11, CCSR_GPIO1_GPDAT)
> + add r11, r31, r11
> + lwz r10, 0(r11)
> +
> +#define CCSR_RCPM_PCPH15SETR 0xe20b4
> +#define CCSR_RCPM_PCPH15SETR_CORE0 0x1
> + LOAD_REG_IMMEDIATE(r12, CCSR_RCPM_PCPH15SETR)
> + add r12, r31, r12
> + lwz r10, 0(r12)
> +
> +#define CCSR_DDR_SDRAM_CFG_2 0x8114
> +#define CCSR_DDR_SDRAM_CFG_2_FRC_SR 0x80000000
> + LOAD_REG_IMMEDIATE(r13, CCSR_DDR_SDRAM_CFG_2)
> + add r13, r31, r13
> + lwz r10, 0(r13)
> +
> +#define DCSR_EPU_EPGCR 0x000
> +#define DCSR_EPU_EPGCR_GCE 0x80000000
> + li r14, DCSR_EPU_EPGCR
> + add r14, r30, r14
> + lwz r10, 0(r14)
> +
> +#define DCSR_EPU_EPECR15 0x33C
> +#define DCSR_EPU_EPECR15_IC0 0x80000000
> + li r15, DCSR_EPU_EPECR15
> + add r15, r30, r15
> + lwz r10, 0(r15)
> +
> +#define CCSR_SCFG_QMCRDTRSTCR 0xfc40c
> +#define CCSR_SCFG_QMCRDTRSTCR_CRDTRST 0x80000000
> + LOAD_REG_IMMEDIATE(r16, CCSR_SCFG_QMCRDTRSTCR)
> + add r16, r31, r16
> + lwz r10, 0(r16)
> +
> +/*
> + * There are two kind of register maps, one for CPLD and the other for FPGA
> + */
> +#define CPLD_MISCCSR 0x17
> +#define CPLD_MISCCSR_SLEEPEN 0x40
> +#define QIXIS_PWR_CTL2 0x21
> +#define QIXIS_PWR_CTL2_PCTL 0x2
> + PPC_LCMPI 0, r28, FPGA_FLAG
> + beq 20f
> + addi r29, r29, CPLD_MISCCSR
> +20:
> + addi r29, r29, QIXIS_PWR_CTL2
> + lbz r10, 0(r29)


Again, this is not marked as a board-specific file. How do you expect
customers to adapt this mechanism to their boards?

> +
> + /* prefecth code to cache so that executing code after disable DDR */
> +1: lwz r3, 0(r8)
> + addi r8, r8, 4
> + cmpw r8, r9
> + blt 1b
> + msync

Instructions don't execute from dcache... I guess you're assuming it
will allocate in, and stay in, L2/CPC. It'd be safer to lock those
cache lines in icache, or copy the code to SRAM.

> + FSL_DIS_ALL_IRQ
> +
> + /*
> + * Place DDR controller in self refresh mode.
> + * From here on, DDR can't be access any more.
> + */
> + lwz r10, 0(r13)
> + oris r10, r10, CCSR_DDR_SDRAM_CFG_2_FRC_SR@h
> + stw r10, 0(r13)
> +
> + /* can't call udelay() here, so use a macro to delay */
> + FSLDELAY(50)

A timebase loop doesn't require accessing DDR.

You also probably want to do a "sync, readback, data dependency, isync"
sequence to make sure that the store has hit CCSR before you begin your
delay (or is a delay required at all if you do that?).

> + /*
> + * Enable deep sleep signals by write external CPLD/FPGA register.
> + * The bootloader will disable them when wakeup from deep sleep.
> + */
> + lbz r10, 0(r29)
> + PPC_LCMPI 0, r28, FPGA_FLAG
> + beq 22f
> + ori r10, r10, CPLD_MISCCSR_SLEEPEN
> +22:
> + ori r10, r10, QIXIS_PWR_CTL2_PCTL
> + stb r10, 0(r29)
> +
> + /*
> + * Set GPIO1_29 to lock the signal MCKE down during deep sleep.
> + * The bootloader will clear it when wakeup.
> + */
> + lwz r10, 0(r11)
> + ori r10, r10, CCSR_GPIO1_GPDAT_29
> + stw r10, 0(r11)
> +
> + FSLDELAY(10)
> +
> + /* Clear the QMan CITI Credits */
> + lwz r10, 0(r16)
> + oris r10, r10, CCSR_SCFG_QMCRDTRSTCR_CRDTRST@h
> + stw r10, 0(r16)
> +
> + /* Enable all EPU Counters */
> + li r10, 0
> + oris r10, r10, DCSR_EPU_EPGCR_GCE@h
> + stw r10, 0(r14)
> +
> + /* Enable SCU15 to trigger on RCPM Concentrator 0 */
> + lwz r10, 0(r15)
> + oris r10, r10, DCSR_EPU_EPECR15_IC0@h
> + stw r10, 0(r15)
> +
> + /* put Core0 in PH15 mode, trigger EPU FSM */
> + lwz r10, 0(r12)
> + ori r10, r10, CCSR_RCPM_PCPH15SETR_CORE0
> + stw r10, 0(r12)

Shouldn't there be a sync to ensure that the previous I/O happens before
the final store to enter PH15?

-Scott


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/