Re: [PATCHv3] x86/boot/KASLR: skip the specified crashkernel region

From: Baoquan He
Date: Tue Apr 02 2019 - 02:46:43 EST


On 04/02/19 at 12:10pm, Pingfan Liu wrote:
> crashkernel=x@y or or =range1:size1[,range2:size2,...]@offset option may
> fail to reserve the required memory region if KASLR puts kernel into the
> region. To avoid this uncertainty, asking KASLR to skip the required
> region.
>
> Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx>
> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Borislav Petkov <bp@xxxxxxxxx>
> Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
> Cc: Baoquan He <bhe@xxxxxxxxxx>
> Cc: Will Deacon <will.deacon@xxxxxxx>
> Cc: Nicolas Pitre <nico@xxxxxxxxxx>
> Cc: Pingfan Liu <kernelfans@xxxxxxxxx>
> Cc: Chao Fan <fanc.fnst@xxxxxxxxxxxxxx>
> Cc: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
> Cc: Ard Biesheuvel <ard.biesheuvel@xxxxxxxxxx>
> Cc: linux-kernel@xxxxxxxxxxxxxxx
> ---
> v2 -> v3: adding parsing of crashkernel=range1:size1[,range2:size2,...]@offset
>
> arch/x86/boot/compressed/kaslr.c | 116 ++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 114 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
> index 2e53c05..7f698f4 100644
> --- a/arch/x86/boot/compressed/kaslr.c
> +++ b/arch/x86/boot/compressed/kaslr.c
> @@ -107,6 +107,7 @@ enum mem_avoid_index {
> MEM_AVOID_BOOTPARAMS,
> MEM_AVOID_MEMMAP_BEGIN,
> MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1,
> + MEM_AVOID_CRASHKERNEL,
> MEM_AVOID_MAX,
> };
>
> @@ -238,6 +239,115 @@ static void parse_gb_huge_pages(char *param, char *val)
> }
> }
>
> +/* code heavily copied from parse_crashkernel_mem() */
> +static void handle_crashkernel_mem(char *cmdline,
> + unsigned long long system_ram,
> + unsigned long long *crash_size,
> + unsigned long long *crash_base)

This version looks better and the logic is simple. It will be much better
if we can share code with parse_crashkernel_mem() since both of them look
almost the same.

> +{
> + char *tmp, *cur = cmdline;
> +
> + /* for each entry of the comma-separated list */
> + do {
> + unsigned long long start, end = ULLONG_MAX, size;
> +
> + /* get the start of the range */
> + start = memparse(cur, &tmp);
> + /* no value given */
> + if (cur == tmp)
> + return;
> + cur = tmp;
> + if (*cur != '-')
> + return;
> + cur++;
> +
> + /* if no ':' is here, than we read the end */
> + if (*cur != ':') {
> + end = memparse(cur, &tmp);
> + /* no value given */
> + if (cur == tmp)
> + return;
> + cur = tmp;
> + /* invalid if crashkernel end <= start */
> + if (end <= start)
> + return;
> + }
> + /* expect ":" after range */
> + if (*cur != ':')
> + return;
> + cur++;
> +
> + size = memparse(cur, &tmp);
> + /* no size value given */
> + if (cur == tmp)
> + return;
> + cur = tmp;
> + if (size >= system_ram)
> + return;
> +
> + /* match ? */
> + if (system_ram >= start && system_ram < end) {
> + *crash_size = size;
> + break;
> + }
> + } while (*cur++ == ',');
> +
> + if (*crash_size > 0) {
> + while (*cur && *cur != ' ' && *cur != '@')
> + cur++;
> + if (*cur == '@') {
> + cur++;
> + *crash_base = memparse(cur, &tmp);
> + }
> + }
> +}
> +
> +/* handle crashkernel=x@y or =range1:size1[,range2:size2,...]@offset options */
> +static void mem_avoid_specified_crashkernel_region(char *option)

Maybe just add more words to explain the specified crashkernel region
cases, but remove the 'speecified' word in function name?

> +{
> + unsigned long long crash_size, crash_base = 0;
> + char *first_colon, *first_space, *cur = option;
> +
> + first_colon = strchr(option, ':');
> + first_space = strchr(option, ' ');
> + /* if contain ":" */
> + if (first_colon && (!first_space || first_colon < first_space)) {
> + int i;
> + u64 total_sz = 0;
> + struct boot_e820_entry *entry;
> +
> + for (i = 0; i < boot_params->e820_entries; i++) {
> + entry = &boot_params->e820_table[i];
> + /* Skip non-RAM entries. */
> + if (entry->type != E820_TYPE_RAM)
> + continue;
> + total_sz += entry->size;

Wrap this for loop into a static function to calculate the system RAM
size?

Other than these, I think this adding looks good. It won't impact the
current handling, and very easy to recognize what it's doing. Thanks for
the effort.

Thanks
Baoquan
> + }
> + handle_crashkernel_mem(option, total_sz, &crash_size,
> + &crash_base);
> + } else {
> + crash_size = memparse(option, &cur);
> + if (option == cur)
> + return;
> + while (*cur && *cur != ' ' && *cur != '@')
> + cur++;
> + if (*cur == '@') {
> + option = cur + 1;
> + crash_base = memparse(option, &cur);
> + }
> + }
> + if (crash_base) {
> + mem_avoid[MEM_AVOID_CRASHKERNEL].start = crash_base;
> + mem_avoid[MEM_AVOID_CRASHKERNEL].size = crash_size;
> + } else {
> + /*
> + * Clearing mem_avoid if no offset is given. This is consistent
> + * with kernel, which uses the last crashkernel= option.
> + */
> + mem_avoid[MEM_AVOID_CRASHKERNEL].start = 0;
> + mem_avoid[MEM_AVOID_CRASHKERNEL].size = 0;
> + }
> +}
>
> static void handle_mem_options(void)
> {
> @@ -248,7 +358,7 @@ static void handle_mem_options(void)
> u64 mem_size;
>
> if (!strstr(args, "memmap=") && !strstr(args, "mem=") &&
> - !strstr(args, "hugepages"))
> + !strstr(args, "hugepages") && !strstr(args, "crashkernel="))
> return;
>
> tmp_cmdline = malloc(len + 1);
> @@ -284,6 +394,8 @@ static void handle_mem_options(void)
> goto out;
>
> mem_limit = mem_size;
> + } else if (strstr(param, "crashkernel")) {
> + mem_avoid_specified_crashkernel_region(val);
> }
> }
>
> @@ -412,7 +524,7 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
>
> /* We don't need to set a mapping for setup_data. */
>
> - /* Mark the memmap regions we need to avoid */
> + /* Mark the regions we need to avoid */
> handle_mem_options();
>
> /* Enumerate the immovable memory regions */
> --
> 2.7.4
>