This assumes other patches in the series are also applied.
---
x86_64/Makefrag.am | 1 +
x86_64/cpuboot.S | 482 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 483 insertions(+)
create mode 100644 x86_64/cpuboot.S
diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am
index 2b79e771..e0d4d2f9 100644
--- a/x86_64/Makefrag.am
+++ b/x86_64/Makefrag.am
@@ -92,6 +92,7 @@ libkernel_a_SOURCES += \
i386/i386/percpu.h \
i386/i386/percpu.c \
x86_64/cswitch.S \
+ x86_64/cpuboot.S \
x86_64/debug_trace.S \
x86_64/idt_inittab.S \
x86_64/locore.S \
diff --git a/x86_64/cpuboot.S b/x86_64/cpuboot.S
new file mode 100644
index 00000000..da60798e
--- /dev/null
+++ b/x86_64/cpuboot.S
@@ -0,0 +1,482 @@
+/*
+ * Copyright (C) 2025 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if NCPUS > 1
+#include <mach/machine/asm.h>
+#include <i386/i386asm.h>
+#include <i386/proc_reg.h>
+#include <i386/apic.h>
+#include <i386/cpu_number.h>
+#include <i386/seg.h>
+#include <i386/msr.h>
+#include <i386/gdt.h>
+
+#define KERNELBASE32 (KERNELBASE & 0xffffffff) /* low 32 bits of KERNELBASE */
+#define RELOC(addr) (addr - apboot) /* offset of addr from the apboot label */
+#define CR0_CLEAR_FLAGS_CACHE_ENABLE (CR0_CD | CR0_NW)
+#define CR0_SET_FLAGS (CR0_CLEAR_FLAGS_CACHE_ENABLE | CR0_PE) /* OR-ed into CR0 by apboot */
+#define CR0_CLEAR_FLAGS (CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_TS | CR0_EM | CR0_MP) /* masked out of CR0 by apboot */
+#define BOOT_CS 0x8 /* selector of entry 1 (code) in the boot GDTs */
+#define BOOT_DS 0x10 /* selector of entry 2 (data) in the boot GDTs */
+
+#define GDT_DESCR_M32 4 /* size of a GDT descriptor header area, in 32-bit words */
+#define GDT_TABLE_M32 (14*2) /* size of a 14-entry GDT, in 32-bit words */
+
+#define SEG_ACCESS_OFS 40
+#define SEG_GRANLY_OFS 52
+
+.globl apboot, apbootend
+
+/* NOTE: apboot16 section is auto-loaded at runtime to just above 64k
+ * so it can be called as a SIPI vector. Relocations are thus computed simply.
+ */
+.section .apboot16.text,"ax",@progbits
+.align 4096
+ .code16
+apboot:
+ /* AP entry point: this is now address CS:0 in real mode */
+
+ /* Set data seg same as code seg */
+ mov %cs, %dx
+ mov %dx, %ds
+
+ cli
+ xorl %eax, %eax
+ movl %eax, %cr3 /* clear CR3 */
+
+ mov %ax, %es /* zero the remaining segment registers */
+ mov %ax, %fs
+ mov %ax, %gs
+ mov %ax, %ss
+
+ lgdt RELOC(gdt_descr_tmp) /* temporary 3-entry GDT (gdt_tmp), addressed via DS */
+
+ movl %cr0, %eax
+ andl $~CR0_CLEAR_FLAGS, %eax
+ orl $CR0_SET_FLAGS, %eax /* includes CR0_PE: enter protected mode */
+ movl %eax, %cr0
+
+ /* ljmpl with no relocation: hand-encoded far jump to BOOT_CS:0f */
+ .byte 0x66 /* operand-size prefix: 32-bit offset in 16-bit code */
+ .byte 0xea /* far jmp opcode */
+ .long 0f
+ .word BOOT_CS
+
+ .code32
+0:
+ /* Protected mode! */
+ movw $BOOT_DS, %ax
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %ss
+
+ lgdtl apboot_gdt_descr - KERNELBASE /* switch to the apboot_gdt table */
+ ljmpl $KERNEL_CS, $(1f + KERNELBASE32) /* reload CS from the new GDT */
+1:
+ movw $KERNEL_DS, %ax /* reload all data segments from the new GDT */
+ movw %ax, %ds
+ movw %ax, %es
+ movw %ax, %fs
+ movw %ax, %gs
+ movw %ax, %ss
+
+ /*
+ * Prepare minimal page mapping to jump to 64 bit and to C code.
+ * The first 4GB is identity mapped with 2MiB pages, and the first 2GB
+ * are re-mapped to high addresses at KERNEL_MAP_BASE.
+ */
+
+ movl $AP_p3table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p4table)
+ /*
+ * Fill 4 entries in L3 table to cover the whole 32-bit 4GB address
+ * space. Part of it might be remapped later if the kernel is mapped
+ * below 4G.
+ */
+ movl $AP_p2table,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3table)
+ movl $AP_p2table1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3table + 8)
+ movl $AP_p2table2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3table + 16)
+ movl $AP_p2table3,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3table + 24)
+ /* point each L2 entry to a 2MiB page; ecx indexes all four L2 tables as one array */
+ mov $0,%ecx
+.map_p2_table:
+ mov $0x200000,%eax // 2MiB page, should be always available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4k
+ mov %eax,AP_p2table(,%ecx,8)
+ inc %ecx
+ cmp $2048,%ecx // 512 entries per table, map 4 L2 tables
+ jne .map_p2_table
+
+ /*
+ * KERNEL_MAP_BASE must be aligned to 2GB.
+ * Depending on kernel starting address, we might need to add another
+ * entry in the L4 table (controlling 512 GB chunks). In any case, we
+ * add two entries in L3 table to make sure we map 2GB for the kernel.
+ * Note that this may override part of the mapping created above.
+ */
+.kernel_map:
+#if KERNEL_MAP_BASE >= (1U << 39)
+ movl $AP_p3ktable,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF))) // select 512G block
+ movl $AP_p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $AP_p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#else
+ movl $AP_p2ktable1,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) )) // select first 1G block
+ movl $AP_p2ktable2,%eax
+ or $(PTE_V|PTE_W),%eax
+ movl %eax,(AP_p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) )) // select second 1G block
+#endif
+
+ mov $0,%ecx
+.map_p2k_table:
+ mov $0x200000,%eax // 2MiB page, should be always available
+ mul %ecx
+ or $(PTE_V|PTE_W|PTE_S),%eax // enable 2MiB page instead of 4K
+ mov %eax,AP_p2ktable1(,%ecx,8)
+ inc %ecx
+ cmp $1024,%ecx // 512 entries per table, map 2 L2 tables
+ jne .map_p2k_table
+
+switch64:
+ /*
+ * Jump to 64 bit mode, we have to
+ * - enable PAE
+ * - enable long mode
+ * - enable paging and load the tables filled above in CR3
+ * - jump to a 64-bit code segment
+ */
+ mov %cr4,%eax
+ or $CR4_PAE,%eax
+ mov %eax,%cr4
+ mov $0xC0000080,%ecx // select EFER register
+ rdmsr
+ or $(1 << 8),%eax // long mode enable bit
+ wrmsr
+ mov $AP_p4table,%eax
+ mov %eax,%cr3
+ mov %cr0,%eax
+ or $CR0_PG,%eax
+ or $CR0_WP,%eax
+ mov %eax,%cr0
+
+ lgdt apboot_gdt64_descr // the selectors below index apboot_gdt64
+ movw $0,%ax
+ movw %ax,%fs
+ movw %ax,%gs
+ movw $BOOT_DS,%ax // BOOT_DS entry of apboot_gdt64
+ movw %ax,%ds
+ movw %ax,%es
+ movw %ax,%ss
+ ljmp $BOOT_CS, $start64 // BOOT_CS entry of apboot_gdt64
+
+ .code64
+
+start64:
+ /* Get CPU number into rbp: APIC ID from cpuid leaf 1, then cpu_id_lut */
+ movq $1, %rax
+ cpuid
+ shrq $24, %rbx
+ andq %cs:apic_id_mask, %rbx
+ movq %cs:CX(cpu_id_lut, %rbx), %rbp
+
+ /*
+ * Copy first gdt64 descriptor and gdt64 to cpu-th area. The area size
+ * is counted in 32-bit words, so copy with movsl (not movsq).
+ */
+ movq $(GDT_DESCR_M32 + GDT_TABLE_M32), %rcx
+ movq $apboot_gdt64_top, %rsi
+ movq %rsi, %rdi
+ movq $((GDT_DESCR_M32 + GDT_TABLE_M32) * 4), %rax
+ mul %rbp
+ addq %rax, %rdi
+ cld
+ rep movsl
+
+ /* Access per_cpu area: rax = percpu_array + cpu * PC_SIZE */
+ movq %rbp, %rax
+ movq $PC_SIZE,%rbx
+ mul %rbx
+ addq $percpu_array, %rax
+
+ /* Record our cpu number */
+ /* NOTE(review): 8-byte store; CPU_NUMBER(%edx) below uses a 32-bit register - confirm field width */
+ movq %rbp, (PERCPU_CPU_ID)(%rax)
+
+ /* Keep per_cpu address in rbx; make rax hold offset to my cpus gdt64 */
+ movq %rax, %rbx
+ movq $((GDT_DESCR_M32 + GDT_TABLE_M32) * 4), %rax
+ mul %rbp
+
+ /* Patch only our own copy of gdt descriptor */
+ addq %rax, apboot_gdt64_descr_addr(%rax)
+
+ /* Reload our copy of gdt */
+ lgdtq apboot_gdt64_descr(%rax)
+
+ movw $PERCPU_DS,%ax
+ movw %ax,%gs
+
+ /* set up mini stack to do far return */
+ movq CX(EXT(int_stack_top), %rbp), %rsp
+
+ /* instead of ljmp */
+ pushq $KERNEL_CS
+ pushq $reloaded_gdt64
+ retfq
+
+/* We are back here still in long mode, running on our own gdt64 copy */
+reloaded_gdt64:
+ movw $PERCPU_DS, %ax
+ movw %ax, %gs
+
+ /*
+ * Set GS base to our per_cpu area (rbx). Done after the last GS selector
+ * load, since loading a selector replaces the GS base.
+ */
+ movq %rbx, %rdx
+ movl %ebx, %eax
+ shrq $32, %rdx
+ movl $MSR_REG_GSBASE, %ecx
+ wrmsr
+
+ /* Set KernelGS base address to the same value (eax/edx unchanged) */
+ movl $MSR_REG_KGSBASE, %ecx
+ wrmsr
+
+ /* Load null Interrupt descriptor table */
+ movq $apboot_idt_ptr, %rbx
+ lidt (%rbx)
+
+ /* Enable local apic in xAPIC mode */
+ xorq %rax, %rax
+ xorq %rdx, %rdx
+ movq $APIC_MSR, %rcx
+ rdmsr
+ orq $APIC_MSR_ENABLE, %rax
+ andq $(~(APIC_MSR_BSP | APIC_MSR_X2APIC)), %rax
+ movq $APIC_MSR, %rcx
+ wrmsr
+
+ /* Load int_stack_top[cpu] -> rsp */
+ CPU_NUMBER(%edx)
+ movq CX(EXT(int_stack_top), %rdx), %rsp
+
+ /* Ensure stack alignment */
+ andq $(~0xf), %rsp
+
+ /* Reset EFLAGS to a known state */
+ pushq $0
+ popfq
+
+ /* Finish the cpu configuration */
+ call EXT(cpu_ap_main)
+
+3:
+ /* NOT REACHED */
+ hlt
+ jmp 3b
+
+.align 16
+ .word 0 /* 2 bytes of padding ahead of the descriptor */
+gdt_descr_tmp:
+ .short 3*8-1 /* limit: three 8-byte entries */
+ .long RELOC(gdt_tmp) /* base, expressed as an offset from apboot */
+.align 16
+gdt_tmp:
+ /* 0: null descriptor */
+ .quad 0
+ /* BOOT_CS: base 0, limit 0xfffff, readable code */
+ .word 0xffff /* limit bits 0-15 */
+ .word 0x0000 /* base bits 0-15 */
+ .byte 0x00 /* base bits 16-23 */
+ .byte ACC_PL_K | ACC_CODE_R | ACC_P /* access byte */
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf /* flags nibble, limit bits 16-19 */
+ .byte 0x00 /* base bits 24-31 */
+ /* BOOT_DS: base 0, limit 0xfffff, writable data */
+ .word 0xffff /* limit bits 0-15 */
+ .word 0x0000 /* base bits 0-15 */
+ .byte 0x00 /* base bits 16-23 */
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P /* access byte */
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf /* flags nibble, limit bits 16-19 */
+ .byte 0x00 /* base bits 24-31 */
+
+apbootend:
+
+.section .apboot16.data,"ad",@progbits /* NOTE(review): no 'w' flag although this section is written at boot - confirm */
+.align 16
+apboot_idt_ptr:
+ .long 0 /* NOTE(review): lidt in 64-bit mode reads 10 bytes; the rest is alignment padding - confirm */
+.align 16
+apboot_gdt_top:
+ .word 0 /* 2 bytes of padding ahead of the descriptor */
+apboot_gdt_descr:
+ .word (GDT_TABLE_M32 * 4) - 1 /* limit: size of apboot_gdt in bytes, minus one */
+apboot_gdt_descr_addr:
+ .long apboot_gdt - KERNELBASE /* base of the table below */
+.align 16
+apboot_gdt:
+ /* NULL segment = 0x0 */
+ .quad 0
+
+ /* KERNEL_CS = 0x8 */
+ .word 0xffff /* Segment limit bits 0-15 */
+ .word (-KERNELBASE32) & 0xffff /* Base bits 0-15 */
+ .byte ((-KERNELBASE32) >> 16) & 0xff /* Base bits 16-23 */
+ .byte ACC_PL_K | ACC_CODE_R | ACC_P /* Access byte */
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf /* Flags nibble, limit bits 16-19 */
+ .byte ((-KERNELBASE32) >> 24) & 0xff /* Base bits 24-31 */
+
+ /* KERNEL_DS = 0x10 */
+ .word 0xffff /* Segment limit bits 0-15 */
+ .word (-KERNELBASE32) & 0xffff /* Base bits 0-15 */
+ .byte ((-KERNELBASE32) >> 16) & 0xff /* Base bits 16-23 */
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P /* Access byte */
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf /* Flags nibble, limit bits 16-19 */
+ .byte ((-KERNELBASE32) >> 24) & 0xff /* Base bits 24-31 */
+
+ /* LDT = 0x18 */
+ .quad 0
+
+ /* TSS = 0x20 */
+ .quad 0
+
+ /* USER_LDT = 0x28 */
+ .quad 0
+
+ /* USER_TSS = 0x30 */
+ .quad 0
+
+ /* LINEAR = 0x38 */
+ .quad 0
+
+ /* FPREGS = 0x40 */
+ .quad 0
+
+ /* USER_GDT = 0x48 and 0x50 */
+ .quad 0
+ .quad 0
+
+ /* USER_TSS64 = 0x58 */
+ .quad 0
+
+ /* USER_TSS64 (second slot) = 0x60 */
+ .quad 0
+
+ /* boot GS = 0x68: base 0, limit 0xfffff, writable data */
+ .word 0xffff
+ .word 0
+ .byte 0
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P
+ .byte ((SZ_32 | SZ_G) << 4) | 0xf
+ .byte 0
+
+.align 4096
+AP_p4table: .space 4096 /* top-level (L4) table, loaded into CR3 by switch64 */
+AP_p3table: .space 4096 /* L3 table installed in L4 entry 0 */
+AP_p2table: .space 4096 /* L2 tables for the 4GB identity map; the fill loop */
+AP_p2table1: .space 4096 /* indexes all four as one 2048-entry array starting */
+AP_p2table2: .space 4096 /* at AP_p2table, so they must stay contiguous and */
+AP_p2table3: .space 4096 /* in this order */
+AP_p3ktable: .space 4096 /* L3 table for the kernel map, used when KERNEL_MAP_BASE >= (1 << 39) */
+AP_p2ktable1: .space 4096 /* L2 tables for the 2GB kernel map; filled as one */
+AP_p2ktable2: .space 4096 /* 1024-entry array from AP_p2ktable1 (keep contiguous) */
+
+.code64
+.align 4096
+apboot_gdt64_top:
+ .word 0 /* 2 bytes of padding ahead of the descriptor */
+apboot_gdt64_descr:
+ .word (GDT_TABLE_M32 * 4) - 1 /* limit: size of apboot_gdt64 in bytes, minus one */
+apboot_gdt64_descr_addr:
+ .quad apboot_gdt64 /* base; start64 adds the per-cpu offset in each cpu's copy */
+.align 16
+apboot_gdt64:
+ /* NULL segment */
+ .quad 0
+ /* BOOT_CS = 0x8 */
+ .word 0x0000 /* limit bits 0-15 */
+ .word 0x0000 /* base bits 0-15 */
+ .byte 0x00 /* base bits 16-23 */
+ .byte ACC_PL_K | ACC_CODE_R | ACC_P /* access byte */
+ .byte (SZ_64 << 4) | 0xf /* flags nibble (SZ_64), limit bits 16-19 */
+ .byte 0x00 /* base bits 24-31 */
+ /* BOOT_DS = 0x10 */
+ .word 0x0000 /* limit bits 0-15 */
+ .word 0x0000 /* base bits 0-15 */
+ .byte 0x00 /* base bits 16-23 */
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P /* access byte */
+ .byte (SZ_64 << 4) | 0xf /* flags nibble (SZ_64), limit bits 16-19 */
+ .byte 0x00 /* base bits 24-31 */
+
+ /* LDT = 0x18 */
+ .quad 0
+
+ /* TSS = 0x20 */
+ .quad 0
+
+ /* USER_LDT = 0x28 */
+ .quad 0
+
+ /* USER_TSS = 0x30 */
+ .quad 0
+
+ /* LINEAR = 0x38 */
+ .quad 0
+
+ /* FPREGS = 0x40 */
+ .quad 0
+
+ /* USER_GDT = 0x48 and 0x50 */
+ .quad 0
+ .quad 0
+
+ /* USER_TSS64 = 0x58 */
+ .quad 0
+
+ /* USER_TSS64 (second slot) = 0x60 */
+ .quad 0
+
+ /* boot GS = 0x68: base 0, writable data */
+ .word 0x0000
+ .word 0
+ .byte 0
+ .byte ACC_PL_K | ACC_DATA_W | ACC_P
+ .byte (SZ_64 << 4) | 0xf
+ .byte 0
+
+/* Empty space for per-cpu gdt64 descriptor and gdt64: one slot of the same size as the template above per additional cpu */
+.space (NCPUS-1) * (GDT_DESCR_M32 + GDT_TABLE_M32) * 4, 0x0
+
+#endif
--
2.45.2