This assumes other patches in the series are also applied.

---
 x86_64/Makefrag.am |   1 +
 x86_64/cpuboot.S   | 482 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 483 insertions(+)
 create mode 100644 x86_64/cpuboot.S

diff --git a/x86_64/Makefrag.am b/x86_64/Makefrag.am
index 2b79e771..e0d4d2f9 100644
--- a/x86_64/Makefrag.am
+++ b/x86_64/Makefrag.am
@@ -92,6 +92,7 @@ libkernel_a_SOURCES += \
        i386/i386/percpu.h \
        i386/i386/percpu.c \
        x86_64/cswitch.S \
+       x86_64/cpuboot.S \
        x86_64/debug_trace.S \
        x86_64/idt_inittab.S \
        x86_64/locore.S \
diff --git a/x86_64/cpuboot.S b/x86_64/cpuboot.S
new file mode 100644
index 00000000..da60798e
--- /dev/null
+++ b/x86_64/cpuboot.S
@@ -0,0 +1,482 @@
+/*
+ *  Copyright (C) 2025 Free Software Foundation
+ *
+ * This program is free software ; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation ; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with the program ; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#if NCPUS > 1
+#include <mach/machine/asm.h>
+#include <i386/i386asm.h>
+#include <i386/proc_reg.h>
+#include <i386/apic.h>
+#include <i386/cpu_number.h>
+#include <i386/seg.h>
+#include <i386/msr.h>
+#include <i386/gdt.h>
+
+#define KERNELBASE32   (KERNELBASE & 0xffffffff)       /* low 32 bits of KERNELBASE */
+#define RELOC(addr)    (addr - apboot)                 /* offset of addr from apboot, i.e. from CS:0 in real mode */
+#define CR0_CLEAR_FLAGS_CACHE_ENABLE   (CR0_CD | CR0_NW)       /* NOTE(review): despite the name these are part of CR0_SET_FLAGS, so CD and NW end up set */
+#define CR0_SET_FLAGS  (CR0_CLEAR_FLAGS_CACHE_ENABLE | CR0_PE) /* bits ORed into CR0 by apboot */
+#define CR0_CLEAR_FLAGS        (CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_TS | CR0_EM | CR0_MP)  /* bits masked out of CR0 by apboot */
+#define BOOT_CS                0x8                     /* selector of the code descriptor in gdt_tmp */
+#define BOOT_DS                0x10                    /* selector of the data descriptor in gdt_tmp */
+
+#define GDT_DESCR_M32  4                               /* size of the GDT pseudo-descriptor area, in 32-bit words */
+#define GDT_TABLE_M32  (14*2)                          /* size of the GDT itself: 14 descriptors of two 32-bit words */
+
+#define SEG_ACCESS_OFS 40                              /* not referenced elsewhere in this patch */
+#define SEG_GRANLY_OFS 52                              /* not referenced elsewhere in this patch */
+
+.globl apboot, apbootend
+
+/* AP trampoline entry (real mode -> 32-bit protected mode).  NOTE: apboot16 section is auto-loaded
+ * at runtime to just above 64k so it can be called as a SIPI vector.  Relocations are thus computed simply.
+ */
+.section .apboot16.text,"ax",@progbits
+.align 4096
+       .code16
+apboot:
+       /* This is now address CS:0 in real mode */
+
+       /* Set data seg same as code seg */
+       mov     %cs, %dx
+       mov     %dx, %ds
+
+       cli
+       xorl    %eax, %eax
+       movl    %eax, %cr3      /* CR3 = 0 */
+
+       mov     %ax, %es        /* remaining data segments and the stack segment = 0 */
+       mov     %ax, %fs
+       mov     %ax, %gs
+       mov     %ax, %ss
+
+       lgdt    RELOC(gdt_descr_tmp)    /* DS-relative operand; DS == CS, so the offset from apboot addresses the descriptor */
+
+       movl    %cr0, %eax
+       andl    $~CR0_CLEAR_FLAGS, %eax
+       orl     $CR0_SET_FLAGS, %eax    /* includes CR0_PE: protected mode is enabled by the write below */
+       movl    %eax, %cr0
+
+       /* ljmpl with no relocation */
+       .byte 0x66              /* operand-size prefix: 32-bit offset while still assembling 16-bit code */
+       .byte 0xea              /* far jump opcode, followed by offset then selector */
+       .long 0f
+       .word BOOT_CS
+
+       .code32
+0:
+       /* Protected mode! */
+       movw    $BOOT_DS, %ax
+       movw    %ax, %ds
+       movw    %ax, %es
+       movw    %ax, %ss
+
+       lgdtl   apboot_gdt_descr - KERNELBASE   /* switch to apboot_gdt, whose KERNEL_CS/KERNEL_DS have base -KERNELBASE32 */
+       ljmpl   $KERNEL_CS, $(1f + KERNELBASE32)        /* offset + segment base == address of 1f (mod 2^32) */
+1:
+       movw    $KERNEL_DS, %ax         /* reload every data segment from the new GDT */
+       movw    %ax, %ds
+       movw    %ax, %es
+       movw    %ax, %fs
+       movw    %ax, %gs
+       movw    %ax, %ss
+
+       /*
+        * Build the minimal page tables needed to enter 64-bit mode and reach
+        * C code: the first 4GB are identity mapped here, and the first 2GB
+        * are mapped again at KERNEL_MAP_BASE further down.
+        */
+
+       movl    $(PTE_V|PTE_W),%eax
+       orl     $AP_p3table,%eax
+       movl    %eax,(AP_p4table)
+       /*
+        * L3 entries 0..3 each point at one L2 table; together they span
+        * the whole 32-bit 4GB address space. Some of them may be replaced
+        * later if the kernel is mapped below 4G.
+        */
+       movl    $(PTE_V|PTE_W),%eax
+       orl     $AP_p2table,%eax
+       movl    %eax,(AP_p3table)
+       movl    $(PTE_V|PTE_W),%eax
+       orl     $AP_p2table1,%eax
+       movl    %eax,(AP_p3table + 8)
+       movl    $(PTE_V|PTE_W),%eax
+       orl     $AP_p2table2,%eax
+       movl    %eax,(AP_p3table + 16)
+       movl    $(PTE_V|PTE_W),%eax
+       orl     $AP_p2table3,%eax
+       movl    %eax,(AP_p3table + 24)
+       /* L2 entry i maps the 2MiB page at physical address i * 2MiB */
+       xorl    %ecx,%ecx
+.map_p2_table:
+       movl    $0x200000,%eax   // size of one 2MiB page, should be always available
+       mull    %ecx             // eax = ecx * 2MiB (edx receives the high half)
+       orl     $(PTE_V|PTE_W|PTE_S),%eax  // PTE_S: 2MiB page instead of a 4k page table
+       movl    %eax,AP_p2table(,%ecx,8)
+       incl    %ecx
+       cmpl    $(4 * 512),%ecx  // 4 L2 tables of 512 entries each
+       jnz     .map_p2_table
+
+       /*
+        * KERNEL_MAP_BASE must be aligned to 2GB.
+        * Depending on kernel starting address, we might need to add another
+        * entry in the L4 table (controlling 512 GB chunks). In any case, we
+        * add two entries in L3 table to make sure we map 2GB for the kernel.
+        * Note that this may override part of the mapping created above.
+        */
+.kernel_map:
+#if KERNEL_MAP_BASE >= (1U << 39)      /* kernel window lies beyond the first 512GB: give it its own L3 table */
+       movl    $AP_p3ktable,%eax
+       or      $(PTE_V|PTE_W),%eax
+       movl    %eax,(AP_p4table + (8 * ((KERNEL_MAP_BASE >> 39) & 0x1FF)))  // select 512G block
+       movl    $AP_p2ktable1,%eax
+       or      $(PTE_V|PTE_W),%eax
+       movl    %eax,(AP_p3ktable + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) ))  // select first 1G block
+       movl    $AP_p2ktable2,%eax
+       or      $(PTE_V|PTE_W),%eax
+       movl    %eax,(AP_p3ktable + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) ))  // select second 1G block
+#else  /* kernel window lies inside the first 512GB: reuse the identity-map L3 table */
+       movl    $AP_p2ktable1,%eax
+       or      $(PTE_V|PTE_W),%eax
+       movl    %eax,(AP_p3table + (8 * ((KERNEL_MAP_BASE >> 30) & 0x1FF) ))  // select first 1G block
+       movl    $AP_p2ktable2,%eax
+       or      $(PTE_V|PTE_W),%eax
+       movl    %eax,(AP_p3table + (8 * (((KERNEL_MAP_BASE >> 30) & 0x1FF) + 1) ))  // select second 1G block
+#endif
+
+       xorl    %ecx,%ecx        // entry i of the kernel L2 tables maps physical i * 2MiB
+.map_p2k_table:
+       movl    $0x200000,%eax   // size of one 2MiB page, should be always available
+       mull    %ecx             // eax = ecx * 2MiB (edx receives the high half)
+       orl     $(PTE_V|PTE_W|PTE_S),%eax  // PTE_S: 2MiB page instead of 4K
+       movl    %eax,AP_p2ktable1(,%ecx,8)
+       incl    %ecx
+       cmpl    $(2 * 512),%ecx  // 2 L2 tables of 512 entries each
+       jnz     .map_p2k_table
+
+switch64:
+       /*
+        * Jump to 64 bit mode, we have to
+        * - enable PAE
+        * - enable long mode
+        * - enable paging and load the tables filled above in CR3
+        * - jump to a 64-bit code segment
+        */
+       mov     %cr4,%eax
+       or      $CR4_PAE,%eax
+       mov     %eax,%cr4
+       mov     $0xC0000080,%ecx  // select EFER register
+       rdmsr
+       or      $(1 << 8),%eax  // long mode enable bit
+       wrmsr
+       mov     $AP_p4table,%eax  // top-level table filled in above
+       mov     %eax,%cr3
+       mov     %cr0,%eax
+       or      $CR0_PG,%eax  // paging on; with EFER bit 8 set this activates long mode
+       or      $CR0_WP,%eax
+       mov     %eax,%cr0
+
+       lgdt    apboot_gdt64_descr  // GDT holding the 64-bit code descriptor
+       movw    $0,%ax  // null selector for fs and gs
+       movw    %ax,%fs
+       movw    %ax,%gs
+       movw    $16,%ax  // third entry of apboot_gdt64 (data)
+       movw    %ax,%ds
+       movw    %ax,%es
+       movw    %ax,%ss
+       ljmp    $8, $start64  // second entry of apboot_gdt64 (64-bit code)
+
+       .code64
+
+start64:
+       /* Get CPU number into rbp */
+       movq    $1, %rax                        /* CPUID leaf 1 */
+       cpuid
+       shrq    $24, %rbx                       /* EBX[31:24] = initial APIC id */
+       andq    %cs:apic_id_mask, %rbx          /* NOTE(review): 64-bit read; confirm apic_id_mask is 64 bits wide */
+       movq    %cs:CX(cpu_id_lut, %rbx), %rbp  /* presumably APIC id -> cpu number; NOTE(review): confirm element width matches CX() scale and movq */
+
+       /* Copy first gdt64 descriptor and gdt64 to cpu-th area */
+       movq    $(GDT_DESCR_M32 + GDT_TABLE_M32), %rcx  /* element count, in 32-bit words */
+       movq    $apboot_gdt64_top, %rsi
+       movq    %rsi, %rdi
+       movq    $((GDT_DESCR_M32 + GDT_TABLE_M32) * 4), %rax    /* bytes per cpu area */
+       mul     %rbp
+       addq    %rax, %rdi                      /* rdi = apboot_gdt64_top + cpu * area size */
+       cld
+       rep movsl                               /* 32-bit moves to match the count; movsq would copy two areas and overrun the last one */
+
+       /* Access per_cpu area */
+       movq    %rbp, %rax
+       movq    $PC_SIZE,%rbx
+       mul     %rbx
+       addq    $percpu_array, %rax             /* rax = percpu_array + cpu * PC_SIZE */
+
+       /* Record our cpu number */
+       movq    %rbp, (PERCPU_CPU_ID)(%rax)     /* NOTE(review): 64-bit store; confirm the field is 64 bits wide */
+
+       /* Keep the per_cpu address in rbx; make rax hold offset to my cpus gdt64 */
+       movq    %rax, %rbx
+       movq    $((GDT_DESCR_M32 + GDT_TABLE_M32) * 4), %rax
+       mul     %rbp                            /* clobbers rdx; rbx is preserved */
+
+       /* Patch only our own copy of gdt descriptor */
+       addq    %rax, apboot_gdt64_descr_addr(%rax)     /* copied base still names the template; add our offset */
+       movq    %rax, %rsi                      /* wrmsr below needs eax, so park the offset in rsi */
+
+       /* Set GS base address */
+       movq    %rbx, %rdx                      /* the address of our per_cpu area, split into edx:eax for wrmsr */
+       movl    %edx, %eax
+       shrq    $32, %rdx
+       movl    $MSR_REG_GSBASE, %ecx
+       wrmsr
+
+       /* Set KernelGS base address */
+       movq    %rbx, %rdx                      /* same per_cpu address as for the GS base */
+       movl    %edx, %eax
+       shrq    $32, %rdx
+       movl    $MSR_REG_KGSBASE, %ecx
+       wrmsr
+
+       /* Reload our copy of gdt with 2 args */
+       movq    %rsi, %rax                      /* rax = byte offset of our gdt64 copy again */
+       movw    apboot_gdt64_descr(%rax), %di   /* NOTE(review): di/rsi are not consumed by lgdtq or by later code in this file */
+       movq    (apboot_gdt64_descr+2)(%rax), %rsi
+       lgdtq   apboot_gdt64_descr(%rax)
+
+       movw    $PERCPU_DS,%ax                  /* NOTE(review): a selector load into gs reloads its base from the descriptor; confirm the MSR value survives */
+       movw    %ax,%gs
+
+       /* set up mini stack to do far return */
+       movq    CX(EXT(int_stack_top), %rbp), %rsp
+
+       /* instead of ljmp */
+       pushq   $KERNEL_CS                      /* far return frame: target offset on top, selector above it */
+       pushq   $reloaded_gdt64
+       retfq
+
+/* We are back here still in long mode but with percpu GS */
+reloaded_gdt64:
+       movw    $PERCPU_DS, %ax         /* NOTE(review): a selector load into gs reloads its base from the descriptor; confirm the GS base set via MSR survives */
+       movw    %ax, %gs
+
+       /* Load null Interrupt descriptor table */
+       movq    $apboot_idt_ptr, %rbx
+       lidt    (%rbx)                  /* in 64-bit mode this reads a 16-bit limit and a 64-bit base */
+
+       /* Enable local apic in xAPIC mode */
+       xorq    %rax, %rax
+       xorq    %rdx, %rdx
+       movq    $APIC_MSR, %rcx
+       rdmsr
+       orq     $APIC_MSR_ENABLE, %rax
+       andq    $(~(APIC_MSR_BSP | APIC_MSR_X2APIC)), %rax     /* clear the BSP and x2APIC bits before writing back */
+       movq    $APIC_MSR, %rcx
+       wrmsr
+
+       /* Load int_stack_top[cpu] -> rsp */
+       CPU_NUMBER(%edx)
+       movq    CX(EXT(int_stack_top), %rdx), %rsp
+
+       /* Ensure stack alignment */
+       andq    $(~0xf), %rsp           /* 16-byte aligned at the call below (the push/pop pair is balanced) */
+
+       /* Reset EFLAGS to a known state */
+       pushq   $0
+       popfq
+
+       /* Finish the cpu configuration */
+       call    EXT(cpu_ap_main)
+
+3:
+       /* NOT REACHED */
+       hlt
+       jmp     3b
+
+.align 16
+    .word 0    /* 2 bytes of padding so the 32-bit base below lands on a 4-byte boundary */
+gdt_descr_tmp:
+    .short 3*8-1       /* limit: three 8-byte descriptors */
+    .long RELOC(gdt_tmp)       /* NOTE(review): this is an offset from apboot, while lgdt takes a linear base; confirm how it is relocated */
+.align 16
+gdt_tmp:
+    /* 0 */
+    .quad 0
+    /* BOOT_CS */
+    .word 0xffff       /* limit bits 0-15 */
+    .word 0x0000       /* base bits 0-15 */
+    .byte 0x00         /* base bits 16-23 */
+    .byte ACC_PL_K | ACC_CODE_R | ACC_P        /* access byte */
+    .byte ((SZ_32 | SZ_G) << 4) | 0xf  /* flags in the high nibble, limit bits 16-19 in the low nibble */
+    .byte 0x00         /* base bits 24-31 */
+    /* BOOT_DS */
+    .word 0xffff       /* limit bits 0-15 */
+    .word 0x0000       /* base bits 0-15 */
+    .byte 0x00         /* base bits 16-23 */
+    .byte ACC_PL_K | ACC_DATA_W | ACC_P        /* access byte */
+    .byte ((SZ_32 | SZ_G) << 4) | 0xf  /* flags in the high nibble, limit bits 16-19 in the low nibble */
+    .byte 0x00         /* base bits 24-31 */
+
+apbootend:
+
+.section .apboot16.data,"ad",@progbits /* NOTE(review): no "w" flag although this section is written at runtime; confirm "ad" is intended */
+.align 16
+apboot_idt_ptr:                /* null IDT pseudo-descriptor loaded with lidt from 64-bit code */
+       .space 10, 0            /* 16-bit limit + 64-bit base: all 10 bytes that lidt reads, rather than relying on alignment padding */
+.align 16
+apboot_gdt_top:
+       .word 0         /* padding ahead of the pseudo-descriptor */
+apboot_gdt_descr:
+       .word (GDT_TABLE_M32 * 4) - 1   /* limit: size of apboot_gdt in bytes, minus one */
+apboot_gdt_descr_addr:
+       .long apboot_gdt - KERNELBASE   /* base, loaded by the 32-bit lgdtl in apboot */
+.align 16
+apboot_gdt:
+       /* NULL segment = 0x0 */
+       .quad 0
+
+       /* KERNEL_CS = 0x8 */
+       .word 0xffff /* Segment limit first 0-15 bits*/
+       .word (-KERNELBASE32) & 0xffff /*Base first 0-15 bits*/
+       .byte ((-KERNELBASE32) >> 16) & 0xff /*Base 16-23 bits */
+       .byte ACC_PL_K | ACC_CODE_R | ACC_P /*Access byte */
+       .byte ((SZ_32 | SZ_G) << 4) | 0xf /* High 4 bits */
+       .byte ((-KERNELBASE32) >> 24) & 0xff /*Base 24-31 bits */
+
+       /* KERNEL_DS = 0x10 */
+       .word 0xffff /*Segment limit */
+       .word (-KERNELBASE32) & 0xffff /*Base first 0-15 bits*/
+       .byte ((-KERNELBASE32) >> 16) & 0xff /*Base 16-23 bits */
+       .byte ACC_PL_K | ACC_DATA_W | ACC_P /*Access byte*/
+       .byte ((SZ_32 | SZ_G) << 4) | 0xf /* High 4 bits */
+       .byte ((-KERNELBASE32) >> 24) & 0xff /*Base 24-31 bits */
+
+       /* LDT = 0x18 */
+       .quad 0
+
+       /* TSS = 0x20 */
+       .quad 0
+
+       /* USER_LDT = 0x28 */
+       .quad 0
+
+       /* USER_TSS = 0x30 */
+       .quad 0
+
+       /* LINEAR = 0x38 */
+       .quad 0
+
+       /* FPREGS = 0x40 */
+       .quad 0
+
+       /* USER_GDT = 0x48 and 0x50 */
+       .quad 0
+       .quad 0
+
+       /* USER_TSS64 = 0x58 */
+       .quad 0
+
+       /* USER_TSS64 = 0x60 -- NOTE(review): same name as the 0x58 slot; confirm the intended label */
+       .quad 0
+
+       /* boot GS = 0x68 */
+       .word 0xffff /* Segment limit first 0-15 bits */
+       .word 0 /* Base first 0-15 bits */
+       .byte 0 /* Base 16-23 bits */
+       .byte ACC_PL_K | ACC_DATA_W | ACC_P /* Access byte */
+       .byte ((SZ_32 | SZ_G) << 4) | 0xf /* High 4 bits */
+       .byte 0 /* Base 24-31 bits */
+
+.align 4096
+AP_p4table:    .space 4096     /* top-level table, loaded into CR3 by switch64 */
+AP_p3table:    .space 4096     /* L3 table for the identity map */
+AP_p2table:    .space 4096     /* four consecutive L2 tables, filled as one 2048-entry array by .map_p2_table */
+AP_p2table1:   .space 4096
+AP_p2table2:   .space 4096
+AP_p2table3:   .space 4096
+AP_p3ktable:   .space 4096     /* L3 table for the kernel window, used when KERNEL_MAP_BASE >= 1 << 39 */
+AP_p2ktable1:  .space 4096     /* two consecutive L2 tables, filled as one 1024-entry array by .map_p2k_table */
+AP_p2ktable2:  .space 4096
+
+.code64
+.align 4096
+apboot_gdt64_top:      /* start of the template area that start64 copies once per cpu */
+       .word   0       /* padding ahead of the pseudo-descriptor */
+apboot_gdt64_descr:
+       .word   (GDT_TABLE_M32 * 4) - 1 /* limit: size of apboot_gdt64 in bytes, minus one */
+apboot_gdt64_descr_addr:
+       .quad   apboot_gdt64    /* base; start64 adds the per-cpu offset to the copy it makes */
+.align 16
+apboot_gdt64:
+       /* NULL segment */
+       .quad   0
+       /* BOOT_CS */
+       .word   0x0000  /* limit bits 0-15 */
+       .word   0x0000  /* base bits 0-15 */
+       .byte   0x00    /* base bits 16-23 */
+       .byte   ACC_PL_K | ACC_CODE_R | ACC_P   /* access byte */
+       .byte   (SZ_64 << 4) | 0xf      /* flags in the high nibble, limit bits 16-19 in the low nibble */
+       .byte   0x00    /* base bits 24-31 */
+       /* BOOT_DS */
+       .word   0x0000  /* limit bits 0-15 */
+       .word   0x0000  /* base bits 0-15 */
+       .byte   0x00    /* base bits 16-23 */
+       .byte   ACC_PL_K | ACC_DATA_W | ACC_P   /* access byte */
+       .byte   (SZ_64 << 4) | 0xf      /* flags in the high nibble, limit bits 16-19 in the low nibble */
+       .byte   0x00    /* base bits 24-31 */
+
+       /* LDT = 0x18 */
+       .quad   0
+
+       /* TSS = 0x20 */
+       .quad   0
+
+       /* USER_LDT = 0x28 */
+       .quad   0
+
+       /* USER_TSS = 0x30 */
+       .quad   0
+
+       /* LINEAR = 0x38 */
+       .quad   0
+
+       /* FPREGS = 0x40 */
+       .quad   0
+
+       /* USER_GDT = 0x48 and 0x50 */
+       .quad   0
+       .quad   0
+
+       /* USER_TSS64 = 0x58 */
+       .quad   0
+
+       /* USER_TSS64 = 0x60 -- NOTE(review): same name as the 0x58 slot; confirm the intended label */
+       .quad   0
+
+       /* boot GS = 0x68 */
+       .word   0x0000  /* limit bits 0-15 */
+       .word   0       /* base bits 0-15 */
+       .byte   0       /* base bits 16-23 */
+       .byte   ACC_PL_K | ACC_DATA_W | ACC_P   /* access byte */
+       .byte   (SZ_64 << 4) | 0xf      /* flags in the high nibble, limit bits 16-19 in the low nibble */
+       .byte   0       /* base bits 24-31 */
+
+/* Empty space for per-cpu gdt64 descriptor and gdt64: one area of the template size per additional cpu */
+.space (NCPUS-1) * (GDT_DESCR_M32 + GDT_TABLE_M32) * 4, 0x0
+
+#endif
-- 
2.45.2



Reply via email to