--- i386/Makefrag.am | 6 + i386/i386/cpu_number.h | 31 +++- i386/i386/cswitch.S | 6 +- i386/i386/locore.S | 31 ++-- i386/i386/mp_desc.c | 281 ++++++++++++++++++------------- i386/i386/mp_desc.h | 9 +- i386/i386/xen.h | 2 +- i386/i386at/boothdr.S | 18 +- i386/i386at/model_dep.c | 101 ++--------- i386/i386at/model_dep.h | 3 +- i386/intel/pmap.c | 158 +++++++++++++---- i386/intel/pmap.h | 8 +- linux/dev/arch/i386/kernel/irq.c | 13 +- linux/dev/init/main.c | 2 + 14 files changed, 396 insertions(+), 273 deletions(-)
diff --git a/i386/Makefrag.am b/i386/Makefrag.am index 8d6ef8cd..b74aad35 100644 --- a/i386/Makefrag.am +++ b/i386/Makefrag.am @@ -30,6 +30,8 @@ if HOST_ix86 # libkernel_a_SOURCES += \ + i386/i386at/acpi_parse_apic.h \ + i386/i386at/acpi_parse_apic.c \ i386/i386at/autoconf.c \ i386/i386at/autoconf.h \ i386/i386at/biosmem.c \ @@ -94,7 +96,9 @@ libkernel_a_SOURCES += \ i386/i386/ast_types.h \ i386/i386/cpu.h \ i386/i386/cpu_number.h \ + i386/i386/cpu_number.c \ i386/i386/cswitch.S \ + i386/i386/cpuboot.S \ i386/i386/db_disasm.c \ i386/i386/db_interface.c \ i386/i386/db_interface.h \ @@ -158,6 +162,8 @@ libkernel_a_SOURCES += \ i386/i386/user_ldt.h \ i386/i386/vm_param.h \ i386/i386/xpr.h \ + i386/i386/smp.h \ + i386/i386/smp.c \ i386/intel/pmap.c \ i386/intel/pmap.h \ i386/intel/read_fault.c \ diff --git a/i386/i386/cpu_number.h b/i386/i386/cpu_number.h index 9aef6370..d56cb602 100644 --- a/i386/i386/cpu_number.h +++ b/i386/i386/cpu_number.h @@ -35,14 +35,35 @@ /* More-specific code must define cpu_number() and CPU_NUMBER. */ #ifdef __i386__ #define CX(addr, reg) addr(,reg,4) + +/* CPU_NUMBER(%ebx) will _not_ work! */ +#define CPU_NUMBER(reg) \ + pushfl ;\ + cli ;\ + pushl %esi ;\ + pushl %edi ;\ + pushl %ebx ;\ + pushl %eax ;\ + call EXT(cpu_number) ;\ + movl %eax, %ebx ;\ + popl %eax ;\ + movl %ebx, reg ;\ + popl %ebx ;\ + popl %edi ;\ + popl %esi ;\ + popfl + #endif #ifdef __x86_64__ #define CX(addr, reg) addr(,reg,8) +#warning Missing CPU_NUMBER() for 64 bit +#define CPU_NUMBER(reg) #endif -/* XXX For now */ -#define CPU_NUMBER(reg) movl $0,reg -#define cpu_number() 0 +#ifndef __ASSEMBLER__ +#include "kern/cpu_number.h" +int cpu_number(); +#endif #else /* NCPUS == 1 */ @@ -51,8 +72,4 @@ #endif /* NCPUS == 1 */ -#ifndef __ASSEMBLER__ -#include "kern/cpu_number.h" -#endif - #endif /* _I386_CPU_NUMBER_H_ */ diff --git a/i386/i386/cswitch.S b/i386/i386/cswitch.S index 718c8aac..ae941bdd 100644 --- a/i386/i386/cswitch.S +++ b/i386/i386/cswitch.S @@ -110,7 +110,7 @@ ENTRY(Thread_continue) */ ENTRY(switch_to_shutdown_context) CPU_NUMBER(%edx) - movl EXT(active_stacks)(,%edx,4),%ecx /* get old kernel stack */ + movl CX(EXT(active_stacks),%edx),%ecx /* get old kernel stack */ movl %ebx,KSS_EBX(%ecx) /* save registers */ movl %ebp,KSS_EBP(%ecx) movl %edi,KSS_EDI(%ecx) @@ -124,8 +124,8 @@ ENTRY(switch_to_shutdown_context) movl 4(%esp),%ebx /* get routine to run next */ movl 8(%esp),%esi /* get its argument */ - movl EXT(interrupt_stack)(,%edx,4),%ecx /* point to its interrupt stack */ - lea INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */ + movl CX(EXT(int_stack_base),%edx),%ecx /* point to its interrupt stack */ + lea -4+INTSTACK_SIZE(%ecx),%esp /* switch to it (top) */ pushl %eax /* push thread */ call EXT(thread_dispatch) /* reschedule thread */ diff --git a/i386/i386/locore.S b/i386/i386/locore.S index b5122613..fb92b6e7 100644 --- a/i386/i386/locore.S +++ b/i386/i386/locore.S @@ -541,13 +541,15 @@ _kret_iret: trap_from_kernel: #if MACH_KDB || MACH_TTD movl %esp,%ebx /* save current stack */ - movl %esp,%edx /* on an interrupt stack? */ - and $(~(KERNEL_STACK_SIZE-1)),%edx - cmpl EXT(int_stack_base),%edx + + xorl %ecx,%ecx + and $(~(NCPUS*INTSTACK_SIZE-1)),%edx + cmpl CX(EXT(int_stack_base),%ecx),%edx je 1f /* OK if so */ - CPU_NUMBER(%edx) /* get CPU number */ + CPU_NUMBER(%edx) + cmpl CX(EXT(kernel_stack),%edx),%esp /* already on kernel stack? */ ja 0f @@ -668,9 +670,10 @@ ENTRY(all_intrs) pushl %edx cld /* clear direction flag */ + xorl %ecx,%ecx movl %esp,%edx /* on an interrupt stack? */ - and $(~(KERNEL_STACK_SIZE-1)),%edx - cmpl %ss:EXT(int_stack_base),%edx + and $(~(NCPUS*INTSTACK_SIZE-1)),%edx + cmpl %ss:CX(EXT(int_stack_base),%ecx),%edx je int_from_intstack /* if not: */ pushl %ds /* save segment registers */ @@ -686,6 +689,7 @@ ENTRY(all_intrs) CPU_NUMBER(%edx) movl CX(EXT(int_stack_top),%edx),%ecx + xchgl %ecx,%esp /* switch to interrupt stack */ #if STAT_TIME @@ -724,19 +728,20 @@ LEXT(return_to_iret) /* ( label for kdb_kintr and hardclock) */ pop %fs pop %es pop %ds - pop %edx - pop %ecx - pop %eax + popl %edx + popl %ecx + popl %eax iret /* return to caller */ int_from_intstack: - cmpl EXT(int_stack_base),%esp /* seemingly looping? */ + CPU_NUMBER(%edx) + cmpl CX(EXT(int_stack_base),%edx),%esp /* seemingly looping? */ jb stack_overflowed /* if not: */ call EXT(interrupt) /* call interrupt routine */ _return_to_iret_i: /* ( label for kdb_kintr) */ - pop %edx /* must have been on kernel segs */ - pop %ecx - pop %eax /* no ASTs */ + popl %edx /* must have been on kernel segs */ + popl %ecx + popl %eax /* no ASTs */ iret stack_overflowed: diff --git a/i386/i386/mp_desc.c b/i386/i386/mp_desc.c index 1e9ea0fc..c6a55d90 100644 --- a/i386/i386/mp_desc.c +++ b/i386/i386/mp_desc.c @@ -24,25 +24,36 @@ * the rights to redistribute these changes. */ -#if NCPUS > 1 - -#include <string.h> - #include <kern/cpu_number.h> #include <kern/debug.h> #include <kern/printf.h> +#include <kern/smp.h> +#include <kern/startup.h> +#include <kern/kmutex.h> #include <mach/machine.h> #include <mach/xen.h> #include <vm/vm_kern.h> #include <i386/mp_desc.h> #include <i386/lock.h> +#include <i386/apic.h> +#include <i386/locore.h> +#include <i386/gdt.h> +#include <i386at/idt.h> +#include <i386at/int_init.h> +#include <i386/cpu.h> +#include <i386/smp.h> + #include <i386at/model_dep.h> #include <machine/ktss.h> +#include <machine/smp.h> #include <machine/tss.h> #include <machine/io_perm.h> #include <machine/vm_param.h> +#include <i386at/acpi_parse_apic.h> +#include <string.h> + /* * The i386 needs an interrupt stack to keep the PCB stack from being * overrun by interrupts. All interrupt stacks MUST lie at lower addresses @@ -52,20 +63,35 @@ /* * Addresses of bottom and top of interrupt stacks. */ -vm_offset_t interrupt_stack[NCPUS]; vm_offset_t int_stack_top[NCPUS]; vm_offset_t int_stack_base[NCPUS]; -/* - * Barrier address. - */ -vm_offset_t int_stack_high; +/* Interrupt stack allocation */ +uint8_t solid_intstack[NCPUS*INTSTACK_SIZE] __aligned(NCPUS*INTSTACK_SIZE); + +void +interrupt_stack_alloc(void) +{ + int i; + + /* + * Set up pointers to the top of the interrupt stack. + */ + for (i = 0; i < NCPUS; i++) { + int_stack_base[i] = (vm_offset_t) &solid_intstack[i * INTSTACK_SIZE]; + int_stack_top[i] = (vm_offset_t) &solid_intstack[(i + 1) * INTSTACK_SIZE] - 4; + } +} + +#if NCPUS > 1 /* - * First cpu`s interrupt stack. + * Flag to mark SMP init by BSP complete */ -extern char _intstack[]; /* bottom */ -extern char _eintstack[]; /* top */ +int bspdone; + +extern void *apboot, *apbootend; +extern volatile ApicLocalUnit* lapic; /* * Multiprocessor i386/i486 systems use a separate copy of the @@ -77,7 +103,7 @@ extern char _eintstack[]; /* top */ */ /* - * Allocated descriptor tables. + * Descriptor tables. */ struct mp_desc_table *mp_desc_table[NCPUS] = { 0 }; @@ -102,12 +128,13 @@ extern struct real_descriptor ldt[LDTSZ]; * Allocate and initialize the per-processor descriptor tables. */ -struct mp_desc_table * +int mp_desc_init(int mycpu) { struct mp_desc_table *mpt; + vm_offset_t mem; - if (mycpu == master_cpu) { + if (mycpu == 0) { /* * Master CPU uses the tables built at boot time. * Just set the TSS and GDT pointers. @@ -118,110 +145,28 @@ mp_desc_init(int mycpu) } else { /* - * Other CPUs allocate the table from the bottom of - * the interrupt stack. + * Allocate tables for other CPUs */ - mpt = (struct mp_desc_table *) interrupt_stack[mycpu]; + if (!init_alloc_aligned(sizeof(struct mp_desc_table), &mem)) + panic("not enough memory for descriptor tables"); + mpt = (struct mp_desc_table *)phystokv(mem); mp_desc_table[mycpu] = mpt; mp_ktss[mycpu] = &mpt->ktss; mp_gdt[mycpu] = mpt->gdt; /* - * Copy the tables + * Zero the tables */ - memcpy(mpt->idt, - idt, - sizeof(idt)); - memcpy(mpt->gdt, - gdt, - sizeof(gdt)); - memcpy(mpt->ldt, - ldt, - sizeof(ldt)); - memset(&mpt->ktss, 0, - sizeof(struct task_tss)); + memset(mpt->idt, 0, sizeof(idt)); + memset(mpt->gdt, 0, sizeof(gdt)); + memset(mpt->ldt, 0, sizeof(ldt)); + memset(&mpt->ktss, 0, sizeof(struct task_tss)); - /* - * Fix up the entries in the GDT to point to - * this LDT and this TSS. - */ -#ifdef MACH_RING1 - panic("TODO %s:%d\n",__FILE__,__LINE__); -#else /* MACH_RING1 */ - _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_LDT, - (unsigned)&mpt->ldt, - LDTSZ * sizeof(struct real_descriptor) - 1, - ACC_P|ACC_PL_K|ACC_LDT, 0); - _fill_gdt_sys_descriptor(mpt->gdt, KERNEL_TSS, - (unsigned)&mpt->ktss, - sizeof(struct task_tss) - 1, - ACC_P|ACC_PL_K|ACC_TSS, 0); - - mpt->ktss.tss.ss0 = KERNEL_DS; - mpt->ktss.tss.io_bit_map_offset = IOPB_INVAL; - mpt->ktss.barrier = 0xFF; -#endif /* MACH_RING1 */ - - return mpt; + return mycpu; } } -kern_return_t intel_startCPU(int slot_num) -{ - printf("TODO: intel_startCPU\n"); -} - -/* - * Called after all CPUs have been found, but before the VM system - * is running. The machine array must show which CPUs exist. - */ -void -interrupt_stack_alloc(void) -{ - int i; - int cpu_count; - vm_offset_t stack_start; - - /* - * Count the number of CPUs. - */ - cpu_count = 0; - for (i = 0; i < NCPUS; i++) - if (machine_slot[i].is_cpu) - cpu_count++; - - /* - * Allocate an interrupt stack for each CPU except for - * the master CPU (which uses the bootstrap stack) - */ - if (!init_alloc_aligned(INTSTACK_SIZE*(cpu_count-1), &stack_start)) - panic("not enough memory for interrupt stacks"); - stack_start = phystokv(stack_start); - - /* - * Set up pointers to the top of the interrupt stack. - */ - for (i = 0; i < NCPUS; i++) { - if (i == master_cpu) { - interrupt_stack[i] = (vm_offset_t) _intstack; - int_stack_top[i] = (vm_offset_t) _eintstack; - } - else if (machine_slot[i].is_cpu) { - interrupt_stack[i] = stack_start; - int_stack_top[i] = stack_start + INTSTACK_SIZE; - - stack_start += INTSTACK_SIZE; - } - } - - /* - * Set up the barrier address. All thread stacks MUST - * be above this address. - */ - int_stack_high = stack_start; -} - /* XXX should be adjusted per CPU speed */ int simple_lock_pause_loop = 100; @@ -255,24 +200,130 @@ void interrupt_processor(int cpu) { printf("interrupt cpu %d\n",cpu); + smp_pmap_update(apic_get_cpu_apic_id(cpu)); +} + +void +cpu_setup() +{ + unsigned apic_id = (((ApicLocalUnit*)phystokv(lapic_addr))->apic_id.r >> 24) & 0xff; + uint16_t cpu = apic_get_cpu_kernel_id(apic_id); + + printf("AP=(%u) before\n", cpu); + + pmap_bootstrap(cpu); + printf("AP=(%u) pmap done\n", cpu); + + pmap_make_temporary_mapping(cpu); + printf("AP=(%u) tempmap done\n", cpu); + +#ifndef MACH_HYP + /* Turn paging on. + * TODO: Why does setting the WP bit here cause a crash? + */ + set_cr0(get_cr0() | CR0_PG /* | CR0_WP */); + set_cr0(get_cr0() & ~(CR0_CD | CR0_NW)); + if (CPU_HAS_FEATURE(CPU_FEATURE_PGE)) + set_cr4(get_cr4() | CR4_PGE); +#endif /* MACH_HYP */ + flush_instr_queue(); + printf("AP=(%u) paging done\n", cpu); + + mp_desc_init(cpu); + printf("AP=(%u) mpdesc done\n", cpu); + + ap_gdt_init(cpu); + printf("AP=(%u) gdt done\n", cpu); + + ap_idt_init(cpu); + printf("AP=(%u) idt done\n", cpu); + + ap_int_init(cpu); + printf("AP=(%u) int done\n", cpu); + + ap_ldt_init(cpu); + printf("AP=(%u) ldt done\n", cpu); + + ap_ktss_init(cpu); + printf("AP=(%u) ktss done\n", cpu); + + pmap_remove_temporary_mapping(cpu); + printf("AP=(%u) remove tempmap done\n", cpu); + + pmap_set_page_dir(0); + flush_tlb(); + printf("AP=(%u) reset page dir done\n", cpu); + + /* Initialize machine_slot fields with the cpu data */ + machine_slot[cpu].cpu_subtype = CPU_SUBTYPE_AT386; + machine_slot[cpu].cpu_type = machine_slot[0].cpu_type; + + lapic_enable(); + asm("sti"); + + slave_main(); +} + +void +cpu_ap_main() +{ + do { + asm volatile ("pause" : : : "memory"); + } while (!bspdone); + + cpu_setup(); } kern_return_t cpu_start(int cpu) { - if (machine_slot[cpu].running) - return KERN_FAILURE; + assert(machine_slot[cpu].running != TRUE); + + uint16_t apic_id = apic_get_cpu_apic_id(cpu); + + printf("Trying to enable: %d\n", apic_id); + + smp_startup_cpu(apic_id, AP_BOOT_ADDR); + + printf("Started cpu %d (lapic id %04x)\n", cpu, apic_id); - return intel_startCPU(cpu); + return KERN_SUCCESS; } void start_other_cpus(void) { - int cpu; - for (cpu = 0; cpu < NCPUS; cpu++) - if (cpu != cpu_number()) - cpu_start(cpu); -} + unsigned long flags; + + cpu_intr_save(&flags); + + int ncpus = smp_get_numcpus(); + + //Copy cpu initialization assembly routine + memcpy((void*)phystokv(AP_BOOT_ADDR), (void*) &apboot, + (uint32_t)&apbootend - (uint32_t)&apboot); + +#ifndef APIC + lapic_enable(); /* Enable lapic only once */ +#endif + unsigned cpu; + bspdone = 0; + for (cpu = 1; cpu < ncpus; cpu++) { + machine_slot[cpu].running = FALSE; + //Start cpu + printf("Starting AP %d\n", cpu); + cpu_start(cpu); + } + printf("BSP: Completed SMP init\n"); + bspdone = 1; + + for (cpu = 1; cpu < ncpus; cpu++) { + do { + asm volatile ("pause" : : : "memory"); + } while (machine_slot[cpu].running == FALSE); + } + + cpu_intr_restore(flags); +} #endif /* NCPUS > 1 */ diff --git a/i386/i386/mp_desc.h b/i386/i386/mp_desc.h index ebe1471d..59d50e77 100644 --- a/i386/i386/mp_desc.h +++ b/i386/i386/mp_desc.h @@ -27,6 +27,8 @@ #ifndef _I386_MP_DESC_H_ #define _I386_MP_DESC_H_ +#include <mach/kern_return.h> + #if MULTIPROCESSOR /* @@ -44,6 +46,8 @@ #include "gdt.h" #include "ldt.h" +#define AP_BOOT_ADDR 0x7000 + /* * The descriptor tables are together in a structure * allocated one per processor (except for the boot processor). @@ -70,11 +74,12 @@ extern struct task_tss *mp_ktss[NCPUS]; */ extern struct real_descriptor *mp_gdt[NCPUS]; +extern uint8_t solid_intstack[]; /* * Each CPU calls this routine to set up its descriptor tables. */ -extern struct mp_desc_table * mp_desc_init(int); +extern int mp_desc_init(int); extern void interrupt_processor(int cpu); @@ -88,4 +93,6 @@ extern kern_return_t cpu_start(int cpu); extern kern_return_t cpu_control(int cpu, const int *info, unsigned int count); +extern void interrupt_stack_alloc(void); + #endif /* _I386_MP_DESC_H_ */ diff --git a/i386/i386/xen.h b/i386/i386/xen.h index 8a17748a..dc8ca928 100644 --- a/i386/i386/xen.h +++ b/i386/i386/xen.h @@ -180,7 +180,7 @@ MACH_INLINE int hyp_mmu_update_pte(pt_entry_t pte, pt_entry_t val) #define HYP_BATCH_MMU_UPDATES 256 #define hyp_mmu_update_la(la, val) hyp_mmu_update_pte( \ - (kernel_page_dir[lin2pdenum_cont((vm_offset_t)(la))] & INTEL_PTE_PFN) \ + (ap_page_dir[0][lin2pdenum_cont((vm_offset_t)(la))] & INTEL_PTE_PFN) \ + ptenum((vm_offset_t)(la)) * sizeof(pt_entry_t), val) #endif diff --git a/i386/i386at/boothdr.S b/i386/i386at/boothdr.S index a4830326..79d186eb 100644 --- a/i386/i386at/boothdr.S +++ b/i386/i386at/boothdr.S @@ -1,6 +1,6 @@ #include <mach/machine/asm.h> - +#include <i386/apic.h> #include <i386/i386asm.h> /* @@ -54,7 +54,18 @@ boot_entry: movw %ax,%ss /* Switch to our own interrupt stack. */ - movl $_intstack+INTSTACK_SIZE,%esp + movl $solid_intstack+INTSTACK_SIZE-4, %esp + andl $0xfffffff0,%esp + + /* Enable local apic */ + xorl %eax, %eax + xorl %edx, %edx + movl $APIC_MSR, %ecx + rdmsr + orl $APIC_MSR_ENABLE, %eax + orl $APIC_MSR_BSP, %eax + movl $APIC_MSR, %ecx + wrmsr /* Reset EFLAGS to a known state. */ pushl $0 @@ -91,9 +102,6 @@ iplt_done: /* Jump into C code. */ call EXT(c_boot_entry) - .comm _intstack,INTSTACK_SIZE - .comm _eintstack,0 - .align 16 .word 0 boot_gdt_descr: diff --git a/i386/i386at/model_dep.c b/i386/i386at/model_dep.c index 1819526b..ad1128ca 100644 --- a/i386/i386at/model_dep.c +++ b/i386/i386at/model_dep.c @@ -134,11 +134,9 @@ extern char version[]; /* If set, reboot the system on ctrl-alt-delete. */ boolean_t rebootflag = FALSE; /* exported to kdintr */ -/* Interrupt stack. */ -static char int_stack[KERNEL_STACK_SIZE] __aligned(KERNEL_STACK_SIZE); -#if NCPUS <= 1 -vm_offset_t int_stack_top[1], int_stack_base[1]; -#endif +/* Interrupt stacks */ +extern vm_offset_t int_stack_top[], int_stack_base[]; +extern uint8_t solid_intstack[]; /* bottom */ #ifdef LINUX_DEV extern void linux_init(void); @@ -171,15 +169,20 @@ void machine_init(void) hyp_init(); #else /* MACH_HYP */ +#if (NCPUS > 1) + acpi_apic_init(); +#endif #if defined(APIC) - if (acpi_apic_init() != ACPI_SUCCESS) { - panic("APIC not found, unable to boot"); - } ioapic_configure(); lapic_enable_timer(); +#else + startrtclock(); +#endif #if (NCPUS > 1) smp_init(); +#endif +#if defined(APIC) #warning FIXME: Rather unmask them from their respective drivers /* kd */ unmask_irq(1); @@ -187,8 +190,7 @@ void machine_init(void) unmask_irq(4); /* com1 */ unmask_irq(3); -#endif /* NCPUS > 1 */ -#endif /* APIC */ +#endif #ifdef LINUX_DEV /* @@ -364,8 +366,6 @@ register_boot_data(const struct multiboot_raw_info *mbi) void i386at_init(void) { - /* XXX move to intel/pmap.h */ - extern pt_entry_t *kernel_page_dir; int i; /* @@ -444,7 +444,7 @@ i386at_init(void) * Kernel virtual address starts at VM_KERNEL_MIN_ADDRESS. * XXX make the BIOS page (page 0) read-only. */ - pmap_bootstrap(); + pmap_bootstrap(0); /* * Load physical segments into the VM system. @@ -453,47 +453,8 @@ i386at_init(void) */ biosmem_setup(); - /* - * We'll have to temporarily install a direct mapping - * between physical memory and low linear memory, - * until we start using our new kernel segment descriptors. - */ -#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS - vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS; - if ((vm_offset_t)(-delta) < delta) - delta = (vm_offset_t)(-delta); - int nb_direct = delta >> PDESHIFT; - for (i = 0; i < nb_direct; i++) - kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = - kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i]; -#endif - /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */ -#if VM_MIN_KERNEL_ADDRESS != 0 - kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = - kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; -#endif + pmap_make_temporary_mapping(0); -#ifdef MACH_PV_PAGETABLES - for (i = 0; i < PDPNUM; i++) - pmap_set_page_readonly_init((void*) kernel_page_dir + i * INTEL_PGBYTES); -#if PAE - pmap_set_page_readonly_init(kernel_pmap->pdpbase); -#endif /* PAE */ -#endif /* MACH_PV_PAGETABLES */ -#if PAE -#ifdef __x86_64__ - set_cr3((unsigned long)_kvtophys(kernel_pmap->l4base)); -#else - set_cr3((unsigned long)_kvtophys(kernel_pmap->pdpbase)); -#endif -#ifndef MACH_HYP - if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE)) - panic("CPU doesn't have support for PAE."); - set_cr4(get_cr4() | CR4_PAE); -#endif /* MACH_HYP */ -#else - set_cr3((unsigned long)_kvtophys(kernel_page_dir)); -#endif /* PAE */ #ifndef MACH_HYP /* Turn paging on. * Also set the WP bit so that on 486 or better processors @@ -525,40 +486,13 @@ i386at_init(void) mp_desc_init(0); #endif // NCPUS -#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS - /* Get rid of the temporary direct mapping and flush it out of the TLB. */ - for (i = 0 ; i < nb_direct; i++) { -#ifdef MACH_XEN -#ifdef MACH_PSEUDO_PHYS - if (!hyp_mmu_update_pte(kv_to_ma(&kernel_page_dir[lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS) + i]), 0)) -#else /* MACH_PSEUDO_PHYS */ - if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i * INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL)) -#endif /* MACH_PSEUDO_PHYS */ - printf("couldn't unmap frame %d\n", i); -#else /* MACH_XEN */ - kernel_page_dir[lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0; -#endif /* MACH_XEN */ - } -#endif - /* Keep BIOS memory mapped */ -#if VM_MIN_KERNEL_ADDRESS != 0 - kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = - kernel_page_dir[lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; -#endif - - /* Not used after boot, better give it back. */ -#ifdef MACH_XEN - hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS); -#endif /* MACH_XEN */ - - flush_tlb(); + pmap_remove_temporary_mapping(0); #ifdef MACH_XEN hyp_p2m_init(); #endif /* MACH_XEN */ - int_stack_base[0] = (vm_offset_t)&int_stack; - int_stack_top[0] = int_stack_base[0] + KERNEL_STACK_SIZE - 4; + interrupt_stack_alloc(); } /* @@ -650,7 +584,6 @@ void c_boot_entry(vm_offset_t bi) #endif /* MACH_KDB */ machine_slot[0].is_cpu = TRUE; - machine_slot[0].running = TRUE; machine_slot[0].cpu_subtype = CPU_SUBTYPE_AT386; switch (cpu_type) @@ -698,6 +631,8 @@ startrtclock(void) { #ifndef APIC clkstart(); + asm ("sti"); + unmask_irq(0); #endif } diff --git a/i386/i386at/model_dep.h b/i386/i386at/model_dep.h index a972695f..f72ddc3b 100644 --- a/i386/i386at/model_dep.h +++ b/i386/i386at/model_dep.h @@ -28,10 +28,9 @@ extern vm_offset_t int_stack_top[NCPUS], int_stack_base[NCPUS]; /* Check whether P points to the interrupt stack. */ -#define ON_INT_STACK(P) (((P) & ~(KERNEL_STACK_SIZE-1)) == int_stack_base[0]) +#define ON_INT_STACK(P) (((P) & ~(NCPUS*INTSTACK_SIZE-1)) == int_stack_base[0]) extern vm_offset_t timemmap(dev_t dev, vm_offset_t off, vm_prot_t prot); - void inittodr(void); boolean_t init_alloc_aligned(vm_size_t size, vm_offset_t *addrp); diff --git a/i386/intel/pmap.c b/i386/intel/pmap.c index 0f2ad641..490f8459 100644 --- a/i386/intel/pmap.c +++ b/i386/intel/pmap.c @@ -101,6 +101,8 @@ * Private data structures. */ +pt_entry_t *ap_page_dir[NCPUS] = { 0 }; + /* * For each vm_page_t, there is a list of all currently * valid virtual mappings of that page. An entry is @@ -394,6 +396,8 @@ boolean_t cpu_update_needed[NCPUS]; #define current_pmap() (vm_map_pmap(current_thread()->task->map)) #define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0) +struct pmap tmp_pmap_store[NCPUS]; +pmap_t tmp_pmap[NCPUS]; struct pmap kernel_pmap_store; pmap_t kernel_pmap; @@ -416,12 +420,6 @@ int ptes_per_vm_page; /* number of hardware ptes needed unsigned int inuse_ptepages_count = 0; /* debugging */ -/* - * Pointer to the basic page directory for the kernel. - * Initialized by pmap_bootstrap(). - */ -pt_entry_t *kernel_page_dir; - /* * Two slots for temporary physical page mapping, to allow for * physical-to-physical transfers. @@ -592,12 +590,13 @@ vm_offset_t pmap_map_bd( * and direct-map all physical memory. * Called with mapping off. */ -void pmap_bootstrap(void) +void pmap_bootstrap(int cpu) { /* * Mapping is turned off; we must reference only physical addresses. * The load image of the system is to be mapped 1-1 physical = virtual. */ + pmap_t mykernel_pmap; /* * Set ptes_per_vm_page for general use. @@ -605,22 +604,24 @@ void pmap_bootstrap(void) #if 0 ptes_per_vm_page = PAGE_SIZE / INTEL_PGBYTES; #endif - - /* - * The kernel's pmap is statically allocated so we don't - * have to use pmap_create, which is unlikely to work - * correctly at this part of the boot sequence. - */ - - kernel_pmap = &kernel_pmap_store; + if (cpu != 0) { + mykernel_pmap = tmp_pmap[cpu] = &tmp_pmap_store[cpu]; + } else { + /* + * The kernel's pmap is statically allocated so we don't + * have to use pmap_create, which is unlikely to work + * correctly at this part of the boot sequence. + */ + mykernel_pmap = kernel_pmap = tmp_pmap[0] = &kernel_pmap_store; + } #if NCPUS > 1 lock_init(&pmap_system_lock, FALSE); /* NOT a sleep lock */ #endif /* NCPUS > 1 */ - simple_lock_init(&kernel_pmap->lock); + simple_lock_init(&mykernel_pmap->lock); - kernel_pmap->ref_count = 1; + mykernel_pmap->ref_count = 1; /* * Determine the kernel virtual address range. @@ -644,15 +645,15 @@ void pmap_bootstrap(void) { vm_offset_t addr; init_alloc_aligned(PDPNUM * INTEL_PGBYTES, &addr); - kernel_page_dir = (pt_entry_t*)phystokv(addr); + ap_page_dir[cpu] = (pt_entry_t*)phystokv(addr); } - kernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page()); - memset(kernel_pmap->pdpbase, 0, INTEL_PGBYTES); + mykernel_pmap->pdpbase = (pt_entry_t*)phystokv(pmap_grab_page()); + memset(mykernel_pmap->pdpbase, 0, INTEL_PGBYTES); { int i; for (i = 0; i < PDPNUM; i++) - WRITE_PTE(&kernel_pmap->pdpbase[i], - pa_to_pte(_kvtophys((void *) kernel_page_dir + WRITE_PTE(&mykernel_pmap->pdpbase[i], + pa_to_pte(_kvtophys((void *) ap_page_dir[cpu] + i * INTEL_PGBYTES)) | INTEL_PTE_VALID #if !defined(MACH_HYP) || defined(MACH_PV_PAGETABLES) @@ -662,23 +663,23 @@ void pmap_bootstrap(void) } #ifdef __x86_64__ #ifdef MACH_HYP - kernel_pmap->user_l4base = NULL; - kernel_pmap->user_pdpbase = NULL; + mykernel_pmap->user_l4base = NULL; + mykernel_pmap->user_pdpbase = NULL; #endif - kernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page()); - memset(kernel_pmap->l4base, 0, INTEL_PGBYTES); - WRITE_PTE(&kernel_pmap->l4base[0], pa_to_pte(_kvtophys(kernel_pmap->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); + mykernel_pmap->l4base = (pt_entry_t*)phystokv(pmap_grab_page()); + memset(mykernel_pmap->l4base, 0, INTEL_PGBYTES); + WRITE_PTE(&mykernel_pmap->l4base[0], pa_to_pte(_kvtophys(mykernel_pmap->pdpbase)) | INTEL_PTE_VALID | INTEL_PTE_WRITE); #ifdef MACH_PV_PAGETABLES - pmap_set_page_readonly_init(kernel_pmap->l4base); + pmap_set_page_readonly_init(mykernel_pmap->l4base); #endif #endif /* x86_64 */ #else /* PAE */ - kernel_pmap->dirbase = kernel_page_dir = (pt_entry_t*)phystokv(pmap_grab_page()); + mykernel_pmap->dirbase = ap_page_dir[cpu] = (pt_entry_t*)phystokv(pmap_grab_page()); #endif /* PAE */ { unsigned i; for (i = 0; i < NPDES; i++) - kernel_page_dir[i] = 0; + ap_page_dir[cpu][i] = 0; } #ifdef MACH_PV_PAGETABLES @@ -754,7 +755,7 @@ void pmap_bootstrap(void) */ for (va = phystokv(0); va >= phystokv(0) && va < kernel_virtual_end; ) { - pt_entry_t *pde = kernel_page_dir + lin2pdenum_cont(kvtolin(va)); + pt_entry_t *pde = ap_page_dir[cpu] + lin2pdenum_cont(kvtolin(va)); pt_entry_t *ptable = (pt_entry_t*)phystokv(pmap_grab_page()); pt_entry_t *pte; @@ -1261,7 +1262,7 @@ pmap_t pmap_create(vm_size_t size) return PMAP_NULL; } memcpy(page_dir[i], - (void *) kernel_page_dir + i * INTEL_PGBYTES, + (void *) ap_page_dir[0] + i * INTEL_PGBYTES, INTEL_PGBYTES); } @@ -3030,3 +3031,96 @@ pmap_unmap_page_zero (void) #endif /* MACH_PV_PAGETABLES */ } #endif /* __i386__ */ + +void +pmap_make_temporary_mapping(int cpu) +{ + int i; + + /* + * We'll have to temporarily install a direct mapping + * between physical memory and low linear memory, + * until we start using our new kernel segment descriptors. + */ +#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS + vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS; + if ((vm_offset_t)(-delta) < delta) + delta = (vm_offset_t)(-delta); + int nb_direct = delta >> PDESHIFT; + for (i = 0; i < nb_direct; i++) + ap_page_dir[cpu][lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = + ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS) + i]; +#endif + /* We need BIOS memory mapped at 0xc0000 & co for BIOS accesses */ +#if VM_MIN_KERNEL_ADDRESS != 0 + ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = + ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; +#endif + +#ifdef MACH_PV_PAGETABLES + for (i = 0; i < PDPNUM; i++) + pmap_set_page_readonly_init((void*) ap_page_dir[cpu] + i * INTEL_PGBYTES); +#if PAE + pmap_set_page_readonly_init(tmp_pmap[cpu]->pdpbase); +#endif /* PAE */ +#endif /* MACH_PV_PAGETABLES */ + + pmap_set_page_dir(cpu); +} + +void +pmap_set_page_dir(int cpu) +{ +#if PAE +#ifdef __x86_64__ + set_cr3((unsigned long)_kvtophys(tmp_pmap[cpu]->l4base)); +#else + set_cr3((unsigned long)_kvtophys(tmp_pmap[cpu]->pdpbase)); +#endif +#ifndef MACH_HYP + if (!CPU_HAS_FEATURE(CPU_FEATURE_PAE)) + panic("CPU doesn't have support for PAE."); + set_cr4(get_cr4() | CR4_PAE); +#endif /* MACH_HYP */ +#else + set_cr3((unsigned long)_kvtophys(ap_page_dir[cpu])); +#endif /* PAE */ +} + +void +pmap_remove_temporary_mapping(int cpu) +{ + int i; + +#if INIT_VM_MIN_KERNEL_ADDRESS != LINEAR_MIN_KERNEL_ADDRESS + vm_offset_t delta = INIT_VM_MIN_KERNEL_ADDRESS - LINEAR_MIN_KERNEL_ADDRESS; + if ((vm_offset_t)(-delta) < delta) + delta = (vm_offset_t)(-delta); + int nb_direct = delta >> PDESHIFT; + /* Get rid of the temporary direct mapping and flush it out of the TLB. */ + for (i = 0 ; i < nb_direct; i++) { +#ifdef MACH_XEN +#ifdef MACH_PSEUDO_PHYS + if (!hyp_mmu_update_pte(kv_to_ma(&ap_page_dir[cpu][lin2pdenum_cont(VM_MIN_KERNEL_ADDRESS) + i]), 0)) +#else /* MACH_PSEUDO_PHYS */ + if (hyp_do_update_va_mapping(VM_MIN_KERNEL_ADDRESS + i * INTEL_PGBYTES, 0, UVMF_INVLPG | UVMF_ALL)) +#endif /* MACH_PSEUDO_PHYS */ + printf("couldn't unmap frame %d\n", i); +#else /* MACH_XEN */ + ap_page_dir[cpu][lin2pdenum_cont(INIT_VM_MIN_KERNEL_ADDRESS) + i] = 0; +#endif /* MACH_XEN */ + } +#endif + /* Keep BIOS memory mapped */ +#if VM_MIN_KERNEL_ADDRESS != 0 + ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS)] = + ap_page_dir[cpu][lin2pdenum_cont(LINEAR_MIN_KERNEL_ADDRESS)]; +#endif + + /* Not used after boot, better give it back. */ +#ifdef MACH_XEN + hyp_free_page(0, (void*) VM_MIN_KERNEL_ADDRESS); +#endif /* MACH_XEN */ + + flush_tlb(); +} diff --git a/i386/intel/pmap.h b/i386/intel/pmap.h index bad640c1..2ec40268 100644 --- a/i386/intel/pmap.h +++ b/i386/intel/pmap.h @@ -474,13 +474,19 @@ pt_entry_t *pmap_pte(const pmap_t pmap, vm_offset_t addr); #define pmap_attribute(pmap,addr,size,attr,value) \ (KERN_INVALID_ADDRESS) +extern pt_entry_t *ap_page_dir[NCPUS]; + /* * Bootstrap the system enough to run with virtual memory. * Allocate the kernel page directory and page tables, * and direct-map all physical memory. * Called with mapping off. */ -extern void pmap_bootstrap(void); +extern void pmap_bootstrap(int cpu); + +extern void pmap_set_page_dir(int cpu); +extern void pmap_make_temporary_mapping(int cpu); +extern void pmap_remove_temporary_mapping(int cpu); extern void pmap_unmap_page_zero (void); diff --git a/linux/dev/arch/i386/kernel/irq.c b/linux/dev/arch/i386/kernel/irq.c index 67feea84..6f99003e 100644 --- a/linux/dev/arch/i386/kernel/irq.c +++ b/linux/dev/arch/i386/kernel/irq.c @@ -31,6 +31,7 @@ #include <i386/spl.h> #include <i386/irq.h> #include <i386/pit.h> +#include <i386/model_dep.h> #define MACH_INCLUDE #include <linux/mm.h> @@ -421,7 +422,7 @@ reserve_mach_irqs (void) { unsigned int i; - for (i = 0; i < NINTR; i++) + for (i = 1; i < NINTR; i++) { if (ivect[i] != intnull) /* This dummy action does not specify SA_SHIRQ, so @@ -707,7 +708,6 @@ void init_IRQ (void) { char *p; - int latch = (CLKNUM + hz / 2) / hz; /* * Ensure interrupts are disabled. @@ -715,19 +715,12 @@ init_IRQ (void) (void) splhigh (); #ifndef APIC - /* - * Program counter 0 of 8253 to interrupt hz times per second. - */ - outb_p (PIT_C0 | PIT_SQUAREMODE | PIT_READMODE, PITCTL_PORT); - outb_p (latch & 0xff, PITCTR0_PORT); - outb (latch >> 8, PITCTR0_PORT); -#endif - /* * Install our clock interrupt handler. */ old_clock_handler = ivect[0]; ivect[0] = linux_timer_intr; +#endif reserve_mach_irqs (); diff --git a/linux/dev/init/main.c b/linux/dev/init/main.c index 6d853957..207724f3 100644 --- a/linux/dev/init/main.c +++ b/linux/dev/init/main.c @@ -160,7 +160,9 @@ linux_init (void) pcmcia_init (); #endif +#ifndef APIC restore_IRQ (); +#endif linux_auto_config = 0; } -- 2.34.1