On Sun, 28 Nov 2010, Philip Guenther wrote:
> On Sunday, November 28, 2010, David Gwynne <d...@cvs.openbsd.org> wrote:
...
> > Log message:
> > bump the number of supported cpus from 32 up to 64.  lets me attach and use
> > all 48 cores in one of my boxes.
> >
> > requested by deraadt@
> > made possible by the recent pmap diff by kettenis@
>
> Doesn't pm_cpus in the pmap need to change to a u_int64_t and locore.S
> and pmap.c (at least) change to match?
Here's a diff to do that.  It also corrects the x86_atomic_*_{l,ul}()
macros to actually expand to the functions that operate on longs instead
of ints (64 and 32 bits, respectively) and removes the unused
x86_multicast_ipi() function.  Finally, tlb_shoot_wait has been operated
on with 32-bit atomic ops, so make it an (unsigned) int instead of a
long.  (This would never have worked on a big-endian platform.)

Compile-tested only so far (about to get on a plane).

Philip

Index: amd64/intr.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/intr.c,v
retrieving revision 1.25
diff -u -p -r1.25 intr.c
--- amd64/intr.c	20 Sep 2010 06:33:46 -0000	1.25
+++ amd64/intr.c	29 Nov 2010 03:01:36 -0000
@@ -498,7 +498,7 @@ intr_disestablish(struct intrhand *ih)
 
 	simple_lock(&ci->ci_slock);
 	pic->pic_hwmask(pic, ih->ih_pin);
-	x86_atomic_clearbits_l(&ci->ci_ipending, (1 << ih->ih_slot));
+	x86_atomic_clearbits_u32(&ci->ci_ipending, (1 << ih->ih_slot));
 
 	/*
 	 * Remove the handler from the chain.
Index: amd64/ipi.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/ipi.c,v
retrieving revision 1.8
diff -u -p -r1.8 ipi.c
--- amd64/ipi.c	26 Jun 2008 05:42:09 -0000	1.8
+++ amd64/ipi.c	29 Nov 2010 03:01:36 -0000
@@ -50,7 +50,7 @@ x86_send_ipi(struct cpu_info *ci, int ip
 {
 	int ret;
 
-	x86_atomic_setbits_l(&ci->ci_ipis, ipimask);
+	x86_atomic_setbits_u32(&ci->ci_ipis, ipimask);
 
 	/* Don't send IPI to cpu which isn't (yet) running. */
 	if (!(ci->ci_flags & CPUF_RUNNING))
@@ -88,7 +88,7 @@ x86_broadcast_ipi(int ipimask)
 			continue;
 		if ((ci->ci_flags & CPUF_RUNNING) == 0)
 			continue;
-		x86_atomic_setbits_l(&ci->ci_ipis, ipimask);
+		x86_atomic_setbits_u32(&ci->ci_ipis, ipimask);
 		count++;
 	}
 	if (!count)
@@ -98,23 +98,6 @@
 }
 
 void
-x86_multicast_ipi(int cpumask, int ipimask)
-{
-	struct cpu_info *ci;
-	CPU_INFO_ITERATOR cii;
-
-	cpumask &= ~(1U << cpu_number());
-	if (cpumask == 0)
-		return;
-
-	CPU_INFO_FOREACH(cii, ci) {
-		if ((cpumask & (1U << ci->ci_cpuid)) == 0)
-			continue;
-		x86_send_ipi(ci, ipimask);
-	}
-}
-
-void
 x86_ipi_handler(void)
 {
 	extern struct evcount ipi_count;
@@ -122,7 +105,7 @@ x86_ipi_handler(void)
 	u_int32_t pending;
 	int bit;
 
-	pending = x86_atomic_testset_ul(&ci->ci_ipis, 0);
+	pending = x86_atomic_testset_u32(&ci->ci_ipis, 0);
 
 	for (bit = 0; bit < X86_NIPI && pending; bit++) {
 		if (pending & (1<<bit)) {
Index: amd64/locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.43
diff -u -p -r1.43 locore.S
--- amd64/locore.S	13 Nov 2010 04:16:42 -0000	1.43
+++ amd64/locore.S	29 Nov 2010 03:01:37 -0000
@@ -762,7 +762,7 @@ ENTRY(cpu_switchto)
 	/* clear the old pmap's bit for the cpu */
 	movq	PCB_PMAP(%r13),%rcx
 	lock
-	btrl	%edi,PM_CPUS(%rcx)
+	btrq	%rdi,PM_CPUS(%rcx)
 
 	/* Save stack pointers. */
 	movq	%rsp,PCB_RSP(%r13)
@@ -800,9 +800,11 @@ switch_exited:
 	/* set the new pmap's bit for the cpu */
 	movl	CPUVAR(CPUID),%edi
 	movq	PCB_PMAP(%r13),%rcx
-	movl	PM_CPUS(%rcx),%eax
+#ifdef DIAGNOSTIC
+	movq	PM_CPUS(%rcx),%rax
+#endif
 	lock
-	btsl	%edi,PM_CPUS(%rcx)
+	btsq	%rdi,PM_CPUS(%rcx)
 #ifdef DIAGNOSTIC
 	jc	_C_LABEL(switch_pmcpu_set)
 #endif
Index: amd64/pmap.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/pmap.c,v
retrieving revision 1.59
diff -u -p -r1.59 pmap.c
--- amd64/pmap.c	20 Nov 2010 20:33:23 -0000	1.59
+++ amd64/pmap.c	29 Nov 2010 03:01:39 -0000
@@ -351,7 +351,7 @@ static __inline boolean_t
 pmap_is_active(struct pmap *pmap, int cpu_id)
 {
 	return (pmap == pmap_kernel() ||
-	    (pmap->pm_cpus & (1U << cpu_id)) != 0);
+	    (pmap->pm_cpus & (1ULL << cpu_id)) != 0);
 }
 
 static __inline u_int
@@ -1064,7 +1064,7 @@ pmap_destroy(struct pmap *pmap)
 
 #ifdef DIAGNOSTIC
 	if (pmap->pm_cpus != 0)
-		printf("pmap_destroy: pmap %p cpus=0x%lx\n",
+		printf("pmap_destroy: pmap %p cpus=0x%llx\n",
 		    (void *)pmap, pmap->pm_cpus);
 #endif
 
@@ -1127,7 +1127,7 @@ pmap_activate(struct proc *p)
 		/*
 		 * mark the pmap in use by this processor.
 		 */
-		x86_atomic_setbits_ul(&pmap->pm_cpus, (1U << cpu_number()));
+		x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
 	}
 }
 
@@ -1143,7 +1143,7 @@ pmap_deactivate(struct proc *p)
 	/*
 	 * mark the pmap no longer in use by this processor.
 	 */
-	x86_atomic_clearbits_ul(&pmap->pm_cpus, (1U << cpu_number()));
+	x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
 }
 
 
@@ -2437,7 +2437,7 @@ pmap_virtual_space(vaddr_t *vstartp, vad
  * cpus we need to send the IPI to, then we grab the counter, then
  * we send the IPIs, then we finally do our own shootdown.
  *
- * Our shootdown is last to make it parallell with the other cpus
+ * Our shootdown is last to make it parallel with the other cpus
  * to shorten the spin time.
  *
  * Notice that we depend on failures to send IPIs only being able to
@@ -2446,7 +2446,7 @@ pmap_virtual_space(vaddr_t *vstartp, vad
  * release the lock if we get an interrupt in a bad moment.
 */
-volatile long tlb_shoot_wait;
+volatile unsigned int tlb_shoot_wait;
 
 volatile vaddr_t tlb_shoot_addr1;
 volatile vaddr_t tlb_shoot_addr2;
@@ -2456,27 +2456,27 @@ pmap_tlb_shootpage(struct pmap *pm, vadd
 {
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
-	long wait = 0;
-	int mask = 0;
+	int wait = 0;
+	u_int64_t mask = 0;
 
 	CPU_INFO_FOREACH(cii, ci) {
 		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
 		    !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		mask |= 1 << ci->ci_cpuid;
+		mask |= 1ULL << ci->ci_cpuid;
 		wait++;
 	}
 
 	if (wait > 0) {
 		int s = splvm();
-		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
+		while (x86_atomic_cas_u32(&tlb_shoot_wait, 0, wait) != 0) {
 			while (tlb_shoot_wait != 0)
 				SPINLOCK_SPIN_HOOK;
 		}
 
 		tlb_shoot_addr1 = va;
 		CPU_INFO_FOREACH(cii, ci) {
-			if ((mask & 1 << ci->ci_cpuid) == 0)
+			if ((mask & 1ULL << ci->ci_cpuid) == 0)
 				continue;
 			if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
 				panic("pmap_tlb_shootpage: ipi failed");
@@ -2493,29 +2493,29 @@ pmap_tlb_shootrange(struct pmap *pm, vad
 {
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
-	long wait = 0;
-	int mask = 0;
+	int wait = 0;
+	u_int64_t mask = 0;
 	vaddr_t va;
 
 	CPU_INFO_FOREACH(cii, ci) {
 		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
 		    !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		mask |= 1 << ci->ci_cpuid;
+		mask |= 1ULL << ci->ci_cpuid;
 		wait++;
 	}
 
 	if (wait > 0) {
 		int s = splvm();
-		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
+		while (x86_atomic_cas_u32(&tlb_shoot_wait, 0, wait) != 0) {
 			while (tlb_shoot_wait != 0)
 				SPINLOCK_SPIN_HOOK;
 		}
 
 		tlb_shoot_addr1 = sva;
 		tlb_shoot_addr2 = eva;
 		CPU_INFO_FOREACH(cii, ci) {
-			if ((mask & 1 << ci->ci_cpuid) == 0)
+			if ((mask & 1ULL << ci->ci_cpuid) == 0)
 				continue;
 			if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
 				panic("pmap_tlb_shootrange: ipi failed");
@@ -2533,26 +2533,26 @@ pmap_tlb_shoottlb(void)
 {
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
-	long wait = 0;
-	int mask = 0;
+	int wait = 0;
+	u_int64_t mask = 0;
 
 	CPU_INFO_FOREACH(cii, ci) {
 		if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		mask |= 1 << ci->ci_cpuid;
+		mask |= 1ULL << ci->ci_cpuid;
 		wait++;
 	}
 
 	if (wait) {
 		int s = splvm();
 
-		while (x86_atomic_cas_ul(&tlb_shoot_wait, 0, wait) != 0) {
+		while (x86_atomic_cas_u32(&tlb_shoot_wait, 0, wait) != 0) {
 			while (tlb_shoot_wait != 0)
 				SPINLOCK_SPIN_HOOK;
 		}
 
 		CPU_INFO_FOREACH(cii, ci) {
-			if ((mask & 1 << ci->ci_cpuid) == 0)
+			if ((mask & 1ULL << ci->ci_cpuid) == 0)
 				continue;
 			if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
 				panic("pmap_tlb_shoottlb: ipi failed");
Index: include/atomic.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/atomic.h,v
retrieving revision 1.6
diff -u -p -r1.6 atomic.h
--- include/atomic.h	25 May 2007 16:22:11 -0000	1.6
+++ include/atomic.h	29 Nov 2010 03:01:39 -0000
@@ -120,12 +120,10 @@ x86_atomic_clearbits_u64(volatile u_int6
 	__asm __volatile(LOCK " andq %1,%0" : "=m" (*ptr) : "ir" (~bits));
 }
 
-#define x86_atomic_testset_ul	x86_atomic_testset_u32
+#define x86_atomic_testset_ul	x86_atomic_testset_u64
 #define x86_atomic_testset_i	x86_atomic_testset_i32
-#define x86_atomic_setbits_l	x86_atomic_setbits_u32
-#define x86_atomic_setbits_ul	x86_atomic_setbits_u32
-#define x86_atomic_clearbits_l	x86_atomic_clearbits_u32
-#define x86_atomic_clearbits_ul	x86_atomic_clearbits_u32
+#define x86_atomic_setbits_ul	x86_atomic_setbits_u64
+#define x86_atomic_clearbits_ul	x86_atomic_clearbits_u64
 
 #define atomic_setbits_int	x86_atomic_setbits_u32
 #define atomic_clearbits_int	x86_atomic_clearbits_u32
@@ -134,3 +132,4 @@ x86_atomic_clearbits_u64(volatile u_int6
 
 #endif /* defined(_KERNEL) && !defined(_LOCORE) */
 #endif /* _AMD64_ATOMIC_H_ */
+
Index: include/intr.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/intr.h,v
retrieving revision 1.19
diff -u -p -r1.19 intr.h
--- include/intr.h	31 May 2010 21:39:56 -0000	1.19
+++ include/intr.h	29 Nov 2010 03:01:39 -0000
@@ -215,7 +215,6 @@ void intr_printconfig(void);
 int x86_send_ipi(struct cpu_info *, int);
 int x86_fast_ipi(struct cpu_info *, int);
 void x86_broadcast_ipi(int);
-void x86_multicast_ipi(int, int);
 void x86_ipi_handler(void);
 void x86_intlock(struct intrframe);
 void x86_intunlock(struct intrframe);
Index: include/pmap.h
===================================================================
RCS file: /cvs/src/sys/arch/amd64/include/pmap.h,v
retrieving revision 1.35
diff -u -p -r1.35 pmap.h
--- include/pmap.h	26 Oct 2010 05:49:10 -0000	1.35
+++ include/pmap.h	29 Nov 2010 03:01:40 -0000
@@ -318,7 +318,7 @@ struct pmap {
 					/* pointer to a PTP in our pmap */
 	struct pmap_statistics pm_stats;  /* pmap stats (lck by object lock) */
 
-	u_int32_t pm_cpus;		/* mask of CPUs using pmap */
+	u_int64_t pm_cpus;		/* mask of CPUs using pmap */
 };
 
 /*
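
To illustrate the endianness remark above: a 32-bit operation through the
address of a 64-bit long hits the low half of the value on a little-endian
machine like amd64, but would hit the high half on a big-endian one, so
using 32-bit atomics on a long-sized tlb_shoot_wait only works because of
amd64's byte order.  A minimal userland sketch of the effect (plain C, not
the kernel code; the variable name just mirrors tlb_shoot_wait, and the
type-punned store stands in for the 32-bit atomic op):

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		long shoot_wait = 0;	/* 64 bits on an LP64 platform */

		/*
		 * A 32-bit store through the long's address, the moral
		 * equivalent of applying a 32-bit atomic op to it.
		 */
		*(volatile uint32_t *)&shoot_wait = 5;

		/*
		 * Little-endian: the low 32 bits were written; prints 5.
		 * Big-endian: the high 32 bits were written instead;
		 * prints 21474836480 (5 << 32).
		 */
		printf("%ld\n", shoot_wait);
		return (0);
	}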