On Sun, 28 Nov 2010, Philip Guenther wrote:
> On Sun, 28 Nov 2010, Philip Guenther wrote:
> > On Sunday, November 28, 2010, David Gwynne <d...@cvs.openbsd.org> wrote:
> ...
> > > Log message:
> > > bump the number of supported cpus from 32 up to 64. lets me attach and use
> > > all 48 cores in one of my boxes.
> > >
> > > requested by deraadt@
> > > made possible by the recent pmap diff by kettenis@
> >
> > Doesn't pm_cpus in the pmap need to change to a u_int64_t and locore.S
> > and pmap.c (at least) change to match?
>
> Here's a diff to do that.
>
> It also corrects the x86_atomic_*_{l,ul}() macros to actually expand to
> the functions that operate on longs instead of ints (64- and 32-bits,
> respectively) and removes the unused x86_multicast_ipi() function.
> Finally, tlb_shoot_wait has been operated on with 32bit atomic ops, so
> make it an (unsigned) int instead of a long.  (This would have never
> worked on a big-endian platform.)
>
> Compile tested only so far (about to get on plane).
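(Aside, not part of the quoted mail or the diff below: a minimal userland
sketch of the endianness point above.  The names are made up; it only shows
that a 32-bit operation at a 64-bit object's address touches the low half
on a little-endian machine and the high half on a big-endian one.)

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
	uint64_t shoot_wait = 1;	/* stand-in for a 64-bit "long" */
	uint32_t low_word;

	/*
	 * A 32-bit atomic op only touches the 4 bytes at the object's
	 * address; copy those bytes out to see which half they are.
	 */
	memcpy(&low_word, &shoot_wait, sizeof(low_word));

	/*
	 * little-endian (amd64): prints 1 (the low half is at offset 0);
	 * big-endian: would print 0 (offset 0 holds the high half).
	 */
	printf("first 4 bytes of (uint64_t)1: %u\n", low_word);
	return (0);
}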
Revised diff that doesn't include my bogus flailing on
x86_atomic_cas_ul() (which does operate on unsigned longs) or
tlb_shoot_wait.  I'm running this now on my lowly little 4 core amd64.


Philip Guenther


diff -ru t/amd64/intr.c ./amd64/intr.c
--- t/amd64/intr.c	Sun Nov 28 20:27:17 2010
+++ ./amd64/intr.c	Sun Nov 28 18:48:08 2010
@@ -498,7 +498,7 @@
 
 	simple_lock(&ci->ci_slock);
 	pic->pic_hwmask(pic, ih->ih_pin);
-	x86_atomic_clearbits_l(&ci->ci_ipending, (1 << ih->ih_slot));
+	x86_atomic_clearbits_u32(&ci->ci_ipending, (1 << ih->ih_slot));
 
 	/*
 	 * Remove the handler from the chain.
diff -ru t/amd64/ipi.c ./amd64/ipi.c
--- t/amd64/ipi.c	Sun Nov 28 20:27:17 2010
+++ ./amd64/ipi.c	Sun Nov 28 18:48:46 2010
@@ -50,7 +50,7 @@
 {
 	int ret;
 
-	x86_atomic_setbits_l(&ci->ci_ipis, ipimask);
+	x86_atomic_setbits_u32(&ci->ci_ipis, ipimask);
 
 	/* Don't send IPI to cpu which isn't (yet) running. */
 	if (!(ci->ci_flags & CPUF_RUNNING))
@@ -88,7 +88,7 @@
 			continue;
 		if ((ci->ci_flags & CPUF_RUNNING) == 0)
 			continue;
-		x86_atomic_setbits_l(&ci->ci_ipis, ipimask);
+		x86_atomic_setbits_u32(&ci->ci_ipis, ipimask);
 		count++;
 	}
 	if (!count)
@@ -98,23 +98,6 @@
 }
 
 void
-x86_multicast_ipi(int cpumask, int ipimask)
-{
-	struct cpu_info *ci;
-	CPU_INFO_ITERATOR cii;
-
-	cpumask &= ~(1U << cpu_number());
-	if (cpumask == 0)
-		return;
-
-	CPU_INFO_FOREACH(cii, ci) {
-		if ((cpumask & (1U << ci->ci_cpuid)) == 0)
-			continue;
-		x86_send_ipi(ci, ipimask);
-	}
-}
-
-void
 x86_ipi_handler(void)
 {
 	extern struct evcount ipi_count;
@@ -122,7 +105,7 @@
 	u_int32_t pending;
 	int bit;
 
-	pending = x86_atomic_testset_ul(&ci->ci_ipis, 0);
+	pending = x86_atomic_testset_u32(&ci->ci_ipis, 0);
 
 	for (bit = 0; bit < X86_NIPI && pending; bit++) {
 		if (pending & (1<<bit)) {
diff -ru t/amd64/locore.S ./amd64/locore.S
--- t/amd64/locore.S	Sun Nov 28 20:27:17 2010
+++ ./amd64/locore.S	Sun Nov 28 19:00:57 2010
@@ -762,7 +762,7 @@
 	/* clear the old pmap's bit for the cpu */
 	movq	PCB_PMAP(%r13),%rcx
 	lock
-	btrl	%edi,PM_CPUS(%rcx)
+	btrq	%rdi,PM_CPUS(%rcx)
 
 	/* Save stack pointers. */
 	movq	%rsp,PCB_RSP(%r13)
@@ -800,9 +800,11 @@
 	/* set the new pmap's bit for the cpu */
 	movl	CPUVAR(CPUID),%edi
 	movq	PCB_PMAP(%r13),%rcx
-	movl	PM_CPUS(%rcx),%eax
+#ifdef DIAGNOSTIC
+	movq	PM_CPUS(%rcx),%rax
+#endif
 	lock
-	btsl	%edi,PM_CPUS(%rcx)
+	btsq	%rdi,PM_CPUS(%rcx)
 #ifdef DIAGNOSTIC
 	jc	_C_LABEL(switch_pmcpu_set)
 #endif
diff -ru t/amd64/pmap.c ./amd64/pmap.c
--- t/amd64/pmap.c	Sun Nov 28 20:36:05 2010
+++ ./amd64/pmap.c	Sun Nov 28 20:32:48 2010
@@ -351,7 +351,7 @@
 pmap_is_active(struct pmap *pmap, int cpu_id)
 {
 	return (pmap == pmap_kernel() ||
-	    (pmap->pm_cpus & (1U << cpu_id)) != 0);
+	    (pmap->pm_cpus & (1ULL << cpu_id)) != 0);
 }
 
 static __inline u_int
@@ -1064,7 +1064,7 @@
 
 #ifdef DIAGNOSTIC
 	if (pmap->pm_cpus != 0)
-		printf("pmap_destroy: pmap %p cpus=0x%lx\n",
+		printf("pmap_destroy: pmap %p cpus=0x%llx\n",
 		    (void *)pmap, pmap->pm_cpus);
 #endif
 
@@ -1127,7 +1127,7 @@
 		/*
 		 * mark the pmap in use by this processor.
 		 */
-		x86_atomic_setbits_ul(&pmap->pm_cpus, (1U << cpu_number()));
+		x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
 	}
 }
 
@@ -1143,7 +1143,7 @@
 
 	/*
 	 * mark the pmap no longer in use by this processor.
 	 */
-	x86_atomic_clearbits_ul(&pmap->pm_cpus, (1U << cpu_number()));
+	x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
 }
 
@@ -2437,7 +2437,7 @@
  * cpus we need to send the IPI to, then we grab the counter, then
  * we send the IPIs, then we finally do our own shootdown.
  *
- * Our shootdown is last to make it parallell with the other cpus
+ * Our shootdown is last to make it parallel with the other cpus
  * to shorten the spin time.
  *
 * Notice that we depend on failures to send IPIs only being able to
@@ -2457,13 +2457,13 @@
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
 	long wait = 0;
-	int mask = 0;
+	u_int64_t mask = 0;
 
 	CPU_INFO_FOREACH(cii, ci) {
 		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
 		    !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		mask |= 1 << ci->ci_cpuid;
+		mask |= 1ULL << ci->ci_cpuid;
 		wait++;
 	}
 
@@ -2476,7 +2476,7 @@
 	}
 	tlb_shoot_addr1 = va;
 	CPU_INFO_FOREACH(cii, ci) {
-		if ((mask & 1 << ci->ci_cpuid) == 0)
+		if ((mask & 1ULL << ci->ci_cpuid) == 0)
 			continue;
 		if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
 			panic("pmap_tlb_shootpage: ipi failed");
@@ -2494,14 +2494,14 @@
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
 	long wait = 0;
-	int mask = 0;
+	u_int64_t mask = 0;
 	vaddr_t va;
 
 	CPU_INFO_FOREACH(cii, ci) {
 		if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
 		    !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		mask |= 1 << ci->ci_cpuid;
+		mask |= 1ULL << ci->ci_cpuid;
 		wait++;
 	}
 
@@ -2515,7 +2515,7 @@
 	tlb_shoot_addr1 = sva;
 	tlb_shoot_addr2 = eva;
 	CPU_INFO_FOREACH(cii, ci) {
-		if ((mask & 1 << ci->ci_cpuid) == 0)
+		if ((mask & 1ULL << ci->ci_cpuid) == 0)
 			continue;
 		if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
 			panic("pmap_tlb_shootrange: ipi failed");
@@ -2534,12 +2534,12 @@
 	struct cpu_info *ci, *self = curcpu();
 	CPU_INFO_ITERATOR cii;
 	long wait = 0;
-	int mask = 0;
+	u_int64_t mask = 0;
 
 	CPU_INFO_FOREACH(cii, ci) {
 		if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
 			continue;
-		mask |= 1 << ci->ci_cpuid;
+		mask |= 1ULL << ci->ci_cpuid;
 		wait++;
 	}
 
@@ -2552,7 +2552,7 @@
 	}
 
 	CPU_INFO_FOREACH(cii, ci) {
-		if ((mask & 1 << ci->ci_cpuid) == 0)
+		if ((mask & 1ULL << ci->ci_cpuid) == 0)
 			continue;
 		if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
 			panic("pmap_tlb_shoottlb: ipi failed");
diff -ru t/include/atomic.h ./include/atomic.h
--- t/include/atomic.h	Sun Nov 28 20:27:17 2010
+++ ./include/atomic.h	Sun Nov 28 20:33:58 2010
@@ -120,12 +120,10 @@
 	__asm __volatile(LOCK " andq %1,%0" : "=m" (*ptr) : "ir" (~bits));
 }
 
-#define x86_atomic_testset_ul x86_atomic_testset_u32
+#define x86_atomic_testset_ul x86_atomic_testset_u64
 #define x86_atomic_testset_i x86_atomic_testset_i32
-#define x86_atomic_setbits_l x86_atomic_setbits_u32
-#define x86_atomic_setbits_ul x86_atomic_setbits_u32
-#define x86_atomic_clearbits_l x86_atomic_clearbits_u32
-#define x86_atomic_clearbits_ul x86_atomic_clearbits_u32
+#define x86_atomic_setbits_ul x86_atomic_setbits_u64
+#define x86_atomic_clearbits_ul x86_atomic_clearbits_u64
 
 #define atomic_setbits_int x86_atomic_setbits_u32
 #define atomic_clearbits_int x86_atomic_clearbits_u32
diff -ru t/include/intr.h ./include/intr.h
--- t/include/intr.h	Sun Nov 28 20:27:17 2010
+++ ./include/intr.h	Sun Nov 28 18:26:27 2010
@@ -215,7 +215,6 @@
 int x86_send_ipi(struct cpu_info *, int);
 int x86_fast_ipi(struct cpu_info *, int);
 void x86_broadcast_ipi(int);
-void x86_multicast_ipi(int, int);
 void x86_ipi_handler(void);
 void x86_intlock(struct intrframe);
 void x86_intunlock(struct intrframe);
diff -ru t/include/pmap.h ./include/pmap.h
--- t/include/pmap.h	Sun Nov 28 20:27:17 2010
+++ ./include/pmap.h	Sun Nov 28 17:43:17 2010
@@ -318,7 +318,7 @@
 					/* pointer to a PTP in our pmap */
 	struct pmap_statistics pm_stats;  /* pmap stats (lck by object lock) */
 
-	u_int32_t pm_cpus;		/* mask of CPUs using pmap */
+	u_int64_t pm_cpus;		/* mask of CPUs using pmap */
 };
 
 /*
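
(Aside, not part of the diff: a minimal userland sketch of why the cpu
masks grow to 64 bits along with pm_cpus.  Shifting a 32-bit int by 32 or
more is undefined behavior in C, and on amd64 the hardware masks the shift
count to 5 bits, so a would-be bit for cpu 32 silently lands on cpu 0.)

#include <stdio.h>

int
main(void)
{
	volatile int cpuid = 32;	/* first cpu beyond a 32-bit mask */

	/*
	 * Undefined behavior: x86 masks the shift count to 5 bits, so
	 * this typically yields 1, aliasing cpu 32 with cpu 0.
	 */
	unsigned int old_mask = 1U << cpuid;
	/* Correct: widen to 64 bits before shifting. */
	unsigned long long new_mask = 1ULL << cpuid;

	printf("1U << 32 = 0x%x, 1ULL << 32 = 0x%llx\n", old_mask, new_mask);
	return (0);
}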