On Sun, 28 Nov 2010, Philip Guenther wrote:
> On Sun, 28 Nov 2010, Philip Guenther wrote:
> > On Sunday, November 28, 2010, David Gwynne <d...@cvs.openbsd.org> wrote:
> ...
> > > Log message:
> > > bump the number of supported cpus from 32 up to 64. lets me attach and use
> > > all 48 cores in one of my boxes.
> > >
> > > requested by deraadt@
> > > made possible by the recent pmap diff by kettenis@
> > 
> > Doesn't pm_cpus in the pmap need to change to a u_int64_t and locore.S
> > and pmap.c (at least) change to match?
> 
> Here's a diff to do that.
> 
> It also corrects the x86_atomic_*_{l,ul}() macros to actually expand to 
> the functions that operate on longs instead of ints (64 and 32 bits, 
> respectively) and removes the unused x86_multicast_ipi() function.  
> Finally, tlb_shoot_wait is only ever operated on with 32-bit atomic ops, 
> so make it an (unsigned) int instead of a long.  (This would never have 
> worked on a big-endian platform.)
> 
> Compile tested only so far (about to get on plane).

Revised diff that doesn't include my bogus flailing on x86_atomic_cas_ul()
(which does operate on unsigned longs) or tlb_shoot_wait.
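
The core of it is just mask width: with pm_cpus widened to u_int64_t, every
shift and bit test on it has to be 64-bit as well, since a plain (1 << cpu_id)
is undefined once cpu_id reaches 32.  A throwaway userland sketch of the idea
(plain stdint types so it compiles on its own; not kernel code):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t mask = 0;
	int cpu_id;

	/* build a cpu mask the way pmap_tlb_shoot*() does after the diff */
	for (cpu_id = 0; cpu_id < 48; cpu_id++)
		mask |= 1ULL << cpu_id;	/* 1 << cpu_id is UB from bit 32 on */

	printf("48-cpu mask: 0x%llx\n", (unsigned long long)mask);
	return (0);
}

Hence the 1ULL shifts sprinkled through pmap.c below.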

I'm running this now on my lowly little 4-core amd64.
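
For reference, the 64-bit helpers the _ul macros now point at have the shape
of the clearbits_u64 body that shows up as context in the atomic.h hunk; the
setbits twin with orq is assumed by analogy.  A rough standalone approximation
(stdint types and a literal lock prefix instead of the header's u_int64_t and
LOCK macro, so read it as a sketch, not the actual header):

#include <stdint.h>

static inline void
setbits_u64(volatile uint64_t *ptr, uint64_t bits)
{
	/* read-modify-write the full 64-bit word under a lock prefix */
	__asm__ __volatile__("lock orq %1,%0" : "+m" (*ptr) : "r" (bits));
}

static inline void
clearbits_u64(volatile uint64_t *ptr, uint64_t bits)
{
	__asm__ __volatile__("lock andq %1,%0" : "+m" (*ptr) : "r" (~bits));
}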

Philip Guenther


diff -ru t/amd64/intr.c ./amd64/intr.c
--- t/amd64/intr.c      Sun Nov 28 20:27:17 2010
+++ ./amd64/intr.c      Sun Nov 28 18:48:08 2010
@@ -498,7 +498,7 @@
 
        simple_lock(&ci->ci_slock);
        pic->pic_hwmask(pic, ih->ih_pin);       
-       x86_atomic_clearbits_l(&ci->ci_ipending, (1 << ih->ih_slot));
+       x86_atomic_clearbits_u32(&ci->ci_ipending, (1 << ih->ih_slot));
 
        /*
         * Remove the handler from the chain.
diff -ru t/amd64/ipi.c ./amd64/ipi.c
--- t/amd64/ipi.c       Sun Nov 28 20:27:17 2010
+++ ./amd64/ipi.c       Sun Nov 28 18:48:46 2010
@@ -50,7 +50,7 @@
 {
        int ret;
 
-       x86_atomic_setbits_l(&ci->ci_ipis, ipimask);
+       x86_atomic_setbits_u32(&ci->ci_ipis, ipimask);
 
        /* Don't send IPI to cpu which isn't (yet) running. */
        if (!(ci->ci_flags & CPUF_RUNNING))
@@ -88,7 +88,7 @@
                        continue;
                if ((ci->ci_flags & CPUF_RUNNING) == 0)
                        continue;
-               x86_atomic_setbits_l(&ci->ci_ipis, ipimask);
+               x86_atomic_setbits_u32(&ci->ci_ipis, ipimask);
                count++;
        }
        if (!count)
@@ -98,23 +98,6 @@
 }
 
 void
-x86_multicast_ipi(int cpumask, int ipimask)
-{
-       struct cpu_info *ci;
-       CPU_INFO_ITERATOR cii;
-
-       cpumask &= ~(1U << cpu_number());
-       if (cpumask == 0)
-               return;
-
-       CPU_INFO_FOREACH(cii, ci) {
-               if ((cpumask & (1U << ci->ci_cpuid)) == 0)
-                       continue;
-               x86_send_ipi(ci, ipimask);
-       }
-}
-
-void
 x86_ipi_handler(void)
 {
        extern struct evcount ipi_count;
@@ -122,7 +105,7 @@
        u_int32_t pending;
        int bit;
 
-       pending = x86_atomic_testset_ul(&ci->ci_ipis, 0);
+       pending = x86_atomic_testset_u32(&ci->ci_ipis, 0);
 
        for (bit = 0; bit < X86_NIPI && pending; bit++) {
                if (pending & (1<<bit)) {
diff -ru t/amd64/locore.S ./amd64/locore.S
--- t/amd64/locore.S    Sun Nov 28 20:27:17 2010
+++ ./amd64/locore.S    Sun Nov 28 19:00:57 2010
@@ -762,7 +762,7 @@
        /* clear the old pmap's bit for the cpu */
        movq    PCB_PMAP(%r13),%rcx
        lock
-       btrl    %edi,PM_CPUS(%rcx)
+       btrq    %rdi,PM_CPUS(%rcx)
 
        /* Save stack pointers. */
        movq    %rsp,PCB_RSP(%r13)
@@ -800,9 +800,11 @@
        /* set the new pmap's bit for the cpu */
        movl    CPUVAR(CPUID),%edi
        movq    PCB_PMAP(%r13),%rcx
-       movl    PM_CPUS(%rcx),%eax
+#ifdef DIAGNOSTIC
+       movq    PM_CPUS(%rcx),%rax
+#endif
        lock
-       btsl    %edi,PM_CPUS(%rcx)
+       btsq    %rdi,PM_CPUS(%rcx)
 #ifdef DIAGNOSTIC
        jc      _C_LABEL(switch_pmcpu_set)
 #endif
diff -ru t/amd64/pmap.c ./amd64/pmap.c
--- t/amd64/pmap.c      Sun Nov 28 20:36:05 2010
+++ ./amd64/pmap.c      Sun Nov 28 20:32:48 2010
@@ -351,7 +351,7 @@
 pmap_is_active(struct pmap *pmap, int cpu_id)
 {
        return (pmap == pmap_kernel() ||
-           (pmap->pm_cpus & (1U << cpu_id)) != 0);
+           (pmap->pm_cpus & (1ULL << cpu_id)) != 0);
 }
 
 static __inline u_int
@@ -1064,7 +1064,7 @@
 
 #ifdef DIAGNOSTIC
        if (pmap->pm_cpus != 0)
-               printf("pmap_destroy: pmap %p cpus=0x%lx\n",
+               printf("pmap_destroy: pmap %p cpus=0x%llx\n",
                    (void *)pmap, pmap->pm_cpus);
 #endif
 
@@ -1127,7 +1127,7 @@
                /*
                 * mark the pmap in use by this processor.
                 */
-               x86_atomic_setbits_ul(&pmap->pm_cpus, (1U << cpu_number()));
+               x86_atomic_setbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
        }
 }
 
@@ -1143,7 +1143,7 @@
        /*
         * mark the pmap no longer in use by this processor. 
         */
-       x86_atomic_clearbits_ul(&pmap->pm_cpus, (1U << cpu_number()));
+       x86_atomic_clearbits_u64(&pmap->pm_cpus, (1ULL << cpu_number()));
 
 }
 
@@ -2437,7 +2437,7 @@
  * cpus we need to send the IPI to, then we grab the counter, then
  * we send the IPIs, then we finally do our own shootdown.
  *
- * Our shootdown is last to make it parallell with the other cpus
+ * Our shootdown is last to make it parallel with the other cpus
  * to shorten the spin time.
  *
  * Notice that we depend on failures to send IPIs only being able to
@@ -2457,13 +2457,13 @@
        struct cpu_info *ci, *self = curcpu();
        CPU_INFO_ITERATOR cii;
        long wait = 0;
-       int mask = 0;
+       u_int64_t mask = 0;
 
        CPU_INFO_FOREACH(cii, ci) {
                if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
                    !(ci->ci_flags & CPUF_RUNNING))
                        continue;
-               mask |= 1 << ci->ci_cpuid;
+               mask |= 1ULL << ci->ci_cpuid;
                wait++;
        }
 
@@ -2476,7 +2476,7 @@
                }
                tlb_shoot_addr1 = va;
                CPU_INFO_FOREACH(cii, ci) {
-                       if ((mask & 1 << ci->ci_cpuid) == 0)
+                       if ((mask & 1ULL << ci->ci_cpuid) == 0)
                                continue;
                        if (x86_fast_ipi(ci, LAPIC_IPI_INVLPG) != 0)
                                panic("pmap_tlb_shootpage: ipi failed");
@@ -2494,14 +2494,14 @@
        struct cpu_info *ci, *self = curcpu();
        CPU_INFO_ITERATOR cii;
        long wait = 0;
-       int mask = 0;
+       u_int64_t mask = 0;
        vaddr_t va;
 
        CPU_INFO_FOREACH(cii, ci) {
                if (ci == self || !pmap_is_active(pm, ci->ci_cpuid) ||
                    !(ci->ci_flags & CPUF_RUNNING))
                        continue;
-               mask |= 1 << ci->ci_cpuid;
+               mask |= 1ULL << ci->ci_cpuid;
                wait++;
        }
 
@@ -2515,7 +2515,7 @@
                tlb_shoot_addr1 = sva;
                tlb_shoot_addr2 = eva;
                CPU_INFO_FOREACH(cii, ci) {
-                       if ((mask & 1 << ci->ci_cpuid) == 0)
+                       if ((mask & 1ULL << ci->ci_cpuid) == 0)
                                continue;
                        if (x86_fast_ipi(ci, LAPIC_IPI_INVLRANGE) != 0)
                                panic("pmap_tlb_shootrange: ipi failed");
@@ -2534,12 +2534,12 @@
        struct cpu_info *ci, *self = curcpu();
        CPU_INFO_ITERATOR cii;
        long wait = 0;
-       int mask = 0;
+       u_int64_t mask = 0;
 
        CPU_INFO_FOREACH(cii, ci) {
                if (ci == self || !(ci->ci_flags & CPUF_RUNNING))
                        continue;
-               mask |= 1 << ci->ci_cpuid;
+               mask |= 1ULL << ci->ci_cpuid;
                wait++;
        }
 
@@ -2552,7 +2552,7 @@
                }
 
                CPU_INFO_FOREACH(cii, ci) {
-                       if ((mask & 1 << ci->ci_cpuid) == 0)
+                       if ((mask & 1ULL << ci->ci_cpuid) == 0)
                                continue;
                        if (x86_fast_ipi(ci, LAPIC_IPI_INVLTLB) != 0)
                                panic("pmap_tlb_shoottlb: ipi failed");
diff -ru t/include/atomic.h ./include/atomic.h
--- t/include/atomic.h  Sun Nov 28 20:27:17 2010
+++ ./include/atomic.h  Sun Nov 28 20:33:58 2010
@@ -120,12 +120,10 @@
        __asm __volatile(LOCK " andq %1,%0" :  "=m" (*ptr) : "ir" (~bits));
 }
 
-#define x86_atomic_testset_ul  x86_atomic_testset_u32
+#define x86_atomic_testset_ul  x86_atomic_testset_u64
 #define x86_atomic_testset_i   x86_atomic_testset_i32
-#define x86_atomic_setbits_l   x86_atomic_setbits_u32
-#define x86_atomic_setbits_ul  x86_atomic_setbits_u32
-#define x86_atomic_clearbits_l x86_atomic_clearbits_u32
-#define x86_atomic_clearbits_ul        x86_atomic_clearbits_u32
+#define x86_atomic_setbits_ul  x86_atomic_setbits_u64
+#define x86_atomic_clearbits_ul        x86_atomic_clearbits_u64
 
 #define atomic_setbits_int x86_atomic_setbits_u32
 #define atomic_clearbits_int x86_atomic_clearbits_u32
diff -ru t/include/intr.h ./include/intr.h
--- t/include/intr.h    Sun Nov 28 20:27:17 2010
+++ ./include/intr.h    Sun Nov 28 18:26:27 2010
@@ -215,7 +215,6 @@
 int x86_send_ipi(struct cpu_info *, int);
 int x86_fast_ipi(struct cpu_info *, int);
 void x86_broadcast_ipi(int);
-void x86_multicast_ipi(int, int);
 void x86_ipi_handler(void);
 void x86_intlock(struct intrframe);
 void x86_intunlock(struct intrframe);
diff -ru t/include/pmap.h ./include/pmap.h
--- t/include/pmap.h    Sun Nov 28 20:27:17 2010
+++ ./include/pmap.h    Sun Nov 28 17:43:17 2010
@@ -318,7 +318,7 @@
                                        /* pointer to a PTP in our pmap */
        struct pmap_statistics pm_stats;  /* pmap stats (lck by object lock) */
 
-       u_int32_t pm_cpus;              /* mask of CPUs using pmap */
+       u_int64_t pm_cpus;              /* mask of CPUs using pmap */
 };
 
 /*
