Source: golang-1.9
Version: 1.9-1
Severity: normal
Tags: patch
User: debian-powe...@lists.debian.org
Usertags: ppc64

Hi!

Starting with golang-1.9, upstream decided to drop support for
POWER5 on big-endian ppc64 systems and raised the minimum
instruction set for these systems to POWER8.

Since Debian's ppc64 port is, and always will be, based
on POWER5, I have decided to revert the changes in question
to make golang-1.9 work on POWER5. Luckily, the changes in
question were actually just code clean-ups and simplifications,
none of which had any actual performance impact on little-endian
ppc64 systems.

On a side note: Raising the instruction set level for the big-
endian ppc64 port to POWER8 never made any sense, as
every available Linux distribution actually uses POWER5
on big-endian ppc64 systems. If users want to use POWER8,
they have to install a little-endian ppc64 port which is
what most users want anyway due to the improved level of
compatibility with most existing applications. No one will
buy a POWER8-capable machine and run a big-endian ppc64
port on it; there is simply no use case for the changes
upstream introduced.

Thus, it would be great if you could incorporate this patch
into the golang-1.9 Debian package to make it build on ppc64
again. I'm aware that some IBM folk might not agree with
this change, but I think it makes sense for Debian and
its users. We just released our first installation image
for ppc64 ever, so I am expecting a larger number of
Debian ppc64 installations in the future.

Thanks for your consideration!

Adrian

--
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaub...@debian.org
`. `'   Freie Universitaet Berlin - glaub...@physik.fu-berlin.de
  `-    GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913
Description: Re-add support for POWER5
 Starting with golang-1.9, upstream dropped support for
 POWER5 on big-endian ppc64 systems to clean up the code
 a bit. This patch reverts a number of changes that upstream
 made to remove POWER5 support for ppc64 big-endian. This
 change does not have any negative impact on ppc64 little-
 endian targets but it will allow us to continue using
 golang-1.9 on ppc64 big-endian a little longer.
Author: John Paul Adrian Glaubitz <glaub...@physik.fu-berlin.de>
Upstream: https://github.com/golang/go/issues/19074
Last-Update: 2017-09-04

Index: golang-1.9-1.9/src/cmd/compile/internal/ppc64/ssa.go
===================================================================
--- golang-1.9-1.9.orig/src/cmd/compile/internal/ppc64/ssa.go
+++ golang-1.9-1.9/src/cmd/compile/internal/ppc64/ssa.go
@@ -13,6 +13,20 @@ import (
        "math"
 )
 
+var condOps = map[ssa.Op]obj.As{
+       ssa.OpPPC64Equal:        ppc64.ABEQ,
+       ssa.OpPPC64NotEqual:     ppc64.ABNE,
+       ssa.OpPPC64LessThan:     ppc64.ABLT,
+       ssa.OpPPC64GreaterEqual: ppc64.ABGE,
+       ssa.OpPPC64GreaterThan:  ppc64.ABGT,
+       ssa.OpPPC64LessEqual:    ppc64.ABLE,
+
+       ssa.OpPPC64FLessThan:     ppc64.ABLT, // 1 branch for FCMP
+       ssa.OpPPC64FGreaterThan:  ppc64.ABGT, // 1 branch for FCMP
+       ssa.OpPPC64FLessEqual:    ppc64.ABLT, // 2 branches for FCMP <=, second is BEQ
+       ssa.OpPPC64FGreaterEqual: ppc64.ABGT, // 2 branches for FCMP >=, second is BEQ
+}
+
 // iselOp encodes mapping of comparison operations onto ISEL operands
 type iselOp struct {
        cond        int64
@@ -760,6 +774,27 @@ func ssaGenValue(s *gc.SSAGenState, v *s
                //   rtmp := 1
                //   isel rt,0,rtmp,!cond // rt is target in ppc asm
 
+               if v.Block.Func.Config.OldArch {
+                       p := s.Prog(ppc64.AMOVD)
+                       p.From.Type = obj.TYPE_CONST
+                       p.From.Offset = 1
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = v.Reg()
+
+                       pb := s.Prog(condOps[v.Op])
+                       pb.To.Type = obj.TYPE_BRANCH
+
+                       p = s.Prog(ppc64.AMOVD)
+                       p.From.Type = obj.TYPE_CONST
+                       p.From.Offset = 0
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = v.Reg()
+
+                       p = s.Prog(obj.ANOP)
+                       gc.Patch(pb, p)
+                       break
+               }
+               // Modern PPC uses ISEL
                p := s.Prog(ppc64.AMOVD)
                p.From.Type = obj.TYPE_CONST
                p.From.Offset = 1
@@ -771,6 +806,30 @@ func ssaGenValue(s *gc.SSAGenState, v *s
        case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion
                ssa.OpPPC64FGreaterEqual:
 
+               if v.Block.Func.Config.OldArch {
+                       p := s.Prog(ppc64.AMOVW)
+                       p.From.Type = obj.TYPE_CONST
+                       p.From.Offset = 1
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = v.Reg()
+
+                       pb0 := s.Prog(condOps[v.Op])
+                       pb0.To.Type = obj.TYPE_BRANCH
+                       pb1 := s.Prog(ppc64.ABEQ)
+                       pb1.To.Type = obj.TYPE_BRANCH
+
+                       p = s.Prog(ppc64.AMOVW)
+                       p.From.Type = obj.TYPE_CONST
+                       p.From.Offset = 0
+                       p.To.Type = obj.TYPE_REG
+                       p.To.Reg = v.Reg()
+
+                       p = s.Prog(obj.ANOP)
+                       gc.Patch(pb0, p)
+                       gc.Patch(pb1, p)
+                       break
+               }
+               // Modern PPC uses ISEL
                p := s.Prog(ppc64.AMOVD)
                p.From.Type = obj.TYPE_CONST
                p.From.Offset = 1
Index: golang-1.9-1.9/src/cmd/compile/internal/ssa/config.go
===================================================================
--- golang-1.9-1.9.orig/src/cmd/compile/internal/ssa/config.go
+++ golang-1.9-1.9/src/cmd/compile/internal/ssa/config.go
@@ -35,6 +35,7 @@ type Config struct {
        noDuffDevice    bool          // Don't use Duff's device
        nacl            bool          // GOOS=nacl
        use387          bool          // GO386=387
+       OldArch         bool          // True for older versions of architecture, e.g. true for PPC64BE, false for PPC64LE
        NeedsFpScratch  bool          // No direct move between GP and FP register sets
        BigEndian       bool          //
        sparsePhiCutoff uint64        // Sparse phi location algorithm used above this #blocks*#variables score
@@ -200,6 +201,7 @@ func NewConfig(arch string, types Types,
                c.hasGReg = true
                c.noDuffDevice = objabi.GOOS == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend
        case "ppc64":
+               c.OldArch = true
                c.BigEndian = true
                fallthrough
        case "ppc64le":
Index: golang-1.9-1.9/src/math/big/arith_ppc64.s
===================================================================
--- /dev/null
+++ golang-1.9-1.9/src/math/big/arith_ppc64.s
@@ -0,0 +1,14 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,ppc64
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+TEXT ·divWW(SB), NOSPLIT, $0
+       BR ·divWW_g(SB)
+
Index: golang-1.9-1.9/src/math/big/arith_ppc64le.s
===================================================================
--- /dev/null
+++ golang-1.9-1.9/src/math/big/arith_ppc64le.s
@@ -0,0 +1,50 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build !math_big_pure_go,ppc64le
+
+#include "textflag.h"
+
+// This file provides fast assembly versions for the elementary
+// arithmetic operations on vectors implemented in arith.go.
+
+// func divWW(x1, x0, y Word) (q, r Word)
+TEXT ·divWW(SB), NOSPLIT, $0
+       MOVD x1+0(FP), R4
+       MOVD x0+8(FP), R5
+       MOVD y+16(FP), R6
+
+       CMPU R4, R6
+       BGE  divbigger
+
+       // from the programmer's note in ch. 3 of the ISA manual, p.74
+       DIVDEU R6, R4, R3
+       DIVDU  R6, R5, R7
+       MULLD  R6, R3, R8
+       MULLD  R6, R7, R20
+       SUB    R20, R5, R10
+       ADD    R7, R3, R3
+       SUB    R8, R10, R4
+       CMPU   R4, R10
+       BLT    adjust
+       CMPU   R4, R6
+       BLT    end
+
+adjust:
+       MOVD $1, R21
+       ADD  R21, R3, R3
+       SUB  R6, R4, R4
+
+end:
+       MOVD R3, q+24(FP)
+       MOVD R4, r+32(FP)
+
+       RET
+
+divbigger:
+       MOVD $-1, R7
+       MOVD R7, q+24(FP)
+       MOVD R7, r+32(FP)
+       RET
+
Index: golang-1.9-1.9/src/math/big/arith_ppc64x.s
===================================================================
--- golang-1.9-1.9.orig/src/math/big/arith_ppc64x.s
+++ golang-1.9-1.9/src/math/big/arith_ppc64x.s
@@ -198,44 +198,5 @@ end:
        MOVD R4, c+56(FP)
        RET
 
-// func divWW(x1, x0, y Word) (q, r Word)
-TEXT ·divWW(SB), NOSPLIT, $0
-       MOVD x1+0(FP), R4
-       MOVD x0+8(FP), R5
-       MOVD y+16(FP), R6
-
-       CMPU R4, R6
-       BGE  divbigger
-
-       // from the programmer's note in ch. 3 of the ISA manual, p.74
-       DIVDEU R6, R4, R3
-       DIVDU  R6, R5, R7
-       MULLD  R6, R3, R8
-       MULLD  R6, R7, R20
-       SUB    R20, R5, R10
-       ADD    R7, R3, R3
-       SUB    R8, R10, R4
-       CMPU   R4, R10
-       BLT    adjust
-       CMPU   R4, R6
-       BLT    end
-
-adjust:
-       MOVD $1, R21
-       ADD  R21, R3, R3
-       SUB  R6, R4, R4
-
-end:
-       MOVD R3, q+24(FP)
-       MOVD R4, r+32(FP)
-
-       RET
-
-divbigger:
-       MOVD $-1, R7
-       MOVD R7, q+24(FP)
-       MOVD R7, r+32(FP)
-       RET
-
 TEXT ·divWVW(SB), NOSPLIT, $0
        BR ·divWVW_g(SB)
Index: golang-1.9-1.9/src/runtime/internal/atomic/asm_ppc64x.s
===================================================================
--- golang-1.9-1.9.orig/src/runtime/internal/atomic/asm_ppc64x.s
+++ golang-1.9-1.9/src/runtime/internal/atomic/asm_ppc64x.s
@@ -165,12 +165,32 @@ TEXT runtime∕internal∕atomic·Store6
 TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9
        MOVD    ptr+0(FP), R3
        MOVBZ   val+8(FP), R4
+#ifdef  GOARCH_ppc64
+       // Align ptr down to 4 bytes so we can use 32-bit load/store.
+       // R5 = (R3 << 0) & ~3
+       RLDCR   $0, R3, $~3, R5
+       // Compute val shift.
+       // Big endian.  ptr = ptr ^ 3
+       XOR     $3, R3
+       // R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8)
+       RLDC    $3, R3, $(3*8), R6
+       // Shift val for aligned ptr.  R4 = val << R6
+       SLD     R6, R4, R4
+       SYNC
+
+again:
+       LWAR    (R5), R6
+       OR      R4, R6
+       STWCCC  R6, (R5)
+       BNE     again
+#else
        SYNC
 again:
        LBAR    (R3), R6
        OR      R4, R6
        STBCCC  R6, (R3)
        BNE     again
+#endif
        ISYNC
        RET
 
@@ -178,11 +198,34 @@ again:
 TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9
        MOVD    ptr+0(FP), R3
        MOVBZ   val+8(FP), R4
+#ifdef  GOARCH_ppc64
+       // Align ptr down to 4 bytes so we can use 32-bit load/store.
+       // R5 = (R3 << 0) & ~3
+       RLDCR   $0, R3, $~3, R5
+       // Compute val shift.
+       // Big endian.  ptr = ptr ^ 3
+       XOR     $3, R3
+       // R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8)
+       RLDC    $3, R3, $(3*8), R6
+       // Shift val for aligned ptr.  R4 = val << R6 | ^(0xFF << R6)
+       MOVD    $0xFF, R7
+       SLD     R6, R4
+       SLD     R6, R7
+       XOR     $-1, R7
+       OR      R7, R4
+       SYNC
+again:
+       LWAR    (R5), R6
+       AND     R4, R6
+       STWCCC  R6, (R5)
+       BNE     again
+#else
        SYNC
 again:
        LBAR    (R3),R6
        AND     R4,R6
        STBCCC  R6,(R3)
        BNE     again
+#endif
        ISYNC
        RET

Reply via email to