Source: golang-1.9 Version: 1.9-1 Severity: normal Tags: patch User: debian-powe...@lists.debian.org Usertags: ppc64
Hi! Starting with golang-1.9, upstream decided to drop support for POWER5 on big-endian ppc64 systems and raised the minimum instruction set for these systems to POWER8. Since Debian's ppc64 port is still, and always will be, based on POWER5, I have decided to revert the changes in question to make golang-1.9 work on POWER5. Luckily, the changes in question were actually just code clean-ups and simplifications, none of which had any actual performance impact on little-endian ppc64 systems. As a side note: raising the instruction set level for the big-endian ppc64 port to POWER8 actually never made any sense, as every Linux distribution available actually uses POWER5 on big-endian ppc64 systems. If users want to use POWER8, they have to install a little-endian ppc64 port, which is what most users want anyway due to the improved level of compatibility with most existing applications. No one will buy a POWER8-capable machine and run a big-endian ppc64 port on it; there is simply no use case for the changes upstream introduced. Thus, it would be great if you could incorporate this patch into the golang-1.9 Debian package to make it build on ppc64 again. I'm aware that some IBM folk might not agree with this change, but I think it makes sense for Debian and its users. We just released our first installation image for ppc64 ever, so I am expecting a larger number of Debian ppc64 installations in the future. Thanks for your consideration! Adrian -- .''`. John Paul Adrian Glaubitz : :' : Debian Developer - glaub...@debian.org `. `' Freie Universitaet Berlin - glaub...@physik.fu-berlin.de `- GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913
Description: Re-add support for POWER5 Starting with golang-1.9, upstream dropped support for POWER5 on big-endian ppc64 systems to clean up the code a bit. This patch reverts a number of changes that upstream made to remove POWER5 support for ppc64 big-endian. This change does not have any negative impact on ppc64 little- endian targets but it will allow us to continue using golang-1.9 on ppc64 big-endian a little longer. Author: John Paul Adrian Glaubitz <glaub...@physik.fu-berlin.de> Upstream: https://github.com/golang/go/issues/19074 Last-Update: 2017-09-04 Index: golang-1.9-1.9/src/cmd/compile/internal/ppc64/ssa.go =================================================================== --- golang-1.9-1.9.orig/src/cmd/compile/internal/ppc64/ssa.go +++ golang-1.9-1.9/src/cmd/compile/internal/ppc64/ssa.go @@ -13,6 +13,20 @@ import ( "math" ) +var condOps = map[ssa.Op]obj.As{ + ssa.OpPPC64Equal: ppc64.ABEQ, + ssa.OpPPC64NotEqual: ppc64.ABNE, + ssa.OpPPC64LessThan: ppc64.ABLT, + ssa.OpPPC64GreaterEqual: ppc64.ABGE, + ssa.OpPPC64GreaterThan: ppc64.ABGT, + ssa.OpPPC64LessEqual: ppc64.ABLE, + + ssa.OpPPC64FLessThan: ppc64.ABLT, // 1 branch for FCMP + ssa.OpPPC64FGreaterThan: ppc64.ABGT, // 1 branch for FCMP + ssa.OpPPC64FLessEqual: ppc64.ABLT, // 2 branches for FCMP <=, second is BEQ + ssa.OpPPC64FGreaterEqual: ppc64.ABGT, // 2 branches for FCMP >=, second is BEQ +} + // iselOp encodes mapping of comparison operations onto ISEL operands type iselOp struct { cond int64 @@ -760,6 +774,27 @@ func ssaGenValue(s *gc.SSAGenState, v *s // rtmp := 1 // isel rt,0,rtmp,!cond // rt is target in ppc asm + if v.Block.Func.Config.OldArch { + p := s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 1 + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + pb := s.Prog(condOps[v.Op]) + pb.To.Type = obj.TYPE_BRANCH + + p = s.Prog(ppc64.AMOVD) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + p = s.Prog(obj.ANOP) + 
gc.Patch(pb, p) + break + } + // Modern PPC uses ISEL p := s.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 @@ -771,6 +806,30 @@ func ssaGenValue(s *gc.SSAGenState, v *s case ssa.OpPPC64FLessEqual, // These include a second branch for EQ -- dealing with NaN prevents REL= to !REL conversion ssa.OpPPC64FGreaterEqual: + if v.Block.Func.Config.OldArch { + p := s.Prog(ppc64.AMOVW) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 1 + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + pb0 := s.Prog(condOps[v.Op]) + pb0.To.Type = obj.TYPE_BRANCH + pb1 := s.Prog(ppc64.ABEQ) + pb1.To.Type = obj.TYPE_BRANCH + + p = s.Prog(ppc64.AMOVW) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 0 + p.To.Type = obj.TYPE_REG + p.To.Reg = v.Reg() + + p = s.Prog(obj.ANOP) + gc.Patch(pb0, p) + gc.Patch(pb1, p) + break + } + // Modern PPC uses ISEL p := s.Prog(ppc64.AMOVD) p.From.Type = obj.TYPE_CONST p.From.Offset = 1 Index: golang-1.9-1.9/src/cmd/compile/internal/ssa/config.go =================================================================== --- golang-1.9-1.9.orig/src/cmd/compile/internal/ssa/config.go +++ golang-1.9-1.9/src/cmd/compile/internal/ssa/config.go @@ -35,6 +35,7 @@ type Config struct { noDuffDevice bool // Don't use Duff's device nacl bool // GOOS=nacl use387 bool // GO386=387 + OldArch bool // True for older versions of architecture, e.g. 
true for PPC64BE, false for PPC64LE NeedsFpScratch bool // No direct move between GP and FP register sets BigEndian bool // sparsePhiCutoff uint64 // Sparse phi location algorithm used above this #blocks*#variables score @@ -200,6 +201,7 @@ func NewConfig(arch string, types Types, c.hasGReg = true c.noDuffDevice = objabi.GOOS == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend case "ppc64": + c.OldArch = true c.BigEndian = true fallthrough case "ppc64le": Index: golang-1.9-1.9/src/math/big/arith_ppc64.s =================================================================== --- /dev/null +++ golang-1.9-1.9/src/math/big/arith_ppc64.s @@ -0,0 +1,14 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !math_big_pure_go,ppc64 + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +TEXT ·divWW(SB), NOSPLIT, $0 + BR ·divWW_g(SB) + Index: golang-1.9-1.9/src/math/big/arith_ppc64le.s =================================================================== --- /dev/null +++ golang-1.9-1.9/src/math/big/arith_ppc64le.s @@ -0,0 +1,50 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !math_big_pure_go,ppc64le + +#include "textflag.h" + +// This file provides fast assembly versions for the elementary +// arithmetic operations on vectors implemented in arith.go. + +// func divWW(x1, x0, y Word) (q, r Word) +TEXT ·divWW(SB), NOSPLIT, $0 + MOVD x1+0(FP), R4 + MOVD x0+8(FP), R5 + MOVD y+16(FP), R6 + + CMPU R4, R6 + BGE divbigger + + // from the programmer's note in ch. 
3 of the ISA manual, p.74 + DIVDEU R6, R4, R3 + DIVDU R6, R5, R7 + MULLD R6, R3, R8 + MULLD R6, R7, R20 + SUB R20, R5, R10 + ADD R7, R3, R3 + SUB R8, R10, R4 + CMPU R4, R10 + BLT adjust + CMPU R4, R6 + BLT end + +adjust: + MOVD $1, R21 + ADD R21, R3, R3 + SUB R6, R4, R4 + +end: + MOVD R3, q+24(FP) + MOVD R4, r+32(FP) + + RET + +divbigger: + MOVD $-1, R7 + MOVD R7, q+24(FP) + MOVD R7, r+32(FP) + RET + Index: golang-1.9-1.9/src/math/big/arith_ppc64x.s =================================================================== --- golang-1.9-1.9.orig/src/math/big/arith_ppc64x.s +++ golang-1.9-1.9/src/math/big/arith_ppc64x.s @@ -198,44 +198,5 @@ end: MOVD R4, c+56(FP) RET -// func divWW(x1, x0, y Word) (q, r Word) -TEXT ·divWW(SB), NOSPLIT, $0 - MOVD x1+0(FP), R4 - MOVD x0+8(FP), R5 - MOVD y+16(FP), R6 - - CMPU R4, R6 - BGE divbigger - - // from the programmer's note in ch. 3 of the ISA manual, p.74 - DIVDEU R6, R4, R3 - DIVDU R6, R5, R7 - MULLD R6, R3, R8 - MULLD R6, R7, R20 - SUB R20, R5, R10 - ADD R7, R3, R3 - SUB R8, R10, R4 - CMPU R4, R10 - BLT adjust - CMPU R4, R6 - BLT end - -adjust: - MOVD $1, R21 - ADD R21, R3, R3 - SUB R6, R4, R4 - -end: - MOVD R3, q+24(FP) - MOVD R4, r+32(FP) - - RET - -divbigger: - MOVD $-1, R7 - MOVD R7, q+24(FP) - MOVD R7, r+32(FP) - RET - TEXT ·divWVW(SB), NOSPLIT, $0 BR ·divWVW_g(SB) Index: golang-1.9-1.9/src/runtime/internal/atomic/asm_ppc64x.s =================================================================== --- golang-1.9-1.9.orig/src/runtime/internal/atomic/asm_ppc64x.s +++ golang-1.9-1.9/src/runtime/internal/atomic/asm_ppc64x.s @@ -165,12 +165,32 @@ TEXT runtime∕internal∕atomic·Store6 TEXT runtime∕internal∕atomic·Or8(SB), NOSPLIT, $0-9 MOVD ptr+0(FP), R3 MOVBZ val+8(FP), R4 +#ifdef GOARCH_ppc64 + // Align ptr down to 4 bytes so we can use 32-bit load/store. + // R5 = (R3 << 0) & ~3 + RLDCR $0, R3, $~3, R5 + // Compute val shift. + // Big endian. 
ptr = ptr ^ 3 + XOR $3, R3 + // R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8) + RLDC $3, R3, $(3*8), R6 + // Shift val for aligned ptr. R4 = val << R6 + SLD R6, R4, R4 + SYNC + +again: + LWAR (R5), R6 + OR R4, R6 + STWCCC R6, (R5) + BNE again +#else SYNC again: LBAR (R3), R6 OR R4, R6 STBCCC R6, (R3) BNE again +#endif ISYNC RET @@ -178,11 +198,34 @@ again: TEXT runtime∕internal∕atomic·And8(SB), NOSPLIT, $0-9 MOVD ptr+0(FP), R3 MOVBZ val+8(FP), R4 +#ifdef GOARCH_ppc64 + // Align ptr down to 4 bytes so we can use 32-bit load/store. + // R5 = (R3 << 0) & ~3 + RLDCR $0, R3, $~3, R5 + // Compute val shift. + // Big endian. ptr = ptr ^ 3 + XOR $3, R3 + // R6 = ((ptr & 3) * 8) = (ptr << 3) & (3*8) + RLDC $3, R3, $(3*8), R6 + // Shift val for aligned ptr. R4 = val << R6 | ^(0xFF << R6) + MOVD $0xFF, R7 + SLD R6, R4 + SLD R6, R7 + XOR $-1, R7 + OR R7, R4 + SYNC +again: + LWAR (R5), R6 + AND R4, R6 + STWCCC R6, (R5) + BNE again +#else SYNC again: LBAR (R3),R6 AND R4,R6 STBCCC R6,(R3) BNE again +#endif ISYNC RET