[PATCH v2 02/30] Include generic parity.h in some architectures' bitops.h

2016-04-06 Thread zengzhaoxiu
From: Zhaoxiu Zeng 

Use the generic version

Signed-off-by: Zhaoxiu Zeng 
---
 arch/arc/include/asm/bitops.h  | 1 +
 arch/arm/include/asm/bitops.h  | 1 +
 arch/arm64/include/asm/bitops.h| 1 +
 arch/avr32/include/asm/bitops.h| 1 +
 arch/c6x/include/asm/bitops.h  | 1 +
 arch/cris/include/asm/bitops.h | 1 +
 arch/frv/include/asm/bitops.h  | 1 +
 arch/h8300/include/asm/bitops.h| 1 +
 arch/hexagon/include/asm/bitops.h  | 1 +
 arch/m32r/include/asm/bitops.h | 1 +
 arch/m68k/include/asm/bitops.h | 1 +
 arch/metag/include/asm/bitops.h| 1 +
 arch/mn10300/include/asm/bitops.h  | 1 +
 arch/openrisc/include/asm/bitops.h | 1 +
 arch/parisc/include/asm/bitops.h   | 1 +
 arch/s390/include/asm/bitops.h | 1 +
 arch/sh/include/asm/bitops.h   | 1 +
 arch/xtensa/include/asm/bitops.h   | 1 +
 18 files changed, 18 insertions(+)

diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 0352fb8..7967e47 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -370,6 +370,7 @@ static inline __attribute__ ((const)) int __ffs(unsigned 
long x)
 #define ffz(x) __ffs(~(x))
 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index e943e6c..99f28a6 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -313,6 +313,7 @@ static inline unsigned long __ffs(unsigned long x)
 
 #include 
 #include 
+#include 
 #include 
 
 #ifdef __ARMEB__
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index 9c19594..eac4965 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -44,6 +44,7 @@ extern int test_and_change_bit(int nr, volatile unsigned long 
*p);
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h
index 910d537..9f4a2ce 100644
--- a/arch/avr32/include/asm/bitops.h
+++ b/arch/avr32/include/asm/bitops.h
@@ -298,6 +298,7 @@ static inline int ffs(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 #include 
 
 extern unsigned long find_next_zero_bit_le(const void *addr,
diff --git a/arch/c6x/include/asm/bitops.h b/arch/c6x/include/asm/bitops.h
index f0ab012..94eb0d1 100644
--- a/arch/c6x/include/asm/bitops.h
+++ b/arch/c6x/include/asm/bitops.h
@@ -87,6 +87,7 @@ static inline int ffs(int x)
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h
index 8062cb5..06bc246 100644
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 0df8e95..f2a7ee8 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -314,6 +314,7 @@ int __ilog2_u64(u64 n)
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index 05999ab..e392db2 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -172,6 +172,7 @@ static inline unsigned long __ffs(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/hexagon/include/asm/bitops.h 
b/arch/hexagon/include/asm/bitops.h
index 5e4a59b..2df614e 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -290,6 +290,7 @@ static inline unsigned long __fls(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
diff --git a/arch/m32r/include/asm/bitops.h b/arch/m32r/include/asm/bitops.h
index 86ba2b4..e3cf46b 100644
--- a/arch/m32r/include/asm/bitops.h
+++ b/arch/m32r/include/asm/bitops.h
@@ -259,6 +259,7 @@ static __inline__ int test_and_change_bit(int nr, volatile 
void * addr)
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #endif /* __KERNEL__ */
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index b4a9b0d..fd673ea 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -519,6 +519,7 @@ static inline int __fls(int x)
 #include 
 #include 
 #include 
+#include 
 #include 
 #endif /* __KERNEL__ */
 
diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h
index 2671134..ad13087 100644
--- a/arch/metag/include/asm/bitops.h
+++ b/arch/metag/include/asm/bitops.h
@@ -118,6 +118,7 @@ static inline int test_and_change_bit(unsigned int bit,
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/mn10300/include/asm/bitops.h 
b/arch/mn10300/include/asm/bitops.h
index fe6f8e2..60761b7 100644
--- a/arch/mn10300/include/asm/bitops.h
+++ b/arch/mn10300/include/asm/bitops.h
@@ -225,6 +225,7 @@ int ffs(int x)
 #include 
 #include 
 #include

[PATCH V3 02/29] Include generic parity.h in some architectures' bitops.h

2016-04-13 Thread zengzhaoxiu
From: Zhaoxiu Zeng 

Signed-off-by: Zhaoxiu Zeng 
---
 arch/arc/include/asm/bitops.h  | 1 +
 arch/arm/include/asm/bitops.h  | 1 +
 arch/arm64/include/asm/bitops.h| 1 +
 arch/avr32/include/asm/bitops.h| 1 +
 arch/c6x/include/asm/bitops.h  | 1 +
 arch/cris/include/asm/bitops.h | 1 +
 arch/frv/include/asm/bitops.h  | 1 +
 arch/h8300/include/asm/bitops.h| 1 +
 arch/hexagon/include/asm/bitops.h  | 1 +
 arch/m32r/include/asm/bitops.h | 1 +
 arch/m68k/include/asm/bitops.h | 1 +
 arch/metag/include/asm/bitops.h| 1 +
 arch/mn10300/include/asm/bitops.h  | 1 +
 arch/openrisc/include/asm/bitops.h | 1 +
 arch/parisc/include/asm/bitops.h   | 1 +
 arch/s390/include/asm/bitops.h | 1 +
 arch/sh/include/asm/bitops.h   | 1 +
 arch/xtensa/include/asm/bitops.h   | 1 +
 18 files changed, 18 insertions(+)

diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 0352fb8..7967e47 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -370,6 +370,7 @@ static inline __attribute__ ((const)) int __ffs(unsigned 
long x)
 #define ffz(x) __ffs(~(x))
 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index e943e6c..99f28a6 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -313,6 +313,7 @@ static inline unsigned long __ffs(unsigned long x)
 
 #include 
 #include 
+#include 
 #include 
 
 #ifdef __ARMEB__
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index 9c19594..eac4965 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -44,6 +44,7 @@ extern int test_and_change_bit(int nr, volatile unsigned long 
*p);
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h
index 910d537..9f4a2ce 100644
--- a/arch/avr32/include/asm/bitops.h
+++ b/arch/avr32/include/asm/bitops.h
@@ -298,6 +298,7 @@ static inline int ffs(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 #include 
 
 extern unsigned long find_next_zero_bit_le(const void *addr,
diff --git a/arch/c6x/include/asm/bitops.h b/arch/c6x/include/asm/bitops.h
index f0ab012..94eb0d1 100644
--- a/arch/c6x/include/asm/bitops.h
+++ b/arch/c6x/include/asm/bitops.h
@@ -87,6 +87,7 @@ static inline int ffs(int x)
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h
index 8062cb5..06bc246 100644
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 0df8e95..f2a7ee8 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -314,6 +314,7 @@ int __ilog2_u64(u64 n)
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index 05999ab..e392db2 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -172,6 +172,7 @@ static inline unsigned long __ffs(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/hexagon/include/asm/bitops.h 
b/arch/hexagon/include/asm/bitops.h
index 5e4a59b..2df614e 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -290,6 +290,7 @@ static inline unsigned long __fls(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
diff --git a/arch/m32r/include/asm/bitops.h b/arch/m32r/include/asm/bitops.h
index 86ba2b4..e3cf46b 100644
--- a/arch/m32r/include/asm/bitops.h
+++ b/arch/m32r/include/asm/bitops.h
@@ -259,6 +259,7 @@ static __inline__ int test_and_change_bit(int nr, volatile 
void * addr)
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #endif /* __KERNEL__ */
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index b4a9b0d..fd673ea 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -519,6 +519,7 @@ static inline int __fls(int x)
 #include 
 #include 
 #include 
+#include 
 #include 
 #endif /* __KERNEL__ */
 
diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h
index 2671134..ad13087 100644
--- a/arch/metag/include/asm/bitops.h
+++ b/arch/metag/include/asm/bitops.h
@@ -118,6 +118,7 @@ static inline int test_and_change_bit(unsigned int bit,
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/mn10300/include/asm/bitops.h 
b/arch/mn10300/include/asm/bitops.h
index fe6f8e2..60761b7 100644
--- a/arch/mn10300/include/asm/bitops.h
+++ b/arch/mn10300/include/asm/bitops.h
@@ -225,6 +225,7 @@ int ffs(int x)
 #include 
 #include 
 #include 
+#include 
 #include 
 

[PATCH V3 00/29] bitops: add parity functions

2016-04-13 Thread zengzhaoxiu
From: Zhaoxiu Zeng 

When I ran "grep parity -r linux", I found many parity calculations
scattered across many drivers.

This patch series does:
  1. provide generic and architecture-specific parity calculations
  2. remove drivers' local parity calculations, use bitops' parity
 functions instead
  3. replace "hweightN(x) & 1" with "parityN(x)" to improve readability,
 and improve performance on some CPUs that lack popcount support

I did not use GCC's __builtin_parity* functions, for the following reasons:
  1. I don't know how to determine which GCC version first supported
 __builtin_parity for each architecture.
  2. For architectures that don't have a popcount instruction, GCC instead
 emits "call __paritysi2" (__paritydi2 for 64-bit). So if we used
 __builtin_parity, we would have to provide __paritysi2 and __paritydi2
 functions for these architectures.
 Additionally, parity4,8,16 might be "__builtin_parity(x & mask)", but the
 "& mask" operation is totally unnecessary.
  3. For architectures that have a popcount instruction, we do the same thing.
  4. For powerpc, sparc, and x86, we do runtime patching to use the popcount
 instruction if the CPU supports it.

I have compiled successfully with x86_64_defconfig, i386_defconfig,
pseries_defconfig and sparc64_defconfig.

Changes since v2:
- Add constant PARITY_MAGIC (proposed by Sam Ravnborg)
- Add include/asm-generic/bitops/popc-parity.h (proposed by Chris Metcalf)
- Tile uses popc-parity.h directly
- Mips uses popc-parity.h if it has a usable __builtin_popcount
- Add a few comments in powerpc's and sparc's parity.S
- X86, remove custom calling convention

Changes since v1:
- Add runtime patching for powerpc, sparc, and x86
- Avr32 uses generic parity too
- Fix error in ssfdc's patch, and add commit message
- Don't change the original code composition of drivers/iio/gyro/adxrs450.c
- Directly assign to phy_cap.parity in drivers/scsi/isci/phy.c

Regards,

=== diffstat ===

Zhaoxiu Zeng (29):
  bitops: add parity functions
  Include generic parity.h in some architectures' bitops.h
  Add alpha-specific parity functions
  Add blackfin-specific parity functions
  Add ia64-specific parity functions
  Tile and MIPS (if has usable __builtin_popcount) use popcount parity
functions
  Add powerpc-specific parity functions
  Add sparc-specific parity functions
  Add x86-specific parity functions
  sunrpc: use parity8
  mips: use parity functions in cerr-sb1.c
  bch: use parity32
  media: use parity8 in vivid-vbi-gen.c
  media: use parity functions in saa7115
  input: use parity32 in grip_mp
  input: use parity64 in sidewinder
  input: use parity16 in ams_delta_serio
  scsi: use parity32 in isci's phy
  mtd: use parity16 in ssfdc
  mtd: use parity functions in inftlcore
  crypto: use parity functions in qat_hal
  mtd: use parity16 in sm_ftl
  ethernet: use parity8 in sun/niu.c
  input: use parity8 in pcips2
  input: use parity8 in saps2
  iio: use parity32 in adxrs450
  serial: use parity32 in max3100
  input: use parity8 in elantech
  ethernet: use parity8 in broadcom/tg3.c

 arch/alpha/include/asm/bitops.h  |  27 +
 arch/arc/include/asm/bitops.h|   1 +
 arch/arm/include/asm/bitops.h|   1 +
 arch/arm64/include/asm/bitops.h  |   1 +
 arch/avr32/include/asm/bitops.h  |   1 +
 arch/blackfin/include/asm/bitops.h   |  31 ++
 arch/c6x/include/asm/bitops.h|   1 +
 arch/cris/include/asm/bitops.h   |   1 +
 arch/frv/include/asm/bitops.h|   1 +
 arch/h8300/include/asm/bitops.h  |   1 +
 arch/hexagon/include/asm/bitops.h|   1 +
 arch/ia64/include/asm/bitops.h   |  31 ++
 arch/m32r/include/asm/bitops.h   |   1 +
 arch/m68k/include/asm/bitops.h   |   1 +
 arch/metag/include/asm/bitops.h  |   1 +
 arch/mips/include/asm/bitops.h   |   7 ++
 arch/mips/mm/cerr-sb1.c  |  67 -
 arch/mn10300/include/asm/bitops.h|   1 +
 arch/openrisc/include/asm/bitops.h   |   1 +
 arch/parisc/include/asm/bitops.h |   1 +
 arch/powerpc/include/asm/bitops.h|  11 +++
 arch/powerpc/lib/Makefile|   2 +-
 arch/powerpc/lib/parity_64.S | 142 +++
 arch/powerpc/lib/ppc_ksyms.c |   5 +
 arch/s390/include/asm/bitops.h   |   1 +
 arch/sh/include/asm/bitops.h |   1 +
 arch/sparc/include/asm/bitops_32.h   |   1 +
 arch/sparc/include/asm/bitops_64.h   |  18 
 arch/sparc/kernel/sparc_ksyms_64.c   |   6 ++
 arch/sparc/lib/Makefile  |   2 +-
 arch/sparc/lib/parity.S  | 128 
 arch/tile/include/asm/bitops.h   |   2 +
 arch/x86/include/asm/arch_hweight.h  |   5 +
 arch/x86/include/asm/

[PATCH V3 00/29] bitops: add parity functions

2016-04-13 Thread zengzhaoxiu
From: Zhaoxiu Zeng 

When I ran "grep parity -r linux", I found many parity calculations
scattered across many drivers.

This patch series does:
  1. provide generic and architecture-specific parity calculations
  2. remove drivers' local parity calculations, use bitops' parity
 functions instead
  3. replace "hweightN(x) & 1" with "parityN(x)" to improve readability,
 and improve performance on some CPUs that lack popcount support

I did not use GCC's __builtin_parity* functions, for the following reasons:
  1. I don't know how to determine which GCC version first supported
 __builtin_parity for each architecture.
  2. For architectures that don't have a popcount instruction, GCC instead
 emits "call __paritysi2" (__paritydi2 for 64-bit). So if we used
 __builtin_parity, we would have to provide __paritysi2 and __paritydi2
 functions for these architectures.
 Additionally, parity4,8,16 might be "__builtin_parity(x & mask)", but the
 "& mask" operation is totally unnecessary.
  3. For architectures that have a popcount instruction, we do the same thing.
  4. For powerpc, sparc, and x86, we do runtime patching to use the popcount
 instruction if the CPU supports it.

I have compiled successfully with x86_64_defconfig, i386_defconfig,
pseries_defconfig and sparc64_defconfig.

Changes since v2:
- Add constant PARITY_MAGIC (proposed by Sam Ravnborg)
- Add include/asm-generic/bitops/popc-parity.h (proposed by Chris Metcalf)
- Tile uses popc-parity.h directly
- Mips uses popc-parity.h if it has a usable __builtin_popcount
- Add a few comments in powerpc's and sparc's parity.S
- X86, remove custom calling convention

Changes since v1:
- Add runtime patching for powerpc, sparc, and x86
- Avr32 uses generic parity too
- Fix error in ssfdc's patch, and add commit message
- Don't change the original code composition of drivers/iio/gyro/adxrs450.c
- Directly assign to phy_cap.parity in drivers/scsi/isci/phy.c

Regards,

=== diffstat ===

Zhaoxiu Zeng (29):
  bitops: add parity functions
  Include generic parity.h in some architectures' bitops.h
  Add alpha-specific parity functions
  Add blackfin-specific parity functions
  Add ia64-specific parity functions
  Tile and MIPS (if has usable __builtin_popcount) use popcount parity
functions
  Add powerpc-specific parity functions
  Add sparc-specific parity functions
  Add x86-specific parity functions
  sunrpc: use parity8
  mips: use parity functions in cerr-sb1.c
  bch: use parity32
  media: use parity8 in vivid-vbi-gen.c
  media: use parity functions in saa7115
  input: use parity32 in grip_mp
  input: use parity64 in sidewinder
  input: use parity16 in ams_delta_serio
  scsi: use parity32 in isci's phy
  mtd: use parity16 in ssfdc
  mtd: use parity functions in inftlcore
  crypto: use parity functions in qat_hal
  mtd: use parity16 in sm_ftl
  ethernet: use parity8 in sun/niu.c
  input: use parity8 in pcips2
  input: use parity8 in saps2
  iio: use parity32 in adxrs450
  serial: use parity32 in max3100
  input: use parity8 in elantech
  ethernet: use parity8 in broadcom/tg3.c

 arch/alpha/include/asm/bitops.h  |  27 +
 arch/arc/include/asm/bitops.h|   1 +
 arch/arm/include/asm/bitops.h|   1 +
 arch/arm64/include/asm/bitops.h  |   1 +
 arch/avr32/include/asm/bitops.h  |   1 +
 arch/blackfin/include/asm/bitops.h   |  31 ++
 arch/c6x/include/asm/bitops.h|   1 +
 arch/cris/include/asm/bitops.h   |   1 +
 arch/frv/include/asm/bitops.h|   1 +
 arch/h8300/include/asm/bitops.h  |   1 +
 arch/hexagon/include/asm/bitops.h|   1 +
 arch/ia64/include/asm/bitops.h   |  31 ++
 arch/m32r/include/asm/bitops.h   |   1 +
 arch/m68k/include/asm/bitops.h   |   1 +
 arch/metag/include/asm/bitops.h  |   1 +
 arch/mips/include/asm/bitops.h   |   7 ++
 arch/mips/mm/cerr-sb1.c  |  67 -
 arch/mn10300/include/asm/bitops.h|   1 +
 arch/openrisc/include/asm/bitops.h   |   1 +
 arch/parisc/include/asm/bitops.h |   1 +
 arch/powerpc/include/asm/bitops.h|  11 +++
 arch/powerpc/lib/Makefile|   2 +-
 arch/powerpc/lib/parity_64.S | 142 +++
 arch/powerpc/lib/ppc_ksyms.c |   5 +
 arch/s390/include/asm/bitops.h   |   1 +
 arch/sh/include/asm/bitops.h |   1 +
 arch/sparc/include/asm/bitops_32.h   |   1 +
 arch/sparc/include/asm/bitops_64.h   |  18 
 arch/sparc/kernel/sparc_ksyms_64.c   |   6 ++
 arch/sparc/lib/Makefile  |   2 +-
 arch/sparc/lib/parity.S  | 128 
 arch/tile/include/asm/bitops.h   |   2 +
 arch/x86/include/asm/arch_hweight.h  |   5 +
 arch/x86/include/asm/

[patch V4] lib: GCD: Use binary GCD algorithm instead of Euclidean

2016-05-06 Thread zengzhaoxiu
From: Zhaoxiu Zeng 

The binary GCD algorithm is based on the following facts:
1. If a and b are both even, then gcd(a,b) = 2 * gcd(a/2, b/2)
2. If a is even and b is odd, then gcd(a,b) = gcd(a/2, b)
3. If a and b are both odd, then gcd(a,b) = gcd((a-b)/2, b) = gcd((a+b)/2, b)

Even on x86 machines with reasonable division hardware, the binary
algorithm runs about 25% faster (80% of the execution time) than the
division-based Euclidean algorithm.

On platforms like Alpha and ARMv6 where division is a function call to
emulation code, it's even more significant.

There are two variants of the code here, depending on whether a
fast __ffs (find least significant set bit) instruction is available.
This allows the unpredictable branches in the bit-at-a-time shifting
loop to be eliminated.

If fast __ffs is not available, the "even/odd" GCD variant is used.

I use the following code to benchmark:

#include 
#include 
#include 
#include 
#include 
#include 

/*
 * swap(a, b) - exchange the values of two lvalues of the same type.
 *
 * Uses a temporary rather than the XOR trick: XOR-swapping an object with
 * itself (e.g. swap(x, x) or swap(v[i], v[j]) with i == j) would zero it.
 * Each argument is expanded twice, so avoid arguments with side effects.
 */
#define swap(a, b) \
do { \
	__typeof__(a) swap_tmp_ = (a); \
	(a) = (b); \
	(b) = swap_tmp_; \
} while (0)

/*
 * gcd0 - division-based Euclidean GCD (the baseline being benchmarked).
 *
 * Returns the greatest common divisor of a and b; if either is zero the
 * other operand is returned.
 */
unsigned long gcd0(unsigned long a, unsigned long b)
{
	unsigned long rem;

	/* Keep the larger operand in a so the first modulo is meaningful. */
	if (a < b)
		swap(a, b);

	if (!b)
		return a;

	/* Replace (a, b) with (b, a mod b) until the remainder vanishes. */
	for (rem = a % b; rem; rem = a % b) {
		a = b;
		b = rem;
	}

	return b;
}

/*
 * gcd1 - binary GCD using __builtin_ctzl to strip trailing zero bits.
 *
 * Returns the greatest common divisor of a and b; if either is zero the
 * other operand (a | b) is returned.
 */
unsigned long gcd1(unsigned long a, unsigned long b)
{
	unsigned long both = a | b;

	if (a == 0 || b == 0)
		return both;

	/* Make b odd; it stays odd for the rest of the loop. */
	b >>= __builtin_ctzl(b);

	while (1) {
		/* Make a odd as well. */
		a >>= __builtin_ctzl(a);
		if (a == b)
			break;

		/* Subtract the smaller odd value from the larger one. */
		if (a < b)
			swap(a, b);
		a -= b;
	}

	/* Restore the power of two common to both original operands. */
	return a << __builtin_ctzl(both);
}

/*
 * gcd2 - binary GCD, "even/odd" variant that shifts one bit at a time
 * instead of relying on a count-trailing-zeros primitive.
 *
 * Returns the greatest common divisor of a and b; if either is zero the
 * other operand (a | b) is returned.
 */
unsigned long gcd2(unsigned long a, unsigned long b)
{
	unsigned long low = a | b;

	if (a == 0 || b == 0)
		return low;

	/* Isolate the lowest set bit of a | b. */
	low &= -low;

	/* Shift b down until that bit is set in it. */
	while ((b & low) == 0)
		b >>= 1;

	while (1) {
		/* Shift a down until the same bit is set in it. */
		while ((a & low) == 0)
			a >>= 1;
		if (a == b)
			break;

		if (a < b)
			swap(a, b);

		/*
		 * Halve the difference; if the result still has the low bit
		 * set, add b back before halving once more.
		 */
		a -= b;
		a >>= 1;
		if (a & low)
			a += b;
		a >>= 1;
	}

	return a;
}

/*
 * gcd3 - binary GCD using __builtin_ctzl, with early exits as soon as
 * either reduced operand becomes 1.
 *
 * Returns the greatest common divisor of a and b; if either is zero the
 * other operand (a | b) is returned.
 */
unsigned long gcd3(unsigned long a, unsigned long b)
{
	unsigned long both = a | b;

	if (a == 0 || b == 0)
		return both;

	/* Make b odd; if that leaves 1, return the lowest set bit of a | b. */
	b >>= __builtin_ctzl(b);
	if (b == 1)
		return both & -both;

	while (1) {
		/* Make a odd and apply the same shortcut. */
		a >>= __builtin_ctzl(a);
		if (a == 1)
			return both & -both;
		if (a == b)
			break;

		/* Subtract the smaller odd value from the larger one. */
		if (a < b)
			swap(a, b);
		a -= b;
	}

	/* Restore the power of two common to both original operands. */
	return a << __builtin_ctzl(both);
}

/*
 * gcd4 - binary GCD, "even/odd" variant (one-bit shifts), with early
 * exits as soon as either reduced operand equals the lowest set bit of
 * a | b.
 *
 * Returns the greatest common divisor of a and b; if either is zero the
 * other operand (a | b) is returned.
 */
unsigned long gcd4(unsigned long a, unsigned long b)
{
	unsigned long low = a | b;

	if (a == 0 || b == 0)
		return low;

	/* Isolate the lowest set bit of a | b. */
	low &= -low;

	/* Shift b down until that bit is set in it. */
	while ((b & low) == 0)
		b >>= 1;
	if (b == low)
		return low;

	while (1) {
		/* Shift a down until the same bit is set in it. */
		while ((a & low) == 0)
			a >>= 1;
		if (a == low)
			return low;
		if (a == b)
			break;

		if (a < b)
			swap(a, b);

		/*
		 * Halve the difference; if the result still has the low bit
		 * set, add b back before halving once more.
		 */
		a -= b;
		a >>= 1;
		if (a & low)
			a += b;
		a >>= 1;
	}

	return a;
}

/* GCD implementations under test, in the order gcd0 .. gcd4. */
static unsigned long (*gcd_func[])(unsigned long, unsigned long) = {
	gcd0,
	gcd1,
	gcd2,
	gcd3,
	gcd4,
};

/* Number of entries in gcd_func. */
#define TEST_ENTRIES (sizeof(gcd_func) / sizeof(*gcd_func))

#if defined(__x86_64__)

/*
 * rdtscll(val) - read the x86 time-stamp counter into the lvalue val.
 *
 * Executes "rdtsc", taking one half of the result from the "a" register
 * and the other from the "d" register, and combines them as
 * (d << 32) | a into an unsigned long long.
 */
#define rdtscll(val) do { \
	unsigned long __a, __d; \
	__asm__ __volatile__("rdtsc" : "=a" (__a), "=d" (__d)); \
	(val) = ((unsigned long long)__a) | (((unsigned long long)__d) << 32); \
} while (0)

static unsigned 

[patch V4 02/31] bitops: Include generic parity.h in some architectures' bitops.h

2016-05-11 Thread zengzhaoxiu
From: Zhaoxiu Zeng 

Simply use the generic version.

Signed-off-by: Zhaoxiu Zeng 
Acked-by: Hans-Christian Noren Egtvedt  [for avr32]
---
 arch/arc/include/asm/bitops.h  | 1 +
 arch/arm/include/asm/bitops.h  | 1 +
 arch/arm64/include/asm/bitops.h| 1 +
 arch/avr32/include/asm/bitops.h| 1 +
 arch/c6x/include/asm/bitops.h  | 1 +
 arch/cris/include/asm/bitops.h | 1 +
 arch/frv/include/asm/bitops.h  | 1 +
 arch/h8300/include/asm/bitops.h| 1 +
 arch/hexagon/include/asm/bitops.h  | 1 +
 arch/m32r/include/asm/bitops.h | 1 +
 arch/m68k/include/asm/bitops.h | 1 +
 arch/metag/include/asm/bitops.h| 1 +
 arch/mn10300/include/asm/bitops.h  | 1 +
 arch/openrisc/include/asm/bitops.h | 1 +
 arch/parisc/include/asm/bitops.h   | 1 +
 arch/s390/include/asm/bitops.h | 1 +
 arch/sh/include/asm/bitops.h   | 1 +
 arch/xtensa/include/asm/bitops.h   | 1 +
 18 files changed, 18 insertions(+)

diff --git a/arch/arc/include/asm/bitops.h b/arch/arc/include/asm/bitops.h
index 0352fb8..7967e47 100644
--- a/arch/arc/include/asm/bitops.h
+++ b/arch/arc/include/asm/bitops.h
@@ -370,6 +370,7 @@ static inline __attribute__ ((const)) int __ffs(unsigned 
long x)
 #define ffz(x) __ffs(~(x))
 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h
index e943e6c..99f28a6 100644
--- a/arch/arm/include/asm/bitops.h
+++ b/arch/arm/include/asm/bitops.h
@@ -313,6 +313,7 @@ static inline unsigned long __ffs(unsigned long x)
 
 #include 
 #include 
+#include 
 #include 
 
 #ifdef __ARMEB__
diff --git a/arch/arm64/include/asm/bitops.h b/arch/arm64/include/asm/bitops.h
index 9c19594..eac4965 100644
--- a/arch/arm64/include/asm/bitops.h
+++ b/arch/arm64/include/asm/bitops.h
@@ -44,6 +44,7 @@ extern int test_and_change_bit(int nr, volatile unsigned long 
*p);
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/avr32/include/asm/bitops.h b/arch/avr32/include/asm/bitops.h
index 910d537..9f4a2ce 100644
--- a/arch/avr32/include/asm/bitops.h
+++ b/arch/avr32/include/asm/bitops.h
@@ -298,6 +298,7 @@ static inline int ffs(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 #include 
 
 extern unsigned long find_next_zero_bit_le(const void *addr,
diff --git a/arch/c6x/include/asm/bitops.h b/arch/c6x/include/asm/bitops.h
index f0ab012..94eb0d1 100644
--- a/arch/c6x/include/asm/bitops.h
+++ b/arch/c6x/include/asm/bitops.h
@@ -87,6 +87,7 @@ static inline int ffs(int x)
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/cris/include/asm/bitops.h b/arch/cris/include/asm/bitops.h
index 8062cb5..06bc246 100644
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
diff --git a/arch/frv/include/asm/bitops.h b/arch/frv/include/asm/bitops.h
index 0df8e95..f2a7ee8 100644
--- a/arch/frv/include/asm/bitops.h
+++ b/arch/frv/include/asm/bitops.h
@@ -314,6 +314,7 @@ int __ilog2_u64(u64 n)
 
 #include 
 #include 
+#include 
 #include 
 
 #include 
diff --git a/arch/h8300/include/asm/bitops.h b/arch/h8300/include/asm/bitops.h
index 05999ab..e392db2 100644
--- a/arch/h8300/include/asm/bitops.h
+++ b/arch/h8300/include/asm/bitops.h
@@ -172,6 +172,7 @@ static inline unsigned long __ffs(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/hexagon/include/asm/bitops.h 
b/arch/hexagon/include/asm/bitops.h
index 5e4a59b..2df614e 100644
--- a/arch/hexagon/include/asm/bitops.h
+++ b/arch/hexagon/include/asm/bitops.h
@@ -290,6 +290,7 @@ static inline unsigned long __fls(unsigned long word)
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
diff --git a/arch/m32r/include/asm/bitops.h b/arch/m32r/include/asm/bitops.h
index 86ba2b4..e3cf46b 100644
--- a/arch/m32r/include/asm/bitops.h
+++ b/arch/m32r/include/asm/bitops.h
@@ -259,6 +259,7 @@ static __inline__ int test_and_change_bit(int nr, volatile 
void * addr)
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #endif /* __KERNEL__ */
diff --git a/arch/m68k/include/asm/bitops.h b/arch/m68k/include/asm/bitops.h
index b4a9b0d..fd673ea 100644
--- a/arch/m68k/include/asm/bitops.h
+++ b/arch/m68k/include/asm/bitops.h
@@ -519,6 +519,7 @@ static inline int __fls(int x)
 #include 
 #include 
 #include 
+#include 
 #include 
 #endif /* __KERNEL__ */
 
diff --git a/arch/metag/include/asm/bitops.h b/arch/metag/include/asm/bitops.h
index 2671134..ad13087 100644
--- a/arch/metag/include/asm/bitops.h
+++ b/arch/metag/include/asm/bitops.h
@@ -118,6 +118,7 @@ static inline int test_and_change_bit(unsigned int bit,
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
diff --git a/arch/mn10300/include/asm/bitops.h 
b/arch/mn10300/include/asm/bitops.h
index fe6f8e2..60761b7 100644
--- a/arch/mn10300/include/asm/bitops.h
+++ b/arch/mn10300/include/asm/bitops.h
@@ -22