On Sat, 11 Jan 2014, Janne Grunau wrote:
NEON and VFPv4 are not optional for ARMv8. For cpuflag support we are
handling both as extensions. This is consistent with x86_64, which
implies SSE2, which is still handled as an extension.
---
configure | 25 ++++++++++--
libavutil/aarch64/Makefile | 1 +
libavutil/aarch64/asm.S | 63 +++++++++++++++++++++++++++++
libavutil/{cpu_internal.h => aarch64/cpu.c} | 22 ++++------
libavutil/{cpu_internal.h => aarch64/cpu.h} | 22 +++++-----
libavutil/cpu.c | 19 +++++++--
libavutil/cpu.h | 1 +
libavutil/cpu_internal.h | 1 +
8 files changed, 123 insertions(+), 31 deletions(-)
create mode 100644 libavutil/aarch64/Makefile
create mode 100644 libavutil/aarch64/asm.S
copy libavutil/{cpu_internal.h => aarch64/cpu.c} (64%)
copy libavutil/{cpu_internal.h => aarch64/cpu.h} (57%)
diff --git a/configure b/configure
index fba8815..58640a4 100755
--- a/configure
+++ b/configure
@@ -1233,6 +1233,15 @@ ARCH_LIST='
x86_64
'
+# both NEON and VFPv4 are not really extensions for aarch64 since they are
+# not optional. They are handled as extensions to support disabling
+# optimizations via cpu flags. This is consistent with x86_64
+ARCH_EXT_LIST_AARCH64='
+ neon
+ vfp
+ vfpv4
+'
+
ARCH_EXT_LIST_ARM='
armv5te
armv6
@@ -1260,6 +1269,7 @@ ARCH_EXT_LIST_X86='
'
ARCH_EXT_LIST="
+ $ARCH_EXT_LIST_AARCH64
$ARCH_EXT_LIST_ARM
$ARCH_EXT_LIST_X86
altivec
@@ -1543,9 +1553,10 @@ CMDLINE_APPEND="
armv5te_deps="arm"
armv6_deps="arm"
armv6t2_deps="arm"
-neon_deps="arm"
-vfp_deps="arm"
+neon_deps_any="aarch64 arm"
+vfp_deps_any="aarch64 arm"
vfpv3_deps="vfp"
+vfpv4_deps="vfp"
map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
@@ -3597,7 +3608,11 @@ od -t x1 $TMPO | grep -q '42 *49 *47 *45' && enable
bigendian
check_inline_asm inline_asm_labels '"1:\n"'
-if enabled alpha; then
+if enabled aarch64; then
+ # internal assembler in clang 3.3 does not support this instruction
+ enabled neon && check_insn neon 'ext v0.8B, v0.8B, v1.8B, #1'
+ enabled vfpv4 && check_insn vfpv4 'fmadd d0, d0, d1, d2'
+elif enabled alpha; then
check_cflags -mieee
@@ -4249,6 +4264,10 @@ if enabled x86; then
echo "EBX available ${ebx_available-no}"
echo "EBP available ${ebp_available-no}"
fi
+if enabled aarch64; then
+ echo "NEON enabled ${neon-no}"
+ echo "VFPv4 enabled ${vfpv4-no}"
+fi
if enabled arm; then
echo "ARMv5TE enabled ${armv5te-no}"
echo "ARMv6 enabled ${armv6-no}"
diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile
new file mode 100644
index 0000000..13d26a1
--- /dev/null
+++ b/libavutil/aarch64/Makefile
@@ -0,0 +1 @@
+OBJS += aarch64/cpu.o
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
new file mode 100644
index 0000000..7da18ce
--- /dev/null
+++ b/libavutil/aarch64/asm.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#ifdef __ELF__
+# define ELF
+#else
+# define ELF //
+#endif
+
+.macro function name, export=0
+ .macro endfunc
+ELF .size \name, . - \name
+ .endfunc
+ .purgem endfunc
+ .endm
+ .text
+ .align 2
It would probably be nice to add the align parameter here, to match the
32-bit ARM version of the header.
+ .if \export
+ .global EXTERN_ASM\name
+EXTERN_ASM\name:
+ .endif
+ELF .type \name, %function
+ .func \name
+\name:
+.endm
+
+.macro const name, align=2
+ .macro endconst
+ELF .size \name, . - \name
+ .purgem endconst
+ .endm
+ .section .rodata
+ .align \align
+\name:
+.endm
+
+.macro movrel rd, val
+#if CONFIG_PIC
+ adrp \rd, #:pg_hi21:\val
+ add \rd, \rd, #:lo12:\val
+#else
+ ldr \rd, =\val
+#endif
+.endm
diff --git a/libavutil/cpu_internal.h b/libavutil/aarch64/cpu.c
similarity index 64%
copy from libavutil/cpu_internal.h
copy to libavutil/aarch64/cpu.c
index 08f6e85..b9894b5 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/aarch64/cpu.c
@@ -16,18 +16,12 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef AVUTIL_CPU_INTERNAL_H
-#define AVUTIL_CPU_INTERNAL_H
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "config.h"
-#include "cpu.h"
-
-#define CPUEXT_SUFFIX(flags, suffix, cpuext) \
- (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
-
-#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
-
-int ff_get_cpu_flags_arm(void);
-int ff_get_cpu_flags_ppc(void);
-int ff_get_cpu_flags_x86(void);
-
-#endif /* AVUTIL_CPU_INTERNAL_H */
+int ff_get_cpu_flags_aarch64(void)
+{
+ return AV_CPU_FLAG_NEON * HAVE_NEON |
+ AV_CPU_FLAG_VFPV4 * HAVE_VFPV4;
+}
The git copy detection does more harm than good here IMO, but the code
itself looks ok.
diff --git a/libavutil/cpu_internal.h b/libavutil/aarch64/cpu.h
similarity index 57%
copy from libavutil/cpu_internal.h
copy to libavutil/aarch64/cpu.h
index 08f6e85..e90209b 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/aarch64/cpu.h
@@ -16,18 +16,20 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#ifndef AVUTIL_CPU_INTERNAL_H
-#define AVUTIL_CPU_INTERNAL_H
+#ifndef AVUTIL_AARCH64_CPU_H
+#define AVUTIL_AARCH64_CPU_H
-#include "cpu.h"
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
-#define CPUEXT_SUFFIX(flags, suffix, cpuext) \
- (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
+#define have_neon(flags) CPUEXT(flags, NEON)
+#define have_vfpv4(flags) CPUEXT(flags, VFPV3)
-#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
+#define have_neon_external(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, NEON)
+#define have_vfpv4_external(flags) CPUEXT_SUFFIX(flags, _EXTERNAL, VFPV3)
-int ff_get_cpu_flags_arm(void);
-int ff_get_cpu_flags_ppc(void);
-int ff_get_cpu_flags_x86(void);
+#define have_neon_inline(flags) CPUEXT_SUFFIX(flags, _INLINE, NEON)
+#define have_vfpv4_inline(flags) CPUEXT_SUFFIX(flags, _INLINE, VFPV3)
We don't have the _external and _inline variants on 32-bit ARM. I guess
this is ok, but I'll want to think about it again once I see how you use
them in the other patches (I wanted to send the rest of this review
before getting there).
// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel