NEON and VFPv4 are not optional for ARMv8. For cpuflag support we are
handling both as extensions. This is consistent with x86_64, which
implies SSE2, which is still handled as an extension.
---
 configure                                   | 25 ++++++++++--
 libavutil/aarch64/Makefile                  |  1 +
 libavutil/aarch64/asm.S                     | 63 +++++++++++++++++++++++++++++
 libavutil/{cpu_internal.h => aarch64/cpu.c} | 22 ++++------
 libavutil/{cpu_internal.h => aarch64/cpu.h} | 22 +++++-----
 libavutil/cpu.c                             | 19 +++++++--
 libavutil/cpu.h                             |  1 +
 libavutil/cpu_internal.h                    |  1 +
 8 files changed, 123 insertions(+), 31 deletions(-)
 create mode 100644 libavutil/aarch64/Makefile
 create mode 100644 libavutil/aarch64/asm.S
 copy libavutil/{cpu_internal.h => aarch64/cpu.c} (64%)
 copy libavutil/{cpu_internal.h => aarch64/cpu.h} (57%)

diff --git a/configure b/configure
index fba8815..58640a4 100755
--- a/configure
+++ b/configure
@@ -1233,6 +1233,15 @@ ARCH_LIST='
     x86_64
 '
 
+# both NEON and VFPv4 are not really extensions for aarch64 since they are
+# not optional. They are handled as extensions to support disabling
+# optimizations via cpu flags. This is consistent with x86_64
+ARCH_EXT_LIST_AARCH64='
+    neon
+    vfp
+    vfpv4
+'
+
 ARCH_EXT_LIST_ARM='
     armv5te
     armv6
@@ -1260,6 +1269,7 @@ ARCH_EXT_LIST_X86='
 '
 
 ARCH_EXT_LIST="
+    $ARCH_EXT_LIST_AARCH64
     $ARCH_EXT_LIST_ARM
     $ARCH_EXT_LIST_X86
     altivec
@@ -1543,9 +1553,10 @@ CMDLINE_APPEND="
 armv5te_deps="arm"
 armv6_deps="arm"
 armv6t2_deps="arm"
-neon_deps="arm"
-vfp_deps="arm"
+neon_deps_any="aarch64 arm"
+vfp_deps_any="aarch64 arm"
 vfpv3_deps="vfp"
+vfpv4_deps="vfp"
 
 map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM
 
@@ -3597,7 +3608,11 @@ od -t x1 $TMPO | grep -q '42 *49 *47 *45' && enable bigendian
 
 check_inline_asm inline_asm_labels '"1:\n"'
 
-if enabled alpha; then
+if enabled aarch64; then
+    # internal assembler in clang 3.3 does not support this instruction
+    enabled neon  && check_insn neon  'ext   v0.8B, v0.8B, v1.8B, #1'
+    enabled vfpv4 && check_insn vfpv4 'fmadd d0,    d0,    d1,    d2'
+elif enabled alpha; then
 
     check_cflags -mieee
 
@@ -4249,6 +4264,10 @@ if enabled x86; then
     echo "EBX available             ${ebx_available-no}"
     echo "EBP available             ${ebp_available-no}"
 fi
+if enabled aarch64; then
+    echo "NEON enabled              ${neon-no}"
+    echo "VFPv4 enabled             ${vfpv4-no}"
+fi
 if enabled arm; then
     echo "ARMv5TE enabled           ${armv5te-no}"
     echo "ARMv6 enabled             ${armv6-no}"
diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile
new file mode 100644
index 0000000..13d26a1
--- /dev/null
+++ b/libavutil/aarch64/Makefile
@@ -0,0 +1 @@
+OBJS += aarch64/cpu.o
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
new file mode 100644
index 0000000..7da18ce
--- /dev/null
+++ b/libavutil/aarch64/asm.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#ifdef __ELF__
+#   define ELF
+#else
+#   define ELF //
+#endif
+
+.macro  function name, export=0
+    .macro endfunc
+ELF     .size   \name, . - \name
+        .endfunc
+        .purgem endfunc
+    .endm
+        .text
+        .align          2
+    .if \export
+        .global EXTERN_ASM\name
+EXTERN_ASM\name:
+    .endif
+ELF     .type   \name, %function
+        .func   \name
+\name:
+.endm
+
+.macro  const   name, align=2
+    .macro endconst
+ELF     .size   \name, . - \name
+        .purgem endconst
+    .endm
+        .section        .rodata
+        .align          \align
+\name:
+.endm
+
+.macro  movrel rd, val
+#if CONFIG_PIC
+        adrp            \rd, #:pg_hi21:\val
+        add             \rd, \rd, #:lo12:\val
+#else
+        ldr             \rd, =\val
+#endif
+.endm
diff --git a/libavutil/cpu_internal.h b/libavutil/aarch64/cpu.c
similarity index 64%
copy from libavutil/cpu_internal.h
copy to libavutil/aarch64/cpu.c
index 08f6e85..b9894b5 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/aarch64/cpu.c
@@ -16,18 +16,12 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVUTIL_CPU_INTERNAL_H
-#define AVUTIL_CPU_INTERNAL_H
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "config.h"
 
-#include "cpu.h"
-
-#define CPUEXT_SUFFIX(flags, suffix, cpuext)                            \
-    (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
-
-#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
-
-int ff_get_cpu_flags_arm(void);
-int ff_get_cpu_flags_ppc(void);
-int ff_get_cpu_flags_x86(void);
-
-#endif /* AVUTIL_CPU_INTERNAL_H */
+int ff_get_cpu_flags_aarch64(void)
+{
+    return AV_CPU_FLAG_NEON    * HAVE_NEON    |
+           AV_CPU_FLAG_VFPV4   * HAVE_VFPV4;
+}
diff --git a/libavutil/cpu_internal.h b/libavutil/aarch64/cpu.h
similarity index 57%
copy from libavutil/cpu_internal.h
copy to libavutil/aarch64/cpu.h
index 08f6e85..e90209b 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/aarch64/cpu.h
@@ -16,18 +16,20 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVUTIL_CPU_INTERNAL_H
-#define AVUTIL_CPU_INTERNAL_H
+#ifndef AVUTIL_AARCH64_CPU_H
+#define AVUTIL_AARCH64_CPU_H
 
-#include "cpu.h"
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
 
-#define CPUEXT_SUFFIX(flags, suffix, cpuext)                            \
-    (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
+#define have_neon(flags)    CPUEXT(flags, NEON)
+#define have_vfpv4(flags)   CPUEXT(flags, VFPV4)
 
-#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
+#define have_neon_external(flags)    CPUEXT_SUFFIX(flags, _EXTERNAL, NEON)
+#define have_vfpv4_external(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, VFPV4)
 
-int ff_get_cpu_flags_arm(void);
-int ff_get_cpu_flags_ppc(void);
-int ff_get_cpu_flags_x86(void);
+#define have_neon_inline(flags)      CPUEXT_SUFFIX(flags, _INLINE, NEON)
+#define have_vfpv4_inline(flags)     CPUEXT_SUFFIX(flags, _INLINE, VFPV4)
 
-#endif /* AVUTIL_CPU_INTERNAL_H */
+#endif /* AVUTIL_AARCH64_CPU_H */
diff --git a/libavutil/cpu.c b/libavutil/cpu.c
index 25af4c5..02d54e9 100644
--- a/libavutil/cpu.c
+++ b/libavutil/cpu.c
@@ -51,9 +51,14 @@ int av_get_cpu_flags(void)
     if (checked)
         return flags;
 
-    if (ARCH_ARM) flags = ff_get_cpu_flags_arm();
-    if (ARCH_PPC) flags = ff_get_cpu_flags_ppc();
-    if (ARCH_X86) flags = ff_get_cpu_flags_x86();
+    if (ARCH_AARCH64)
+        flags = ff_get_cpu_flags_aarch64();
+    if (ARCH_ARM)
+        flags = ff_get_cpu_flags_arm();
+    if (ARCH_PPC)
+        flags = ff_get_cpu_flags_ppc();
+    if (ARCH_X86)
+        flags = ff_get_cpu_flags_x86();
 
     flags  &= cpuflags_mask;
     checked = 1;
@@ -114,6 +119,9 @@ int av_parse_cpu_flags(const char *s)
         { "vfp",      NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFP      },    .unit = "flags" },
         { "vfpv3",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFPV3    },    .unit = "flags" },
         { "neon",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON     },    .unit = "flags" },
+#elif ARCH_AARCH64
+        { "neon",     NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_NEON     },    .unit = "flags" },
+        { "vfpv4",    NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_VFPV4    },    .unit = "flags" },
 #endif
         { NULL },
     };
@@ -170,7 +178,10 @@ static const struct {
     int flag;
     const char *name;
 } cpu_flag_tab[] = {
-#if   ARCH_ARM
+#if   ARCH_AARCH64
+    { AV_CPU_FLAG_NEON,      "neon"       },
+    { AV_CPU_FLAG_VFPV4,     "vfpv4"      },
+#elif ARCH_ARM
     { AV_CPU_FLAG_ARMV5TE,   "armv5te"    },
     { AV_CPU_FLAG_ARMV6,     "armv6"      },
     { AV_CPU_FLAG_ARMV6T2,   "armv6t2"    },
diff --git a/libavutil/cpu.h b/libavutil/cpu.h
index 29036e3..1b42018 100644
--- a/libavutil/cpu.h
+++ b/libavutil/cpu.h
@@ -58,6 +58,7 @@
 #define AV_CPU_FLAG_VFP          (1 << 3)
 #define AV_CPU_FLAG_VFPV3        (1 << 4)
 #define AV_CPU_FLAG_NEON         (1 << 5)
+#define AV_CPU_FLAG_VFPV4        (1 << 6)
 
 /**
  * Return the flags which specify extensions supported by the CPU.
diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h
index 08f6e85..3bfe8a8 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/cpu_internal.h
@@ -26,6 +26,7 @@
 
 #define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
 
+int ff_get_cpu_flags_aarch64(void);
 int ff_get_cpu_flags_arm(void);
 int ff_get_cpu_flags_ppc(void);
 int ff_get_cpu_flags_x86(void);
-- 
1.8.5.2

_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to