On Sat, 11 Jan 2014, Janne Grunau wrote:

NEON and VFPv4 are not optional for ARMv8. For cpuflag support we are
handling both as extensions. This is consistent with x86_64, which
implies SSE2, which is still handled as an extension.
---
configure                                   | 25 ++++++++++--
libavutil/aarch64/Makefile                  |  1 +
libavutil/aarch64/asm.S                     | 63 +++++++++++++++++++++++++++++
libavutil/{cpu_internal.h => aarch64/cpu.c} | 22 ++++------
libavutil/{cpu_internal.h => aarch64/cpu.h} | 22 +++++-----
libavutil/cpu.c                             | 19 +++++++--
libavutil/cpu.h                             |  1 +
libavutil/cpu_internal.h                    |  1 +
8 files changed, 123 insertions(+), 31 deletions(-)
create mode 100644 libavutil/aarch64/Makefile
create mode 100644 libavutil/aarch64/asm.S
copy libavutil/{cpu_internal.h => aarch64/cpu.c} (64%)
copy libavutil/{cpu_internal.h => aarch64/cpu.h} (57%)

diff --git a/configure b/configure
index fba8815..58640a4 100755
--- a/configure
+++ b/configure
@@ -1233,6 +1233,15 @@ ARCH_LIST='
    x86_64
'

+# both NEON and VFPv4 are not really extensions for aarch64 since they are
+# not optional. They are handled as extensions to support disabling
+# optimizations via cpu flags. This is consistent with x86_64
+ARCH_EXT_LIST_AARCH64='
+    neon
+    vfp
+    vfpv4
+'
+
ARCH_EXT_LIST_ARM='
    armv5te
    armv6
@@ -1260,6 +1269,7 @@ ARCH_EXT_LIST_X86='
'

ARCH_EXT_LIST="
+    $ARCH_EXT_LIST_AARCH64
    $ARCH_EXT_LIST_ARM
    $ARCH_EXT_LIST_X86
    altivec
@@ -1543,9 +1553,10 @@ CMDLINE_APPEND="
armv5te_deps="arm"
armv6_deps="arm"
armv6t2_deps="arm"
-neon_deps="arm"
-vfp_deps="arm"
+neon_deps_any="aarch64 arm"
+vfp_deps_any="aarch64 arm"
vfpv3_deps="vfp"
+vfpv4_deps="vfp"

map 'eval ${v}_inline_deps=inline_asm' $ARCH_EXT_LIST_ARM

@@ -3597,7 +3608,11 @@ od -t x1 $TMPO | grep -q '42 *49 *47 *45' && enable bigendian

check_inline_asm inline_asm_labels '"1:\n"'

-if enabled alpha; then
+if enabled aarch64; then
+    # internal assembler in clang 3.3 does not support this instruction
+    enabled neon  && check_insn neon  'ext   v0.8B, v0.8B, v1.8B, #1'
+    enabled vfpv4 && check_insn vfpv4 'fmadd d0,    d0,    d1,    d2'
+elif enabled alpha; then

    check_cflags -mieee

@@ -4249,6 +4264,10 @@ if enabled x86; then
    echo "EBX available             ${ebx_available-no}"
    echo "EBP available             ${ebp_available-no}"
fi
+if enabled aarch64; then
+    echo "NEON enabled              ${neon-no}"
+    echo "VFPv4 enabled             ${vfpv4-no}"
+fi
if enabled arm; then
    echo "ARMv5TE enabled           ${armv5te-no}"
    echo "ARMv6 enabled             ${armv6-no}"
diff --git a/libavutil/aarch64/Makefile b/libavutil/aarch64/Makefile
new file mode 100644
index 0000000..13d26a1
--- /dev/null
+++ b/libavutil/aarch64/Makefile
@@ -0,0 +1 @@
+OBJS += aarch64/cpu.o
diff --git a/libavutil/aarch64/asm.S b/libavutil/aarch64/asm.S
new file mode 100644
index 0000000..7da18ce
--- /dev/null
+++ b/libavutil/aarch64/asm.S
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <[email protected]>
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+#ifdef __ELF__
+#   define ELF
+#else
+#   define ELF //
+#endif
+
+.macro  function name, export=0
+    .macro endfunc
+ELF     .size   \name, . - \name
+        .endfunc
+        .purgem endfunc
+    .endm
+        .text
+        .align          2

It would probably be nice to add an align parameter to the function macro, to match the 32-bit ARM version of this header.

+    .if \export
+        .global EXTERN_ASM\name
+EXTERN_ASM\name:
+    .endif
+ELF     .type   \name, %function
+        .func   \name
+\name:
+.endm
+
+.macro  const   name, align=2
+    .macro endconst
+ELF     .size   \name, . - \name
+        .purgem endconst
+    .endm
+        .section        .rodata
+        .align          \align
+\name:
+.endm
+
+.macro  movrel rd, val
+#if CONFIG_PIC
+        adrp            \rd, #:pg_hi21:\val
+        add             \rd, \rd, #:lo12:\val
+#else
+        ldr             \rd, =\val
+#endif
+.endm
diff --git a/libavutil/cpu_internal.h b/libavutil/aarch64/cpu.c
similarity index 64%
copy from libavutil/cpu_internal.h
copy to libavutil/aarch64/cpu.c
index 08f6e85..b9894b5 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/aarch64/cpu.c
@@ -16,18 +16,12 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

-#ifndef AVUTIL_CPU_INTERNAL_H
-#define AVUTIL_CPU_INTERNAL_H
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"
+#include "config.h"

-#include "cpu.h"
-
-#define CPUEXT_SUFFIX(flags, suffix, cpuext)                            \
-    (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
-
-#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
-
-int ff_get_cpu_flags_arm(void);
-int ff_get_cpu_flags_ppc(void);
-int ff_get_cpu_flags_x86(void);
-
-#endif /* AVUTIL_CPU_INTERNAL_H */
+int ff_get_cpu_flags_aarch64(void)
+{
+    return AV_CPU_FLAG_NEON    * HAVE_NEON    |
+           AV_CPU_FLAG_VFPV4   * HAVE_VFPV4;
+}

The git copy detection does more harm than good here IMO, but the code itself looks OK.

diff --git a/libavutil/cpu_internal.h b/libavutil/aarch64/cpu.h
similarity index 57%
copy from libavutil/cpu_internal.h
copy to libavutil/aarch64/cpu.h
index 08f6e85..e90209b 100644
--- a/libavutil/cpu_internal.h
+++ b/libavutil/aarch64/cpu.h
@@ -16,18 +16,20 @@
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

-#ifndef AVUTIL_CPU_INTERNAL_H
-#define AVUTIL_CPU_INTERNAL_H
+#ifndef AVUTIL_AARCH64_CPU_H
+#define AVUTIL_AARCH64_CPU_H

-#include "cpu.h"
+#include "config.h"
+#include "libavutil/cpu.h"
+#include "libavutil/cpu_internal.h"

-#define CPUEXT_SUFFIX(flags, suffix, cpuext)                            \
-    (HAVE_ ## cpuext ## suffix && ((flags) & AV_CPU_FLAG_ ## cpuext))
+#define have_neon(flags)    CPUEXT(flags, NEON)
+#define have_vfpv4(flags)   CPUEXT(flags, VFPV3)

-#define CPUEXT(flags, cpuext) CPUEXT_SUFFIX(flags, , cpuext)
+#define have_neon_external(flags)    CPUEXT_SUFFIX(flags, _EXTERNAL, NEON)
+#define have_vfpv4_external(flags)   CPUEXT_SUFFIX(flags, _EXTERNAL, VFPV3)

-int ff_get_cpu_flags_arm(void);
-int ff_get_cpu_flags_ppc(void);
-int ff_get_cpu_flags_x86(void);
+#define have_neon_inline(flags)      CPUEXT_SUFFIX(flags, _INLINE, NEON)
+#define have_vfpv4_inline(flags)     CPUEXT_SUFFIX(flags, _INLINE, VFPV3)


We don't have the _external and _inline variants on 32-bit ARM. I guess this is OK, but I'll want to think about it again once I get to the other patches and see how you use them there (I wanted to send the rest of this review before getting to those).

// Martin
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to