On Tue, Apr 21, 2015 at 05:58:39PM +0200, Jakub Jelinek wrote: > Attached is a minimal patch to get at least a trivial OpenMP 4.0 testcase > offloading to NVPTX (the first patch). The second patch is WIP, just the first > few changes needed to make libgomp build for NVPTX (several weeks of work > at least).
Here is an updated patch, which allows libgomp.a to be built on nvptx-none target. Nothing is really tested and there will be a lot of work porting it, so that it will actually work properly, but at least it is a start. --- libgomp/configure.tgt.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/configure.tgt 2015-04-23 16:19:07.179401650 +0200 @@ -7,6 +7,8 @@ # config_path An ordered list of directories to search for # sources and headers. This is relative to the # config subdirectory of the source tree. +# use_pthreads "yes" if POSIX threads should be used. +# broken_alloca "yes" if alloca nor VLAs should be used in libgomp. # XCFLAGS Add extra compile flags to use. # XLDFLAGS Add extra link flags to use. @@ -27,8 +29,10 @@ if test $gcc_cv_have_tls = yes ; then esac fi -# Since we require POSIX threads, assume a POSIX system by default. +# On most targets we require POSIX threads, assume a POSIX system by default. config_path="posix" +use_pthreads=yes +broken_alloca=no # Check for futex enabled all at once. if test x$enable_linux_futex = xyes; then @@ -151,6 +155,12 @@ case "${target}" in XLDFLAGS="${XLDFLAGS} -lpthread" ;; + nvptx*-*-*) + config_path="nvptx" + use_pthreads=no + broken_alloca=yes + ;; + *) ;; --- libgomp/team.c.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/team.c 2015-04-23 13:13:19.654259364 +0200 @@ -30,11 +30,13 @@ #include <stdlib.h> #include <string.h> +#ifdef LIBGOMP_USE_PTHREADS /* This attribute contains PTHREAD_CREATE_DETACHED. */ pthread_attr_t gomp_thread_attr; /* This key is for the thread destructor. */ pthread_key_t gomp_thread_destructor; +#endif /* This is the libgomp per-thread data structure. */ @@ -59,6 +61,7 @@ struct gomp_thread_start_data }; +#ifdef LIBGOMP_USE_PTHREADS /* This function is a pthread_create entry point. This contains the idle loop in which a thread waits to be called up to become part of a team. 
*/ @@ -133,6 +136,7 @@ gomp_thread_start (void *xdata) thr->task = NULL; return NULL; } +#endif /* Create a new team data structure. */ @@ -194,6 +198,7 @@ free_team (struct gomp_team *team) /* Allocate and initialize a thread pool. */ +#ifdef LIBGOMP_USE_PTHREADS static struct gomp_thread_pool *gomp_new_thread_pool (void) { struct gomp_thread_pool *pool @@ -204,6 +209,7 @@ static struct gomp_thread_pool *gomp_new pool->last_team = NULL; return pool; } +#endif static void gomp_free_pool_helper (void *thread_pool) @@ -215,7 +221,9 @@ gomp_free_pool_helper (void *thread_pool gomp_sem_destroy (&thr->release); thr->thread_pool = NULL; thr->task = NULL; +#ifdef LIBGOMP_USE_PTHREADS pthread_exit (NULL); +#endif } /* Free a thread pool and release its threads. */ @@ -267,6 +275,7 @@ gomp_free_thread (void *arg __attribute_ } } +#ifdef LIBGOMP_USE_PTHREADS /* Launch a team. */ void @@ -834,6 +843,7 @@ gomp_team_start (void (*fn) (void *), vo && team->prev_ts.place_partition_len > 64) free (affinity_thr); } +#endif /* Terminate the current team. This is only to be called by the master @@ -911,7 +921,7 @@ gomp_team_end (void) } } - +#ifdef LIBGOMP_USE_PTHREADS /* Constructors for this file. */ static void __attribute__((constructor)) @@ -935,6 +945,7 @@ team_destructor (void) crashes. */ pthread_key_delete (gomp_thread_destructor); } +#endif struct gomp_task_icv * gomp_new_icv (void) @@ -943,6 +954,8 @@ gomp_new_icv (void) struct gomp_task *task = gomp_malloc (sizeof (struct gomp_task)); gomp_init_task (task, NULL, &gomp_global_icv); thr->task = task; +#ifdef LIBGOMP_USE_PTHREADS pthread_setspecific (gomp_thread_destructor, thr); +#endif return &task->icv; } --- libgomp/config.h.in.jj 2015-04-21 08:38:01.000000000 +0200 +++ libgomp/config.h.in 2015-04-23 12:40:18.000000000 +0200 @@ -12,6 +12,9 @@ /* Define to 1 if the target supports __attribute__((visibility(...))). */ #undef HAVE_ATTRIBUTE_VISIBILITY +/* Define to 1 if neither alloca nor VLAs are usable. 
*/ +#undef HAVE_BROKEN_ALLOCA + /* Define if the POSIX Semaphores do not work on your system. */ #undef HAVE_BROKEN_POSIX_SEMAPHORES @@ -39,6 +42,9 @@ /* Define if pthread_{,attr_}{g,s}etaffinity_np is supported. */ #undef HAVE_PTHREAD_AFFINITY_NP +/* Define to 1 if you have the <pthread.h> header file. */ +#undef HAVE_PTHREAD_H + /* Define to 1 if you have the <semaphore.h> header file. */ #undef HAVE_SEMAPHORE_H @@ -85,6 +91,9 @@ /* Define to 1 if GNU symbol versioning is used for libgomp. */ #undef LIBGOMP_GNU_SYMBOL_VERSIONING +/* Define to 1 if libgomp should use POSIX threads. */ +#undef LIBGOMP_USE_PTHREADS + /* Define to the sub-directory in which libtool stores uninstalled libraries. */ #undef LT_OBJDIR --- libgomp/Makefile.am.jj 2015-04-21 08:38:01.000000000 +0200 +++ libgomp/Makefile.am 2015-04-23 16:18:14.718252400 +0200 @@ -61,9 +61,12 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c single.c \ task.c team.c work.c lock.c mutex.c proc.c sem.c bar.c ptrlock.c \ - time.c fortran.c affinity.c target.c splay-tree.c libgomp-plugin.c \ + time.c fortran.c affinity.c +if USE_PTHREADS +libgomp_la_SOURCES += target.c splay-tree.c libgomp-plugin.c \ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ oacc-plugin.c oacc-cuda.c +endif include $(top_srcdir)/plugin/Makefrag.am --- libgomp/Makefile.in.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/Makefile.in 2015-04-23 16:19:16.884244269 +0200 @@ -64,6 +64,10 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ target_triplet = @target@ +@USE_PTHREADS_TRUE@am__append_1 = target.c splay-tree.c libgomp-plugin.c \ +@USE_PTHREADS_TRUE@ oacc-parallel.c oacc-host.c oacc-init.c oacc-mem.c oacc-async.c \ +@USE_PTHREADS_TRUE@ oacc-plugin.c oacc-cuda.c + DIST_COMMON = $(top_srcdir)/plugin/Makefrag.am ChangeLog \ $(srcdir)/Makefile.in $(srcdir)/Makefile.am \ 
$(top_srcdir)/configure $(am__configure_deps) \ @@ -71,8 +75,8 @@ DIST_COMMON = $(top_srcdir)/plugin/Makef $(srcdir)/omp.h.in $(srcdir)/omp_lib.h.in \ $(srcdir)/omp_lib.f90.in $(srcdir)/libgomp_f.h.in \ $(srcdir)/libgomp.spec.in $(srcdir)/../depcomp -@PLUGIN_NVPTX_TRUE@am__append_1 = libgomp-plugin-nvptx.la -@USE_FORTRAN_TRUE@am__append_2 = openacc.f90 +@PLUGIN_NVPTX_TRUE@am__append_2 = libgomp-plugin-nvptx.la +@USE_FORTRAN_TRUE@am__append_3 = openacc.f90 subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ @@ -146,15 +150,16 @@ libgomp_plugin_nvptx_la_LINK = $(LIBTOOL @PLUGIN_NVPTX_TRUE@am_libgomp_plugin_nvptx_la_rpath = -rpath \ @PLUGIN_NVPTX_TRUE@ $(toolexeclibdir) libgomp_la_LIBADD = -@USE_FORTRAN_TRUE@am__objects_1 = openacc.lo +@USE_PTHREADS_TRUE@am__objects_1 = target.lo splay-tree.lo \ +@USE_PTHREADS_TRUE@ libgomp-plugin.lo oacc-parallel.lo \ +@USE_PTHREADS_TRUE@ oacc-host.lo oacc-init.lo oacc-mem.lo \ +@USE_PTHREADS_TRUE@ oacc-async.lo oacc-plugin.lo oacc-cuda.lo +@USE_FORTRAN_TRUE@am__objects_2 = openacc.lo am_libgomp_la_OBJECTS = alloc.lo barrier.lo critical.lo env.lo \ error.lo iter.lo iter_ull.lo loop.lo loop_ull.lo ordered.lo \ parallel.lo sections.lo single.lo task.lo team.lo work.lo \ lock.lo mutex.lo proc.lo sem.lo bar.lo ptrlock.lo time.lo \ - fortran.lo affinity.lo target.lo splay-tree.lo \ - libgomp-plugin.lo oacc-parallel.lo oacc-host.lo oacc-init.lo \ - oacc-mem.lo oacc-async.lo oacc-plugin.lo oacc-cuda.lo \ - $(am__objects_1) + fortran.lo affinity.lo $(am__objects_1) $(am__objects_2) libgomp_la_OBJECTS = $(am_libgomp_la_OBJECTS) DEFAULT_INCLUDES = -I.@am__isrc@ depcomp = $(SHELL) $(top_srcdir)/../depcomp @@ -373,7 +378,7 @@ libsubincludedir = $(libdir)/gcc/$(targe AM_CPPFLAGS = $(addprefix -I, $(search_path)) AM_CFLAGS = $(XCFLAGS) AM_LDFLAGS = $(XLDFLAGS) $(SECTION_LDFLAGS) $(OPT_LDFLAGS) -toolexeclib_LTLIBRARIES = libgomp.la $(am__append_1) \ +toolexeclib_LTLIBRARIES = libgomp.la 
$(am__append_2) \ libgomp-plugin-host_nonshm.la nodist_toolexeclib_HEADERS = libgomp.spec @@ -395,10 +400,8 @@ libgomp_la_LINK = $(LINK) $(libgomp_la_L libgomp_la_SOURCES = alloc.c barrier.c critical.c env.c error.c iter.c \ iter_ull.c loop.c loop_ull.c ordered.c parallel.c sections.c \ single.c task.c team.c work.c lock.c mutex.c proc.c sem.c \ - bar.c ptrlock.c time.c fortran.c affinity.c target.c \ - splay-tree.c libgomp-plugin.c oacc-parallel.c oacc-host.c \ - oacc-init.c oacc-mem.c oacc-async.c oacc-plugin.c oacc-cuda.c \ - $(am__append_2) + bar.c ptrlock.c time.c fortran.c affinity.c $(am__append_1) \ + $(am__append_3) # Nvidia PTX OpenACC plugin. @PLUGIN_NVPTX_TRUE@libgomp_plugin_nvptx_version_info = -version-info $(libtool_VERSION) --- libgomp/plugin/plugin-nvptx.c.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/plugin/plugin-nvptx.c 2015-04-21 16:55:25.247470080 +0200 @@ -978,8 +978,8 @@ event_add (enum ptx_event_type type, CUe void nvptx_exec (void (*fn), size_t mapnum, void **hostaddrs, void **devaddrs, - size_t *sizes, unsigned short *kinds, int num_gangs, int num_workers, - int vector_length, int async, void *targ_mem_desc) + size_t *sizes, unsigned short *kinds, int num_gangs, + int num_workers, int vector_length, int async, void *targ_mem_desc) { struct targ_fn_descriptor *targ_fn = (struct targ_fn_descriptor *) fn; CUfunction function; @@ -1137,7 +1137,6 @@ nvptx_host2dev (void *d, const void *h, CUresult r; CUdeviceptr pb; size_t ps; - struct nvptx_thread *nvthd = nvptx_thread (); if (!s) return 0; @@ -1162,7 +1161,8 @@ nvptx_host2dev (void *d, const void *h, GOMP_PLUGIN_fatal ("invalid size"); #ifndef DISABLE_ASYNC - if (nvthd->current_stream != nvthd->ptx_dev->null_stream) + struct nvptx_thread *nvthd = nvptx_thread (); + if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream) { CUevent *e; @@ -1202,7 +1202,6 @@ nvptx_dev2host (void *h, const void *d, CUresult r; CUdeviceptr pb; size_t ps; - struct nvptx_thread *nvthd = nvptx_thread 
(); if (!s) return 0; @@ -1227,7 +1226,8 @@ nvptx_dev2host (void *h, const void *d, GOMP_PLUGIN_fatal ("invalid size"); #ifndef DISABLE_ASYNC - if (nvthd->current_stream != nvthd->ptx_dev->null_stream) + struct nvptx_thread *nvthd = nvptx_thread (); + if (nvthd && nvthd->current_stream != nvthd->ptx_dev->null_stream) { CUevent *e; @@ -1559,7 +1559,8 @@ GOMP_OFFLOAD_get_name (void) unsigned int GOMP_OFFLOAD_get_caps (void) { - return GOMP_OFFLOAD_CAP_OPENACC_200; + return GOMP_OFFLOAD_CAP_OPENACC_200 + | GOMP_OFFLOAD_CAP_OPENMP_400; } int @@ -1759,7 +1760,7 @@ GOMP_OFFLOAD_openacc_parallel (void (*fn void *targ_mem_desc) { nvptx_exec (fn, mapnum, hostaddrs, devaddrs, sizes, kinds, num_gangs, - num_workers, vector_length, async, targ_mem_desc); + num_workers, vector_length, async, targ_mem_desc); } void @@ -1889,3 +1890,27 @@ GOMP_OFFLOAD_openacc_set_cuda_stream (in { return nvptx_set_cuda_stream (async, stream); } + +void +GOMP_OFFLOAD_run (int ord, void *tgt_fn, void *tgt_vars) +{ + CUfunction function = ((struct targ_fn_descriptor *) tgt_fn)->fn; + CUresult r; + struct ptx_device *ptx_dev = ptx_devices[ord]; + const char *maybe_abort_msg = "(perhaps abort was called)"; + void *args = &tgt_vars; + + r = cuLaunchKernel (function, + 1, 1, 1, + 1, 1, 1, + 0, ptx_dev->null_stream->stream, &args, 0); + if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuLaunchKernel error: %s", cuda_error (r)); + + r = cuCtxSynchronize (); + if (r == CUDA_ERROR_LAUNCH_FAILED) + GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s %s\n", cuda_error (r), + maybe_abort_msg); + else if (r != CUDA_SUCCESS) + GOMP_PLUGIN_fatal ("cuCtxSynchronize error: %s", cuda_error (r)); +} --- libgomp/task.c.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/task.c 2015-04-23 12:48:09.158446644 +0200 @@ -162,11 +162,22 @@ GOMP_task (void (*fn) (void *), void *da thr->task = &task; if (__builtin_expect (cpyfn != NULL, 0)) { +#ifdef HAVE_BROKEN_ALLOCA + char buf_fixed[128]; + char *buf = buf_fixed; + if (arg_size + 
arg_align - 1 > sizeof buf_fixed) + buf = gomp_malloc (arg_size + arg_align - 1); +#else char buf[arg_size + arg_align - 1]; +#endif char *arg = (char *) (((uintptr_t) buf + arg_align - 1) & ~(uintptr_t) (arg_align - 1)); cpyfn (arg, data); fn (arg); +#ifdef HAVE_BROKEN_ALLOCA + if (buf != buf_fixed) + free (buf); +#endif } else fn (data); --- libgomp/libgomp.h.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/libgomp.h 2015-04-23 12:08:50.410950510 +0200 @@ -40,7 +40,9 @@ #include "gstdint.h" #include "libgomp-plugin.h" +#ifdef HAVE_PTHREAD_H #include <pthread.h> +#endif #include <stdbool.h> #include <stdlib.h> #include <stdarg.h> @@ -508,14 +510,18 @@ static inline struct gomp_task_icv *gomp } /* The attributes to be used during thread creation. */ +#ifdef LIBGOMP_USE_PTHREADS extern pthread_attr_t gomp_thread_attr; +#endif /* Function prototypes. */ /* affinity.c */ extern void gomp_init_affinity (void); +#ifdef LIBGOMP_USE_PTHREADS extern void gomp_init_thread_affinity (pthread_attr_t *, unsigned int); +#endif extern void **gomp_affinity_alloc (unsigned long, bool); extern void gomp_affinity_init_place (void *); extern bool gomp_affinity_add_cpus (void *, unsigned long, unsigned long, --- libgomp/configure.ac.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/configure.ac 2015-04-23 16:16:09.358286266 +0200 @@ -179,6 +179,9 @@ case "$host" in *-*-rtems*) # RTEMS supports Pthreads, but the library is not available at GCC build time. ;; + nvptx*-*-*) + # NVPTX does not support Pthreads, has its own code replacement. + ;; *) # Check to see if -pthread or -lpthread is needed. Prefer the former. # In case the pthread.h system header is not found, this test will fail. @@ -268,6 +271,18 @@ CFLAGS="$save_CFLAGS $XCFLAGS" # had a chance to set XCFLAGS. 
LIBGOMP_CHECK_SYNC_BUILTINS +if test x$use_pthreads = xyes; then + AC_DEFINE(LIBGOMP_USE_PTHREADS, 1, + [Define to 1 if libgomp should use POSIX threads.]) +fi + +if test x$broken_alloca = xyes; then + AC_DEFINE(HAVE_BROKEN_ALLOCA, 1, + [Define to 1 if neither alloca nor VLAs are usable.]) +fi + +AM_CONDITIONAL([USE_PTHREADS], [test "x$use_pthreads" = xyes]) + XCFLAGS="$XCFLAGS$XPCFLAGS" AC_SUBST(config_path) --- libgomp/env.c.jj 2015-04-21 08:38:00.000000000 +0200 +++ libgomp/env.c 2015-04-23 12:18:03.238667435 +0200 @@ -82,6 +82,7 @@ int gomp_debug_var; char *goacc_device_type; int goacc_device_num; +#ifdef LIBGOMP_USE_PTHREADS /* Parse the OMP_SCHEDULE environment variable. */ static void @@ -1297,6 +1298,7 @@ initialize_env (void) goacc_runtime_initialize (); } +#endif /* The public OpenMP API routines that access these variables. */ --- libgomp/config/nvptx/mutex.h.jj 2015-04-23 14:22:11.549627818 +0200 +++ libgomp/config/nvptx/mutex.h 2015-04-23 14:22:06.834706015 +0200 @@ -0,0 +1,65 @@ +/* Copyright (C) 2005-2015 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is an NVPTX specific implementation of a mutex synchronization + mechanism for libgomp. This type is private to the library. Locking + spins on an atomic compare-and-exchange until the mutex is acquired. */ + +#ifndef GOMP_MUTEX_H +#define GOMP_MUTEX_H 1 + +typedef int gomp_mutex_t; + +#define GOMP_MUTEX_INIT_0 1 + +extern void gomp_mutex_lock_slow (gomp_mutex_t *mutex, int); +extern void gomp_mutex_unlock_slow (gomp_mutex_t *mutex); + +static inline void +gomp_mutex_init (gomp_mutex_t *mutex) +{ + *mutex = 0; +} + +static inline void +gomp_mutex_destroy (gomp_mutex_t *mutex) +{ +} + +static inline void +gomp_mutex_lock (gomp_mutex_t *mutex) +{ + int oldval = 0; + while (!__atomic_compare_exchange_n (mutex, &oldval, 1, false, + MEMMODEL_ACQUIRE, MEMMODEL_RELAXED)) + oldval = 0; +} + +static inline void +gomp_mutex_unlock (gomp_mutex_t *mutex) +{ + __atomic_exchange_n (mutex, 0, MEMMODEL_RELEASE); +} +#endif /* GOMP_MUTEX_H */ --- libgomp/config/nvptx/sem.h.jj 2015-04-23 16:04:34.368646584 +0200 +++ libgomp/config/nvptx/sem.h 2015-04-21 11:23:31.672337605 +0200 @@ -0,0 +1,56 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details.
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is a placeholder NVPTX implementation of a semaphore + synchronization mechanism for libgomp. This type is private to + the library. + + So far only gomp_sem_init does anything (it records the initial + count); gomp_sem_wait, gomp_sem_post and gomp_sem_destroy are + still no-ops and remain to be implemented. */ + +#ifndef GOMP_SEM_H +#define GOMP_SEM_H 1 + +typedef int gomp_sem_t; + +static inline void gomp_sem_init (gomp_sem_t *sem, int value) +{ + *sem = value; +} + +static inline void gomp_sem_wait (gomp_sem_t *sem) +{ +} + +static inline void gomp_sem_post (gomp_sem_t *sem) +{ +} + +static inline void gomp_sem_destroy (gomp_sem_t *sem) +{ +} + +#endif /* GOMP_SEM_H */ --- libgomp/config/nvptx/bar.c.jj 2015-04-23 16:09:36.208706337 +0200 +++ libgomp/config/nvptx/bar.c 2015-04-23 16:09:13.000000000 +0200 @@ -0,0 +1 @@ +/* To be implemented. */ --- libgomp/config/nvptx/affinity.c.jj 2015-04-23 16:10:08.981171168 +0200 +++ libgomp/config/nvptx/affinity.c 2015-04-23 16:09:13.000000000 +0200 @@ -0,0 +1 @@ +/* To be implemented. */ --- libgomp/config/nvptx/proc.c.jj 2015-04-23 16:09:05.611205989 +0200 +++ libgomp/config/nvptx/proc.c 2015-04-23 16:09:13.818071972 +0200 @@ -0,0 +1 @@ +/* To be implemented. */ --- libgomp/config/nvptx/lock.c.jj 2015-04-23 15:19:09.217347370 +0200 +++ libgomp/config/nvptx/lock.c 2015-04-23 16:06:21.354893215 +0200 @@ -0,0 +1,135 @@ +/* Copyright (C) 2005-2015 Free Software Foundation, Inc. + Contributed by Richard Henderson <r...@redhat.com>.
+ + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is an NVPTX specific implementation of the public OpenMP locking + primitives. This implementation uses atomic instructions only; the + futex syscall is not used. */ + +#include <string.h> +#include "libgomp.h" + + +/* The internal gomp_mutex_t and the external non-recursive omp_lock_t + have the same form. Re-use it.
*/ + +void +gomp_init_lock_30 (omp_lock_t *lock) +{ + gomp_mutex_init (lock); +} + +void +gomp_destroy_lock_30 (omp_lock_t *lock) +{ + gomp_mutex_destroy (lock); +} + +void +gomp_set_lock_30 (omp_lock_t *lock) +{ + gomp_mutex_lock (lock); +} + +void +gomp_unset_lock_30 (omp_lock_t *lock) +{ + gomp_mutex_unlock (lock); +} + +int +gomp_test_lock_30 (omp_lock_t *lock) +{ + int oldval = 0; + + return __atomic_compare_exchange_n (lock, &oldval, 1, false, + MEMMODEL_ACQUIRE, MEMMODEL_RELAXED); +} + +void +gomp_init_nest_lock_30 (omp_nest_lock_t *lock) +{ + memset (lock, '\0', sizeof (*lock)); +} + +void +gomp_destroy_nest_lock_30 (omp_nest_lock_t *lock) +{ +} + +void +gomp_set_nest_lock_30 (omp_nest_lock_t *lock) +{ + void *me = gomp_icv (true); + + if (lock->owner != me) + { + gomp_mutex_lock (&lock->lock); + lock->owner = me; + } + + lock->count++; +} + +void +gomp_unset_nest_lock_30 (omp_nest_lock_t *lock) +{ + if (--lock->count == 0) + { + lock->owner = NULL; + gomp_mutex_unlock (&lock->lock); + } +} + +int +gomp_test_nest_lock_30 (omp_nest_lock_t *lock) +{ + void *me = gomp_icv (true); + int oldval; + + if (lock->owner == me) + return ++lock->count; + + oldval = 0; + if (__atomic_compare_exchange_n (&lock->lock, &oldval, 1, false, + MEMMODEL_ACQUIRE, MEMMODEL_RELAXED)) + { + lock->owner = me; + lock->count = 1; + return 1; + } + + return 0; +} + +ialias (omp_init_lock) +ialias (omp_init_nest_lock) +ialias (omp_destroy_lock) +ialias (omp_destroy_nest_lock) +ialias (omp_set_lock) +ialias (omp_set_nest_lock) +ialias (omp_unset_lock) +ialias (omp_unset_nest_lock) +ialias (omp_test_lock) +ialias (omp_test_nest_lock) --- libgomp/config/nvptx/sem.c.jj 2015-04-23 16:04:53.082339890 +0200 +++ libgomp/config/nvptx/sem.c 2015-04-21 08:38:01.000000000 +0200 @@ -0,0 +1 @@ +/* Everything is in the header. 
*/ --- libgomp/config/nvptx/time.c.jj 2015-04-23 16:09:54.143413466 +0200 +++ libgomp/config/nvptx/time.c 2015-04-23 16:09:13.000000000 +0200 @@ -0,0 +1 @@ +/* To be implemented. */ --- libgomp/config/nvptx/ptrlock.h.jj 2015-04-23 16:04:30.088716726 +0200 +++ libgomp/config/nvptx/ptrlock.h 2015-04-21 11:46:31.091467128 +0200 @@ -0,0 +1,66 @@ +/* Copyright (C) 2015 Free Software Foundation, Inc. + Contributed by Jakub Jelinek <ja...@redhat.com>. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This is a generic POSIX implementation of a mutex synchronization + mechanism for libgomp. This type is private to the library. 
*/ + +#ifndef GOMP_PTRLOCK_H +#define GOMP_PTRLOCK_H 1 + +typedef struct { void *ptr; gomp_mutex_t lock; } gomp_ptrlock_t; + +static inline void gomp_ptrlock_init (gomp_ptrlock_t *ptrlock, void *ptr) +{ + ptrlock->ptr = ptr; + gomp_mutex_init (&ptrlock->lock); +} + +static inline void *gomp_ptrlock_get (gomp_ptrlock_t *ptrlock) +{ + if (ptrlock->ptr != NULL) + return ptrlock->ptr; + + gomp_mutex_lock (&ptrlock->lock); + if (ptrlock->ptr != NULL) + { + gomp_mutex_unlock (&ptrlock->lock); + return ptrlock->ptr; + } + + return NULL; +} + +static inline void gomp_ptrlock_set (gomp_ptrlock_t *ptrlock, void *ptr) +{ + ptrlock->ptr = ptr; + gomp_mutex_unlock (&ptrlock->lock); +} + +static inline void gomp_ptrlock_destroy (gomp_ptrlock_t *ptrlock) +{ + gomp_mutex_destroy (&ptrlock->lock); +} + +#endif /* GOMP_PTRLOCK_H */ --- libgomp/config/nvptx/ptrlock.c.jj 2015-04-23 16:04:27.253763188 +0200 +++ libgomp/config/nvptx/ptrlock.c 2015-04-21 08:38:01.000000000 +0200 @@ -0,0 +1 @@ +/* Everything is in the header. */ --- libgomp/config/nvptx/omp-lock.h.jj 2015-04-23 15:19:03.295444592 +0200 +++ libgomp/config/nvptx/omp-lock.h 2015-04-23 16:02:38.020553381 +0200 @@ -0,0 +1,12 @@ +/* This header is used during the build process to find the size and + alignment of the public OpenMP locks, so that we can export data + structures without polluting the namespace. + + When using the Linux futex primitive, non-recursive locks require + one int. Recursive locks require we identify the owning task + and so require in addition one int and a pointer. */ + +typedef int omp_lock_t; +typedef struct { int lock, count; void *owner; } omp_nest_lock_t; +typedef int omp_lock_25_t; +typedef omp_nest_lock_t omp_nest_lock_25_t; --- libgomp/config/nvptx/mutex.c.jj 2015-04-23 14:25:20.393497758 +0200 +++ libgomp/config/nvptx/mutex.c 2015-04-21 08:38:01.000000000 +0200 @@ -0,0 +1 @@ +/* Everything is in the header. 
*/ --- libgomp/configure.jj 2015-04-21 11:08:08.347628799 +0200 +++ libgomp/configure 2015-04-23 16:18:33.517947530 +0200 @@ -619,6 +619,8 @@ link_gomp XLDFLAGS XCFLAGS config_path +USE_PTHREADS_FALSE +USE_PTHREADS_TRUE LIBGOMP_BUILD_VERSIONED_SHLIB_SUN_FALSE LIBGOMP_BUILD_VERSIONED_SHLIB_SUN_TRUE LIBGOMP_BUILD_VERSIONED_SHLIB_GNU_FALSE @@ -11118,7 +11120,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11121 "configure" +#line 11123 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -11224,7 +11226,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11227 "configure" +#line 11229 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -15038,6 +15040,9 @@ case "$host" in *-*-rtems*) # RTEMS supports Pthreads, but the library is not available at GCC build time. ;; + nvptx*-*-*) + # NVPTX does not support Pthreads, has its own code replacement. + ;; *) # Check to see if -pthread or -lpthread is needed. Prefer the former. # In case the pthread.h system header is not found, this test will fail. @@ -16353,6 +16358,27 @@ $as_echo "#define HAVE_SYNC_BUILTINS 1" fi +if test x$use_pthreads = xyes; then + +$as_echo "#define LIBGOMP_USE_PTHREADS 1" >>confdefs.h + +fi + +if test x$broken_alloca = xyes; then + +$as_echo "#define HAVE_BROKEN_ALLOCA 1" >>confdefs.h + +fi + + if test "x$use_pthreads" = xyes; then + USE_PTHREADS_TRUE= + USE_PTHREADS_FALSE='#' +else + USE_PTHREADS_TRUE='#' + USE_PTHREADS_FALSE= +fi + + XCFLAGS="$XCFLAGS$XPCFLAGS" @@ -16702,6 +16728,10 @@ if test -z "${LIBGOMP_BUILD_VERSIONED_SH as_fn_error "conditional \"LIBGOMP_BUILD_VERSIONED_SHLIB_SUN\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${USE_PTHREADS_TRUE}" && test -z "${USE_PTHREADS_FALSE}"; then + as_fn_error "conditional \"USE_PTHREADS\" was never defined. 
+Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${USE_FORTRAN_TRUE}" && test -z "${USE_FORTRAN_FALSE}"; then as_fn_error "conditional \"USE_FORTRAN\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 Jakub