diff --git a/CHANGELOG.md b/CHANGELOG.md
index 599aa2ec9a9fabaf0a91bb9ecb761978dbb30037..026e6081ebbe0bda18250544a150815ef2333f6d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@ jni# Aparapi jni Changelog
 ## 1.3.2
 
 * Fixed local arrays handling 1D and ND, to cope with arrays resizing across kernel executions
+* Fixed aparapi now supports efficient execution on discrete GPU and other devices with dedicated memory
 
 ## 1.3.1
 
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index c3c9e4cd4ad59f1c273ed1a20d690f543d45a4d4..5e51ce3d782b9391de612ef8581af0d37d2df919 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -28,4 +28,5 @@ Below are some of the specific details of various contributions.
 * Paul Miner issue #61 and #115 (JTP Speed up and fixes to explicit puts) June 13th 2013
 & lgalluci for his fix for issue #121 (incorrect toString for 3D ranges) July 6th 2013
 * Luis Mendes Issue #51 JVM crash when using multi-dimensional local arrays (refs #51)
-* Luis Mendes submitted local arrays handling 1D and ND, to cope with arrays resizing across kernel executions
\ No newline at end of file
+* Luis Mendes submitted local arrays handling 1D and ND, to cope with arrays resizing across kernel executions
+* Luis Mendes submitted #107 aparapi now supports efficient execution on discrete GPU and other devices
\ No newline at end of file
diff --git a/src/cpp/JNIHelper.h b/src/cpp/JNIHelper.h
index d8253c193b9d44908630f376288a700341def1b7..7166fb9f9e73c8e1bb850646db2b1c35eb5977ac 100644
--- a/src/cpp/JNIHelper.h
+++ b/src/cpp/JNIHelper.h
@@ -232,6 +232,11 @@ class JNIHelper {
          return getInstanceField<jT>(jenv, instance, fieldName, getSignature((jT)0));
       }
 
+      template<typename jT>
+      static jT getInstanceFieldWithException(JNIEnv *jenv, jobject instance, const char *fieldName) {
+         return getInstanceFieldWithException<jT>(jenv, instance, fieldName, getSignature((jT)0));
+      }
+
       template<typename jT>
       static jT getInstanceField(JNIEnv *jenv, jobject instance, const char *fieldName, const char *signature) {
          jT value = (jT)0;
@@ -254,6 +259,28 @@ class JNIHelper {
          return(value);
       }
 
+      // Reads the field 'fieldName' (JNI type signature 'signature') from 'instance'.
+      // On any lookup/read failure the pending Java exception is cleared and a
+      // std::string describing the failure is thrown to the caller.
+      template<typename jT>
+      static jT getInstanceFieldWithException(JNIEnv *jenv, jobject instance, const char *fieldName, const char *signature) {
+         jT value = (jT)0;
+         try {
+            jclass theClass = jenv->GetObjectClass(instance);
+            if (theClass == NULL || jenv->ExceptionCheck())
+               throw std::string("bummer! getting class from instance\n");
+            jfieldID fieldId = jenv->GetFieldID(theClass,fieldName, signature);
+            if (fieldId == NULL || jenv->ExceptionCheck())
+               throw std::string("bummer getting ") + getType(value) + "field '" + fieldName + "' \n";
+            getField(jenv, instance, fieldId, &value);
+            if (jenv->ExceptionCheck())
+               throw std::string("bummer getting ") + getType(value) + "field '" + fieldName + "' \n";
+         } catch(const std::string&) {
+            jenv->ExceptionClear();
+            throw;
+         }
+         return(value);
+      }
       static jfieldID GetFieldID(JNIEnv* jenv, jclass c, const char* name, const char* type) {
          jfieldID field = jenv->GetFieldID(c, name, type);
 
diff --git a/src/cpp/invoke/OpenCLJNI.cpp b/src/cpp/invoke/OpenCLJNI.cpp
index d72794ac0a37a7ee08421f260b8cf4703816425b..00127b215c0238fb59b9ec0dced3d8fbd1484551 100644
--- a/src/cpp/invoke/OpenCLJNI.cpp
+++ b/src/cpp/invoke/OpenCLJNI.cpp
@@ -68,6 +68,24 @@ jobject OpenCLDevice::getPlatformInstance(JNIEnv *jenv, jobject deviceInstance){
    return(JNIHelper::getInstanceField<jobject>(jenv, deviceInstance, "platform", OpenCLPlatformClassArg ));
 }
 
+
+// Reports whether the Java OpenCLDevice instance has a non-zero 'sharedMemory'
+// field. If the field cannot be read (e.g. an older Aparapi OpenCLDevice class
+// without it), a notice is printed to stderr and true is returned.
+bool OpenCLDevice::isSharedMemory(JNIEnv *jenv, jobject deviceInstance) {
+   try {
+      jboolean value = (JNIHelper::getInstanceFieldWithException<jboolean>(jenv, deviceInstance, "sharedMemory"));
+      if (value != 0) {
+         return true;
+      }
+   } catch (const std::string &) {
+      //For backwards compatibility with older Aparapi versions.
+      fprintf(stderr, "Property sharedMemory not found for class OpenCLDevice, using default: true\n");
+      return true;
+   }
+   return false;
+}
+
 cl_device_id OpenCLDevice::getDeviceId(JNIEnv *jenv, jobject deviceInstance){
    return((cl_device_id)JNIHelper::getInstanceField<jlong>(jenv, deviceInstance, "deviceId"));
 }
diff --git a/src/cpp/invoke/OpenCLJNI.h b/src/cpp/invoke/OpenCLJNI.h
index 2d24a6e0e53ad13ae522f95a64c89210895e1b42..eb24ad9f130c2b8602139124369d4ce16fde6a03 100644
--- a/src/cpp/invoke/OpenCLJNI.h
+++ b/src/cpp/invoke/OpenCLJNI.h
@@ -63,6 +63,7 @@
 
 class OpenCLDevice{
    public:
+      static bool isSharedMemory(JNIEnv *jenv, jobject deviceInstance);
       static jobject getPlatformInstance(JNIEnv *jenv, jobject deviceInstance);
       static cl_device_id getDeviceId(JNIEnv *jenv, jobject deviceInstance);
 };
diff --git a/src/cpp/runKernel/Aparapi.cpp b/src/cpp/runKernel/Aparapi.cpp
index c346271d2912e88d11a844fb29cd94d91791a82f..02d967620ae5982dc488becd453553f145b97908 100644
--- a/src/cpp/runKernel/Aparapi.cpp
+++ b/src/cpp/runKernel/Aparapi.cpp
@@ -365,7 +365,13 @@ void updateArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argP
    cl_int status = CL_SUCCESS;
    // if either this is the first run or user changed input array
    // or gc moved something, then we create buffers/args
-   cl_uint mask = CL_MEM_USE_HOST_PTR;
+   // shared-memory devices keep using the host array; others get a buffer initialised from a copy of it
+   cl_uint mask = 0;
+   if (jniContext->isSharedMemory()) {
+      mask |= CL_MEM_USE_HOST_PTR;
+   } else {
+      mask |= CL_MEM_COPY_HOST_PTR;
+   }
    if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE;
    else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY;
    else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY;
@@ -373,7 +379,7 @@ void updateArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argP
    arg->arrayBuffer->syncMinimalParams(jenv, arg);
 
    if (config->isVerbose()) {
-      strcpy(arg->arrayBuffer->memSpec,"CL_MEM_USE_HOST_PTR");
+      strcpy(arg->arrayBuffer->memSpec, (mask & CL_MEM_COPY_HOST_PTR) != 0 ? "CL_MEM_COPY_HOST_PTR" : "CL_MEM_USE_HOST_PTR");
       if (mask & CL_MEM_READ_WRITE) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_WRITE");
       if (mask & CL_MEM_READ_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_ONLY");
       if (mask & CL_MEM_WRITE_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_WRITE_ONLY");
@@ -411,12 +417,34 @@ void updateBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& arg
 
    AparapiBuffer* buffer = arg->aparapiBuffer;
    cl_int status = CL_SUCCESS;
-   cl_uint mask = CL_MEM_USE_HOST_PTR;
+   // shared-memory devices keep using the host array; others get a buffer initialised from a copy of it
+   cl_uint mask = 0;
+   if (jniContext->isSharedMemory()) {
+      mask |= CL_MEM_USE_HOST_PTR;
+   } else {
+      mask |= CL_MEM_COPY_HOST_PTR;
+   }
    if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE;
    else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY;
    else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY;
    buffer->memMask = mask;
 
+   if (config->isVerbose()) {
+      std::string str = (mask & CL_MEM_COPY_HOST_PTR) != 0 ? "CL_MEM_COPY_HOST_PTR" : "CL_MEM_USE_HOST_PTR";
+      if (mask & CL_MEM_READ_WRITE) {
+         str += "|CL_MEM_READ_WRITE";
+      }
+      if (mask & CL_MEM_READ_ONLY) {
+         str += "|CL_MEM_READ_ONLY";
+      }
+      if (mask & CL_MEM_WRITE_ONLY) {
+         str += "|CL_MEM_WRITE_ONLY";
+      }
+
+      fprintf(stderr, "%s %d clCreateBuffer(context, %s, size=%08lx bytes, address=%p, &status)\n", arg->name,
+            argIdx, str.c_str(), (unsigned long)buffer->lengthInBytes, buffer->data);
+   }
+
    buffer->mem = clCreateBuffer(jniContext->context, buffer->memMask,
          buffer->lengthInBytes, buffer->data, &status);
 
diff --git a/src/cpp/runKernel/JNIContext.cpp b/src/cpp/runKernel/JNIContext.cpp
index ff9f875cfa2a8784c36f467feb173d47defd2dd4..363f388aa5f7afb43d9c0b0375defcd06e8d3689 100644
--- a/src/cpp/runKernel/JNIContext.cpp
+++ b/src/cpp/runKernel/JNIContext.cpp
@@ -34,6 +34,7 @@ JNIContext::JNIContext(JNIEnv *jenv, jobject _kernelObject, jobject _openCLDevic
    jobject platformInstance = OpenCLDevice::getPlatformInstance(jenv, openCLDeviceObject);
    cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance);
    deviceId = OpenCLDevice::getDeviceId(jenv, openCLDeviceObject);
+   sharedMemory = OpenCLDevice::isSharedMemory(jenv, openCLDeviceObject);
    cl_device_type returnedDeviceType;
    clGetDeviceInfo(deviceId, CL_DEVICE_TYPE, sizeof(returnedDeviceType), &returnedDeviceType, NULL);
    //fprintf(stderr, "device[%p] CL_DEVICE_TYPE = %x\n", deviceId, returnedDeviceType);
diff --git a/src/cpp/runKernel/JNIContext.h b/src/cpp/runKernel/JNIContext.h
index c76e864ef99cab54b0b8b30dee13690bdd39239b..cbf8f53140cb4c0175c2c820927d6a69900e1972 100644
--- a/src/cpp/runKernel/JNIContext.h
+++ b/src/cpp/runKernel/JNIContext.h
@@ -25,6 +25,7 @@
 
 class JNIContext {
 private:
+   bool sharedMemory; // result of OpenCLDevice::isSharedMemory() for this context's device
    jint flags;
    jboolean valid;
 public:
@@ -75,6 +76,11 @@ public:
       return((flags&com_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC)==com_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC?JNI_TRUE:JNI_FALSE);
    }
 
+   // true when the device was reported as shared-memory at construction time
+   bool isSharedMemory() const {
+      return sharedMemory;
+   }
+
    ~JNIContext(){
    }
 