diff --git a/com.amd.aparapi.jni/build.xml b/com.amd.aparapi.jni/build.xml index 32e0f7c8adf52eb98efebc206169dd599b256fa8..04d13122e2652e47f6c3d22f20f9b5ca1bc6d0b7 100644 --- a/com.amd.aparapi.jni/build.xml +++ b/com.amd.aparapi.jni/build.xml @@ -475,6 +475,9 @@ First consider editing the properties in build.properties <arg value="-o" /> <arg value="${basedir}/dist/libaparapi_${x86_or_x86_64}.so" /> <arg value="src/cpp/aparapi.cpp" /> + <arg value="src/cpp/config.cpp" /> + <arg value="src/cpp/profileInfo.cpp" /> + <arg value="src/cpp/arrayBuffer.cpp" /> <arg value="src/cpp/opencljni.cpp" /> <arg value="src/cpp/jniHelper.cpp" /> <arg value="src/cpp/clHelper.cpp" /> @@ -497,6 +500,9 @@ First consider editing the properties in build.properties <arg value="-o" /> <arg value="${basedir}/dist/libaparapi_${x86_or_x86_64}.dylib" /> <arg value="src/cpp/aparapi.cpp" /> + <arg value="src/cpp/config.cpp" /> + <arg value="src/cpp/profileInfo.cpp" /> + <arg value="src/cpp/arrayBuffer.cpp" /> <arg value="src/cpp/opencljni.cpp" /> <arg value="src/cpp/jniHelper.cpp" /> <arg value="src/cpp/clHelper.cpp" /> @@ -520,6 +526,9 @@ First consider editing the properties in build.properties <arg value="/Iinclude" /> <arg value="/I${amd.app.sdk.dir}\include" /> <arg value="src\cpp\aparapi.cpp" /> + <arg value="src/cpp/config.cpp" /> + <arg value="src/cpp/profileInfo.cpp" /> + <arg value="src/cpp/arrayBuffer.cpp" /> <arg value="src\cpp\opencljni.cpp" /> <arg value="src\cpp\jniHelper.cpp" /> <arg value="src/cpp/clHelper.cpp" /> @@ -549,6 +558,9 @@ First consider editing the properties in build.properties <arg value="-o" /> <arg value="${basedir}\dist\aparapi_${x86_or_x86_64}.dll" /> <arg value="src\cpp\aparapi.cpp" /> + <arg value="src/cpp/config.cpp" /> + <arg value="src/cpp/profileInfo.cpp" /> + <arg value="src/cpp/arrayBuffer.cpp" /> <arg value="src\cpp\opencljni.cpp" /> <arg value="src/cpp/jniHelper.cpp" /> <arg value="src/cpp/clHelper.cpp" /> diff --git a/com.amd.aparapi.jni/src/cpp/aparapi.cpp b/com.amd.aparapi.jni/src/cpp/aparapi.cpp index a84afbab49e5c3a6c039381dee96670ba051d95c..97378246ec3fb201aa04c926d87c55eb0df3f650 100644 --- a/com.amd.aparapi.jni/src/cpp/aparapi.cpp +++ b/com.amd.aparapi.jni/src/cpp/aparapi.cpp @@ -36,14 +36,15 @@ and Security?s website at http://www.bis.doc.gov/. */ #include "common.h" -#include "jniHelper.h" +#include "config.h" +#include "profileInfo.h" +#include "arrayBuffer.h" #include "clHelper.h" #define APARAPI_SOURCE #include "aparapi.h" #include "com_amd_aparapi_KernelRunner.h" #include "opencljni.h" - class Range{ public: static jclass rangeClazz; @@ -129,125 +130,41 @@ jfieldID Range::localSize_2_FieldID=0; jfieldID Range::dimsFieldID=0; jfieldID Range::localIsDerivedFieldID=0; -class ProfileInfo{ - public: - jboolean valid; - jint type; //0 write, 1 execute, 2 read - char *name; - cl_ulong queued; - cl_ulong submit; - cl_ulong start; - cl_ulong end; -}; - class JNIContext ; // forward reference -class ArrayBuffer{ - public: - JNIContext *jniContext; - jobject javaArray; // The java array or direct buffer that this arg is mapped to - bool isArray; // true if above is an array - cl_uint javaArrayLength; // the number of elements for arrays (used only when ARRAYLENGTH bit is set for this arg) - cl_mem mem; // the opencl buffer - void *addr; // we use this temporarily whilst we pin the primitive array - cl_uint memMask; // the mask we used for createBuffer - jboolean isCopy; - jboolean isPinned; - char memSpec[128]; // The string form of the mask we used for create buffer. for debugging - ProfileInfo read; - ProfileInfo write; - - ArrayBuffer(JNIContext *jniContext): - jniContext(jniContext){ - } - - void unpinAbort(JNIEnv *jenv){ - jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr,JNI_ABORT); - isPinned = JNI_FALSE; - } - void unpinCommit(JNIEnv *jenv){ - jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr, 0); - isPinned = JNI_FALSE; - } - void pin(JNIEnv *jenv){ - void *ptr = addr; - addr = jenv->GetPrimitiveArrayCritical((jarray)javaArray,&isCopy); - isPinned = JNI_TRUE; - } -}; - - class KernelArg{ private: static jclass argClazz; static jfieldID nameFieldID; static jfieldID typeFieldID; - static jfieldID isStaticFieldID; static jfieldID sizeInBytesFieldID; - static jfieldID numElementsFieldID; + static jfieldID numElementsFieldID; public: - JNIContext *jniContext; static jfieldID javaArrayFieldID; - jobject argObj; + public: + JNIContext *jniContext; + jobject argObj; // the Java KernelRunner.KernelArg object that we are mirroring. + jobject javaArg; // global reference to the corresponding java KernelArg object we grabbed our own global reference so that the object won't be collected until we dispose! char *name; // used for debugging printfs - jfieldID fieldID; // The field that this arg represents in the kernel (java), used only for primitive updates jint type; // a bit mask determining the type of this arg - jboolean isStatic; // A flag indicating if the value is static - jint sizeInBytes; // bytes in the array or directBuf - jobject javaArg; // global reference to the corresponding java KernelArg object - union{ - cl_char c; - cl_double d; - cl_float f; - cl_int i; - cl_long j; - ArrayBuffer *arrayBuffer; - } value; - - KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj): - jniContext(jniContext), - argObj(argObj){ - javaArg = jenv->NewGlobalRef(argObj); // save a global ref to the java Arg Object - if (argClazz == 0){ - jclass c = jenv->GetObjectClass(argObj); - nameFieldID = jenv->GetFieldID(c, "name", "Ljava/lang/String;"); ASSERT_FIELD(name); - typeFieldID = jenv->GetFieldID(c, "type", "I"); ASSERT_FIELD(type); - isStaticFieldID = jenv->GetFieldID(c, "isStatic", "Z"); ASSERT_FIELD(isStatic); - javaArrayFieldID = jenv->GetFieldID(c, "javaArray", "Ljava/lang/Object;"); ASSERT_FIELD(javaArray); - sizeInBytesFieldID = jenv->GetFieldID(c, "sizeInBytes", "I"); ASSERT_FIELD(sizeInBytes); - numElementsFieldID = jenv->GetFieldID(c, "numElements", "I"); ASSERT_FIELD(numElements); - argClazz = c; - } - type = jenv->GetIntField(argObj, typeFieldID); - isStatic = jenv->GetBooleanField(argObj, isStaticFieldID); - jstring nameString = (jstring)jenv->GetObjectField(argObj, nameFieldID); - const char *nameChars = jenv->GetStringUTFChars(nameString, NULL); -#ifdef _WIN32 - name=_strdup(nameChars); -#else - name=strdup(nameChars); -#endif - jenv->ReleaseStringUTFChars(nameString, nameChars); - if (isArray()){ - value.arrayBuffer= new ArrayBuffer(jniContext); - }else{ - value.arrayBuffer=NULL; - } - } + + ArrayBuffer *arrayBuffer; + + KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj); // Uses JNIContext so cant inline here see below ~KernelArg(){ } void unpinAbort(JNIEnv *jenv){ - value.arrayBuffer->unpinAbort(jenv); + arrayBuffer->unpinAbort(jenv); } void unpinCommit(JNIEnv *jenv){ - value.arrayBuffer->unpinCommit(jenv); + arrayBuffer->unpinCommit(jenv); } void unpin(JNIEnv *jenv){ - //if (value.ref.isPinned == JNI_FALSE){ - // fprintf(stdout, "why are we unpinning buffer %s! isPinned = JNI_TRUE\n", name); - //} + //if (value.ref.isPinned == JNI_FALSE){ + // fprintf(stdout, "why are we unpinning buffer %s! isPinned = JNI_TRUE\n", name); + //} if (isMutableByKernel()){ // we only need to commit if the buffer has been written to // we use mode=0 in that case (rather than JNI_COMMIT) because that frees any copy buffer if it exists @@ -259,7 +176,7 @@ class KernelArg{ } } void pin(JNIEnv *jenv){ - value.arrayBuffer->pin(jenv); + arrayBuffer->pin(jenv); } int isArray(){ @@ -313,15 +230,15 @@ class KernelArg{ int isLocal(){ return (type&com_amd_aparapi_KernelRunner_ARG_LOCAL); } + int isStatic(){ + return (type&com_amd_aparapi_KernelRunner_ARG_STATIC); + } int isConstant(){ return (type&com_amd_aparapi_KernelRunner_ARG_CONSTANT); } int isAparapiBuf(){ return (type&com_amd_aparapi_KernelRunner_ARG_APARAPI_BUF); } - //int isAparapiBufHasArray(){ - // return (type&com_amd_aparapi_KernelRunner_ARG_APARAPI_BUF_HAS_ARRAY); - //} int isBackedByArray(){ return ( (isArray() && (isGlobal() || isConstant()))); } @@ -329,111 +246,42 @@ class KernelArg{ return(((isArray() && isGlobal()) || ((isAparapiBuf()&&isGlobal()))) && (isImplicit()&&isMutableByKernel())); } int needToEnqueueWrite(){ - // if (isExplicitWrite()){ - // fprintf(stderr, "%s isExplicitWrite()\n", name); - // } - // if (isExplicit()){ - // fprintf(stderr, "%s isExplicit()\n", name); - // } - // fprintf(stderr, "%s neetToEnqueueWrite = %d\n", name, returnValue); return ((isImplicit()&&isReadByKernel())||(isExplicit()&&isExplicitWrite())); } void syncType(JNIEnv* jenv){ type = jenv->GetIntField(javaArg, typeFieldID); } void syncSizeInBytes(JNIEnv* jenv){ - sizeInBytes = jenv->GetIntField(javaArg, sizeInBytesFieldID); + arrayBuffer->lengthInBytes = jenv->GetIntField(javaArg, sizeInBytesFieldID); } void syncJavaArrayLength(JNIEnv* jenv){ - value.arrayBuffer->javaArrayLength = jenv->GetIntField(javaArg, numElementsFieldID); + arrayBuffer->length = jenv->GetIntField(javaArg, numElementsFieldID); } void clearExplicitBufferBit(JNIEnv* jenv){ type &= ~com_amd_aparapi_KernelRunner_ARG_EXPLICIT_WRITE; jenv->SetIntField(javaArg, typeFieldID,type ); } + + void syncValue(JNIEnv *jenv); // Uses JNIContext so can't inline here we below. + cl_int setLocalBufferArg(JNIEnv *jenv, int argIdx, int argPos); // Uses JNIContext so can't inline here we below. + cl_int setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos ); // Uses JNIContext so can't inline here we below. }; jclass KernelArg::argClazz=(jclass)0; jfieldID KernelArg::nameFieldID=0; jfieldID KernelArg::typeFieldID=0; -jfieldID KernelArg::isStaticFieldID=0; jfieldID KernelArg::javaArrayFieldID=0; jfieldID KernelArg::sizeInBytesFieldID=0; jfieldID KernelArg::numElementsFieldID=0; -template <class T> class List; // forward - -template <class T> class Handle{ - private: - T value; - int line; - Handle<T> *next; - friend class List<T>; - public: - Handle(T _value, int _line): value(_value), line(_line),next(NULL){ - } -}; - -template <class T> class List{ - private: - char *name; - Handle<T> *head; - int count; - public: - List(char *_name): head(NULL), count(0), name(_name){ - } - void add(T _value, int _line){ - Handle<T> *handle = new Handle<T>(_value, _line); - handle->next = head; - head = handle; - count++; - //fprintf(stdout, "LINE %d added %s %0lx\n", _line, name, _value); - } - void remove(T _value, int _line){ - for (Handle<T> *ptr = head, *last=NULL; ptr != NULL; last=ptr, ptr = ptr->next){ - if (ptr->value == _value){ - if (last == NULL){ // head - head = ptr->next; - }else{ // !head - last->next = ptr->next; - } - delete ptr; - count--; - //fprintf(stdout, "LINE %d removed %s %0lx\n", _line, name, _value); - return; - } - } - fprintf(stderr, "LINE %d failed to find %s to remove %0lx\n", _line, name, _value); - } - void report(FILE *stream){ - if (head != NULL){ - fprintf(stream, "Resource report %d resources of type %s still in play ", count, name); - for (Handle<T> *ptr = head; ptr != NULL; ptr = ptr->next){ - fprintf(stream, " %0lx(%d)", ptr->value, ptr->line); - } - fprintf(stream, "\n"); - } - } -}; - -List<cl_command_queue> commandQueueList("cl_command_queue"); -List<cl_mem> memList("cl_mem"); -List<cl_event> readEventList("cl_event (read)"); -List<cl_event> executeEventList("cl_event (exec)"); -List<cl_event> writeEventList("cl_event (write)"); - class JNIContext{ private: jint flags; jboolean valid; - //cl_platform_id platform; - //cl_platform_id* platforms; - //cl_uint platformc; public: jobject kernelObject; jobject openCLDeviceObject; jclass kernelClass; - //cl_uint deviceIdc; cl_device_id deviceId; cl_int deviceType; cl_context context; @@ -452,13 +300,6 @@ class JNIContext{ jint passes; ProfileInfo *exec; FILE* profileFile; - // these map to camelCase form of CL_DEVICE_XXX_XXX For example CL_DEVICE_MAX_COMPUTE_UNITS == maxComputeUnits - // cl_uint maxComputeUnits; - // cl_uint maxWorkItemDimensions; - // size_t *maxWorkItemSizes; - // size_t maxWorkGroupSize; - // cl_ulong globalMemSize; - // cl_ulong localMemSize; static JNIContext* getJNIContext(jlong jniContextHandle){ return((JNIContext*)jniContextHandle); @@ -469,7 +310,6 @@ class JNIContext{ kernelClass((jclass)jenv->NewGlobalRef(jenv->GetObjectClass(_kernelObject))), openCLDeviceObject(jenv->NewGlobalRef(_openCLDeviceObject)), flags(_flags), - // platform(NULL), profileBaseTime(0), passes(0), exec(NULL), @@ -477,164 +317,297 @@ class JNIContext{ profileFile(NULL), valid(JNI_FALSE){ cl_int status = CL_SUCCESS; - // The device is passed to us. So just extract the device Id, platform Id etc and create a context. - // jobject platformInstance = OpenCLDevice::getPlatformInstance(jenv, openCLDeviceObject); cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance); deviceId = OpenCLDevice::getDeviceId(jenv, openCLDeviceObject); cl_device_type returnedDeviceType; clGetDeviceInfo(deviceId, CL_DEVICE_TYPE, sizeof(returnedDeviceType), &returnedDeviceType, NULL); - //fprintf(stderr, "device[%d] CL_DEVICE_TYPE = %x\n", deviceId, returnedDeviceType); + //fprintf(stderr, "device[%d] CL_DEVICE_TYPE = %x\n", deviceId, returnedDeviceType); - cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformId, 0 }; - cl_context_properties* cprops = (NULL == platformId) ? NULL : cps; - context = clCreateContextFromType( cprops, returnedDeviceType, NULL, NULL, &status); + cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformId, 0 }; + cl_context_properties* cprops = (NULL == platformId) ? NULL : cps; + context = clCreateContextFromType( cprops, returnedDeviceType, NULL, NULL, &status); ASSERT_CL_NO_RETURN("clCreateContextFromType()"); if (status == CL_SUCCESS){ valid = JNI_TRUE; } } -jboolean isValid(){ - return(valid); -} -jboolean isProfilingCSVEnabled(){ - return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING_CSV)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING_CSV?JNI_TRUE:JNI_FALSE); -} -jboolean isTrackingOpenCLResources(){ - return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING?JNI_TRUE:JNI_FALSE); -} -jboolean isProfilingEnabled(){ - return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING?JNI_TRUE:JNI_FALSE); -} -jboolean isUsingGPU(){ - return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU)==com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU?JNI_TRUE:JNI_FALSE); -} -jboolean isVerbose(){ - return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI?JNI_TRUE:JNI_FALSE); -} - -~JNIContext(){ -} + jboolean isValid(){ + return(valid); + } + jboolean isUsingGPU(){ + return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU)==com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU?JNI_TRUE:JNI_FALSE); + } + ~JNIContext(){ + } -void dispose(JNIEnv *jenv){ - //fprintf(stdout, "dispose()\n"); - cl_int status = CL_SUCCESS; - jenv->DeleteGlobalRef(kernelObject); - jenv->DeleteGlobalRef(kernelClass); - if (context != 0){ - status = clReleaseContext(context); - //fprintf(stdout, "dispose context %0lx\n", context); - ASSERT_CL_NO_RETURN("clReleaseContext()"); - context = (cl_context)0; - } - if (commandQueue != 0){ - if (isTrackingOpenCLResources()){ - commandQueueList.remove((cl_command_queue)commandQueue, __LINE__); - } - status = clReleaseCommandQueue((cl_command_queue)commandQueue); - //fprintf(stdout, "dispose commandQueue %0lx\n", commandQueue); - ASSERT_CL_NO_RETURN("clReleaseCommandQueue()"); - commandQueue = (cl_command_queue)0; - } - if (program != 0){ - status = clReleaseProgram((cl_program)program); - //fprintf(stdout, "dispose program %0lx\n", program); - ASSERT_CL_NO_RETURN("clReleaseProgram()"); - program = (cl_program)0; - } - if (kernel != 0){ - status = clReleaseKernel((cl_kernel)kernel); - //fprintf(stdout, "dispose kernel %0lx\n", kernel); - ASSERT_CL_NO_RETURN("clReleaseKernel()"); - kernel = (cl_kernel)0; - } - if (argc> 0){ - for (int i=0; i< argc; i++){ - KernelArg *arg = args[i]; - if (!arg->isPrimitive()){ - if (arg->value.arrayBuffer != NULL){ - if (arg->value.arrayBuffer->mem != 0){ - if (isTrackingOpenCLResources()){ - memList.remove((cl_mem)arg->value.arrayBuffer->mem, __LINE__); + void dispose(JNIEnv *jenv){ + //fprintf(stdout, "dispose()\n"); + cl_int status = CL_SUCCESS; + jenv->DeleteGlobalRef(kernelObject); + jenv->DeleteGlobalRef(kernelClass); + if (context != 0){ + status = clReleaseContext(context); + //fprintf(stdout, "dispose context %0lx\n", context); + ASSERT_CL_NO_RETURN("clReleaseContext()"); + context = (cl_context)0; + } + if (commandQueue != 0){ + if (config->isTrackingOpenCLResources()){ + commandQueueList.remove((cl_command_queue)commandQueue, __LINE__, __FILE__); + } + status = clReleaseCommandQueue((cl_command_queue)commandQueue); + //fprintf(stdout, "dispose commandQueue %0lx\n", commandQueue); + ASSERT_CL_NO_RETURN("clReleaseCommandQueue()"); + commandQueue = (cl_command_queue)0; + } + if (program != 0){ + status = clReleaseProgram((cl_program)program); + //fprintf(stdout, "dispose program %0lx\n", program); + ASSERT_CL_NO_RETURN("clReleaseProgram()"); + program = (cl_program)0; + } + if (kernel != 0){ + status = clReleaseKernel((cl_kernel)kernel); + //fprintf(stdout, "dispose kernel %0lx\n", kernel); + ASSERT_CL_NO_RETURN("clReleaseKernel()"); + kernel = (cl_kernel)0; + } + if (argc> 0){ + for (int i=0; i< argc; i++){ + KernelArg *arg = args[i]; + if (!arg->isPrimitive()){ + if (arg->arrayBuffer != NULL){ + if (arg->arrayBuffer->mem != 0){ + if (config->isTrackingOpenCLResources()){ + memList.remove((cl_mem)arg->arrayBuffer->mem, __LINE__, __FILE__); + } + status = clReleaseMemObject((cl_mem)arg->arrayBuffer->mem); + //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->arrayBuffer->mem); + ASSERT_CL_NO_RETURN("clReleaseMemObject()"); + arg->arrayBuffer->mem = (cl_mem)0; + } + if (arg->arrayBuffer->javaArray != NULL) { + jenv->DeleteWeakGlobalRef((jweak) arg->arrayBuffer->javaArray); + } + delete arg->arrayBuffer; + arg->arrayBuffer = NULL; } - status = clReleaseMemObject((cl_mem)arg->value.arrayBuffer->mem); - //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->value.arrayBuffer->mem); - ASSERT_CL_NO_RETURN("clReleaseMemObject()"); - arg->value.arrayBuffer->mem = (cl_mem)0; } - if (arg->value.arrayBuffer->javaArray != NULL) { - jenv->DeleteWeakGlobalRef((jweak) arg->value.arrayBuffer->javaArray); + if (arg->name != NULL){ + free(arg->name); arg->name = NULL; } - delete arg->value.arrayBuffer; - arg->value.arrayBuffer = NULL; + if (arg->javaArg != NULL ) { + jenv->DeleteGlobalRef((jobject) arg->javaArg); + } + delete arg; arg=args[i]=NULL; } + delete[] args; args=NULL; + + // do we need to call clReleaseEvent on any of these that are still retained.... + delete []readEvents; readEvents =NULL; + delete []writeEvents; writeEvents = NULL; + delete []executeEvents; executeEvents = NULL; + + if (config->isProfilingEnabled()) { + if (config->isProfilingCSVEnabled()) { + if (profileFile != NULL && profileFile != stderr) { + fclose(profileFile); + } + } + delete[] readEventArgs; readEventArgs=0; + delete[] writeEventArgs; writeEventArgs=0; + } } - if (arg->name != NULL){ - free(arg->name); arg->name = NULL; - } - if (arg->javaArg != NULL ) { - jenv->DeleteGlobalRef((jobject) arg->javaArg); + if (config->isTrackingOpenCLResources()){ + fprintf(stderr, "after dispose{ \n"); + commandQueueList.report(stderr); + memList.report(stderr); + readEventList.report(stderr); + executeEventList.report(stderr); + writeEventList.report(stderr); + fprintf(stderr, "}\n"); } - delete arg; arg=args[i]=NULL; } - delete[] args; args=NULL; - - // do we need to call clReleaseEvent on any of these that are still retained.... - delete []readEvents; readEvents =NULL; - delete []writeEvents; writeEvents = NULL; - delete []executeEvents; executeEvents = NULL; - if (isProfilingEnabled()) { - if (isProfilingCSVEnabled()) { - if (profileFile != NULL && profileFile != stderr) { - fclose(profileFile); + /* + Release JNI critical pinned arrays before returning to java code + */ + void unpinAll(JNIEnv* jenv) { + for (int i=0; i< argc; i++){ + KernelArg *arg = args[i]; + if (arg->isBackedByArray()) { + arg->unpin(jenv); } } - delete[] readEventArgs; readEventArgs=0; - delete[] writeEventArgs; writeEventArgs=0; - } - } - if (isTrackingOpenCLResources()){ - fprintf(stderr, "after dispose{ \n"); - commandQueueList.report(stderr); - memList.report(stderr); - readEventList.report(stderr); - executeEventList.report(stderr); - writeEventList.report(stderr); - fprintf(stderr, "}\n"); - } -} - -/* - Release JNI critical pinned arrays before returning to java code - */ -void unpinAll(JNIEnv* jenv) { - for (int i=0; i< argc; i++){ - KernelArg *arg = args[i]; - if (arg->isBackedByArray()) { - arg->unpin(jenv); } - } -} }; +KernelArg::KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj): + jniContext(jniContext), + argObj(argObj){ + javaArg = jenv->NewGlobalRef(argObj); // save a global ref to the java Arg Object + if (argClazz == 0){ + jclass c = jenv->GetObjectClass(argObj); + nameFieldID = jenv->GetFieldID(c, "name", "Ljava/lang/String;"); ASSERT_FIELD(name); + typeFieldID = jenv->GetFieldID(c, "type", "I"); ASSERT_FIELD(type); + javaArrayFieldID = jenv->GetFieldID(c, "javaArray", "Ljava/lang/Object;"); ASSERT_FIELD(javaArray); + sizeInBytesFieldID = jenv->GetFieldID(c, "sizeInBytes", "I"); ASSERT_FIELD(sizeInBytes); + numElementsFieldID = jenv->GetFieldID(c, "numElements", "I"); ASSERT_FIELD(numElements); + argClazz = c; + } + type = jenv->GetIntField(argObj, typeFieldID); + jstring nameString = (jstring)jenv->GetObjectField(argObj, nameFieldID); + const char *nameChars = jenv->GetStringUTFChars(nameString, NULL); +#ifdef _WIN32 + name=_strdup(nameChars); +#else + name=strdup(nameChars); +#endif + jenv->ReleaseStringUTFChars(nameString, nameChars); + if (isArray()){ + arrayBuffer= new ArrayBuffer(); + } + } +cl_int KernelArg::setLocalBufferArg(JNIEnv *jenv, int argIdx, int argPos){ + if (config->isVerbose()){ + fprintf(stderr, "ISLOCAL, clSetKernelArg(jniContext->kernel, %d, %d, NULL);\n", argIdx, (int) arrayBuffer->lengthInBytes); + } + return(clSetKernelArg(jniContext->kernel, argPos, (int)arrayBuffer->lengthInBytes, NULL)); +} +cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){ + cl_int status = CL_SUCCESS; + if (isFloat()){ + if (isStatic()){ + jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "F"); + jfloat f = jenv->GetStaticFloatField(jniContext->kernelClass, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg static primitive float '%s' index=%d pos=%d value=%f\n", + name, argIdx, argPos, f); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jfloat), &f); + }else{ + jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "F"); + jfloat f = jenv->GetFloatField(jniContext->kernelObject, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg primitive float '%s' index=%d pos=%d value=%f\n", + name, argIdx, argPos, f); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jfloat), &f); + } + }else if (isInt()){ + if (isStatic()){ + jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "I"); + jint i = jenv->GetStaticIntField(jniContext->kernelClass, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg static primitive int '%s' index=%d pos=%d value=%d\n", + name, argIdx, argPos, i); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &i); + }else{ + jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "I"); + jint i = jenv->GetIntField(jniContext->kernelObject, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg primitive int '%s' index=%d pos=%d value=%d\n", + name, argIdx, argPos, i); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &i); + } + }else if (isBoolean()){ + if (isStatic()){ + jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "Z"); + jboolean z = jenv->GetStaticBooleanField(jniContext->kernelClass, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg static primitive boolean '%s' index=%d pos=%d value=%d\n", + name, argIdx, argPos, z); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jboolean), &z); + }else{ + jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "Z"); + jboolean z = jenv->GetBooleanField(jniContext->kernelObject, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg primitive boolean '%s' index=%d pos=%d value=%d\n", + name, argIdx, argPos, z); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jboolean), &z); + } + }else if (isByte()){ + if (isStatic()){ + jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "B"); + jbyte b = jenv->GetStaticByteField(jniContext->kernelClass, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg static primitive byte '%s' index=%d pos=%d value=%d\n", + name, argIdx, argPos, b); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jbyte), &b); + }else{ + jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "B"); + jbyte b = jenv->GetByteField(jniContext->kernelObject, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg primitive byte '%s' index=%d pos=%d value=%d\n", + name, argIdx, argPos, b); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jbyte), &b); + } + }else if (isLong()){ + if (isStatic()){ + jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "J"); + jlong j = jenv->GetStaticLongField(jniContext->kernelClass, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg static primitive long '%s' index=%d pos=%d value=%ld\n", + name, argIdx, argPos, j); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jlong), &j); + }else{ + jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "J"); + jlong j = jenv->GetLongField(jniContext->kernelObject, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg primitive long '%s' index=%d pos=%d value=%ld\n", + name, argIdx, argPos, j); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jlong), &j); + } + }else if (isDouble()){ + if (isStatic()){ + jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "D"); + jdouble d = jenv->GetStaticDoubleField(jniContext->kernelClass, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg static primitive long '%s' index=%d pos=%d value=%lf\n", + name, argIdx, argPos, d); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jdouble), &d); + }else{ + jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "D"); + jdouble d = jenv->GetDoubleField(jniContext->kernelObject, fieldID); + if (config->isVerbose()){ + fprintf(stderr, "clSetKernelArg primitive long '%s' index=%d pos=%d value=%lf\n", + name, argIdx, argPos, d); + } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jdouble), &d); + } + } + return status; +} JNI_JAVA(jint, KernelRunner, disposeJNI) (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) { - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext != NULL){ - jniContext->dispose(jenv); - delete jniContext; - jniContext = NULL; + if (config== NULL){ + config = new Config(jenv); + } + cl_int status = CL_SUCCESS; + JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); + if (jniContext != NULL){ + jniContext->dispose(jenv); + delete jniContext; + jniContext = NULL; + } + return(status); } - return(status); -} void idump(char *str, void *ptr, int size){ int * iptr = (int *)ptr; @@ -666,15 +639,15 @@ jint writeProfileInfo(JNIContext* jniContext){ // Initialize the base time for this sample if (currSampleBaseTime == -1) { - currSampleBaseTime = arg->value.arrayBuffer->write.queued; + currSampleBaseTime = arg->arrayBuffer->write.queued; } fprintf(jniContext->profileFile, "%d write %s,", pos++, arg->name); fprintf(jniContext->profileFile, "%lu,%lu,%lu,%lu,", - (arg->value.arrayBuffer->write.queued - currSampleBaseTime)/1000, - (arg->value.arrayBuffer->write.submit - currSampleBaseTime)/1000, - (arg->value.arrayBuffer->write.start - currSampleBaseTime)/1000, - (arg->value.arrayBuffer->write.end - currSampleBaseTime)/1000); + (arg->arrayBuffer->write.queued - currSampleBaseTime)/1000, + (arg->arrayBuffer->write.submit - currSampleBaseTime)/1000, + (arg->arrayBuffer->write.start - currSampleBaseTime)/1000, + (arg->arrayBuffer->write.end - currSampleBaseTime)/1000); } } @@ -705,16 +678,16 @@ jint writeProfileInfo(JNIContext* jniContext){ // Initialize the base time for this sample if (currSampleBaseTime == -1) { - currSampleBaseTime = arg->value.arrayBuffer->read.queued; + currSampleBaseTime = arg->arrayBuffer->read.queued; } fprintf(jniContext->profileFile, "%d read %s,", pos++, arg->name); fprintf(jniContext->profileFile, "%lu,%lu,%lu,%lu,", - (arg->value.arrayBuffer->read.queued - currSampleBaseTime)/1000, - (arg->value.arrayBuffer->read.submit - currSampleBaseTime)/1000, - (arg->value.arrayBuffer->read.start - currSampleBaseTime)/1000, - (arg->value.arrayBuffer->read.end - currSampleBaseTime)/1000); + (arg->arrayBuffer->read.queued - currSampleBaseTime)/1000, + (arg->arrayBuffer->read.submit - currSampleBaseTime)/1000, + (arg->arrayBuffer->read.start - currSampleBaseTime)/1000, + (arg->arrayBuffer->read.end - currSampleBaseTime)/1000); } } } @@ -755,60 +728,59 @@ jint updateNonPrimitiveReferences(JNIEnv *jenv, jobject jobj, JNIContext* jniCon KernelArg *arg=jniContext->args[i]; arg->syncType(jenv); // make sure that the JNI arg reflects the latest type info from the instance. For example if the buffer is tagged as explicit and needs to be pushed - if (jniContext->isVerbose()){ + if (config->isVerbose()){ fprintf(stderr, "got type for %s: %08x\n", arg->name, arg->type); } if (!arg->isPrimitive()) { // Following used for all primitive arrays, object arrays and nio Buffers jarray newRef = (jarray)jenv->GetObjectField(arg->javaArg, KernelArg::javaArrayFieldID); - if (jniContext->isVerbose()){ - fprintf(stderr, "testing for Resync javaArray %s: old=%p, new=%p\n", arg->name, arg->value.arrayBuffer->javaArray, newRef); + if (config->isVerbose()){ + fprintf(stderr, "testing for Resync javaArray %s: old=%p, new=%p\n", arg->name, arg->arrayBuffer->javaArray, newRef); } - if (!jenv->IsSameObject(newRef, arg->value.arrayBuffer->javaArray)) { - if (jniContext->isVerbose()){ - fprintf(stderr, "Resync javaArray for %s: %p %p\n", arg->name, newRef, arg->value.arrayBuffer->javaArray); + if (!jenv->IsSameObject(newRef, arg->arrayBuffer->javaArray)) { + if (config->isVerbose()){ + fprintf(stderr, "Resync javaArray for %s: %p %p\n", arg->name, newRef, arg->arrayBuffer->javaArray); } // Free previous ref if any - if (arg->value.arrayBuffer->javaArray != NULL) { - jenv->DeleteWeakGlobalRef((jweak) arg->value.arrayBuffer->javaArray); - if (jniContext->isVerbose()){ - fprintf(stderr, "DeleteWeakGlobalRef for %s: %p\n", arg->name, arg->value.arrayBuffer->javaArray); + if (arg->arrayBuffer->javaArray != NULL) { + jenv->DeleteWeakGlobalRef((jweak) arg->arrayBuffer->javaArray); + if (config->isVerbose()){ + fprintf(stderr, "DeleteWeakGlobalRef for %s: %p\n", arg->name, arg->arrayBuffer->javaArray); } } // need to free opencl buffers, run will reallocate later - if (arg->value.arrayBuffer->mem != 0) { + if (arg->arrayBuffer->mem != 0) { //fprintf(stderr, "-->releaseMemObject[%d]\n", i); - if (jniContext->isTrackingOpenCLResources()){ - memList.remove(arg->value.arrayBuffer->mem,__LINE__); + if (config->isTrackingOpenCLResources()){ + memList.remove(arg->arrayBuffer->mem,__LINE__, __FILE__); } - status = clReleaseMemObject((cl_mem)arg->value.arrayBuffer->mem); + status = clReleaseMemObject((cl_mem)arg->arrayBuffer->mem); //fprintf(stderr, "<--releaseMemObject[%d]\n", i); ASSERT_CL("clReleaseMemObject()"); - arg->value.arrayBuffer->mem = (cl_mem)0; + arg->arrayBuffer->mem = (cl_mem)0; } - arg->value.arrayBuffer->addr = NULL; + arg->arrayBuffer->addr = NULL; // Capture new array ref from the kernel arg object if (newRef != NULL) { - arg->value.arrayBuffer->javaArray = (jarray)jenv->NewWeakGlobalRef((jarray)newRef); - if (jniContext->isVerbose()){ + arg->arrayBuffer->javaArray = (jarray)jenv->NewWeakGlobalRef((jarray)newRef); + if (config->isVerbose()){ fprintf(stderr, "NewWeakGlobalRef for %s, set to %p\n", arg->name, - arg->value.arrayBuffer->javaArray); + arg->arrayBuffer->javaArray); } } else { - arg->value.arrayBuffer->javaArray = NULL; + arg->arrayBuffer->javaArray = NULL; } - arg->value.arrayBuffer->isArray = true; - // Save the sizeInBytes which was set on the java side + // Save the lengthInBytes which was set on the java side arg->syncSizeInBytes(jenv); - if (jniContext->isVerbose()){ - fprintf(stderr, "updateNonPrimitiveReferences, args[%d].sizeInBytes=%d\n", i, arg->sizeInBytes); + if (config->isVerbose()){ + fprintf(stderr, "updateNonPrimitiveReferences, args[%d].lengthInBytes=%d\n", i, arg->arrayBuffer->lengthInBytes); } } // object has changed } @@ -821,14 +793,17 @@ jint updateNonPrimitiveReferences(JNIEnv *jenv, jobject jobj, JNIContext* jniCon JNI_JAVA(jint, KernelRunner, runKernelJNI) (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobject _range, jboolean needSync, jint passes) { + if (config== NULL){ + config = new Config(jenv); + } - Range range(jenv, _range); + Range range(jenv, _range); - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); + cl_int status = CL_SUCCESS; + JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext->firstRun && jniContext->isProfilingEnabled()){ + if (jniContext->firstRun && config->isProfilingEnabled()){ cl_event firstEvent; #ifdef CL_VERSION_1_2 status = clEnqueueMarkerWithWaitList(jniContext->commandQueue, 0, NULL, &firstEvent); @@ -847,7 +822,7 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI) } status = clGetEventProfilingInfo(firstEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(jniContext->profileBaseTime), &(jniContext->profileBaseTime), NULL); if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clGetEventProfilingInfo#1"); + PRINT_CL_ERR(status, "clGetEventProfilingInfo#1"); return 0L; } clReleaseEvent(firstEvent); @@ -855,282 +830,237 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI) PRINT_CL_ERR(status, "clReleaseEvent() read event"); return 0L; } - if (jniContext->isVerbose()){ + if (config->isVerbose()){ fprintf(stderr, "profileBaseTime %lu \n", jniContext->profileBaseTime); - } + } } - // Need to capture array refs - if (jniContext->firstRun || needSync) { - updateNonPrimitiveReferences(jenv, jobj, jniContext ); - if (jniContext->isVerbose()){ - fprintf(stderr, "back from updateNonPrimitiveReferences\n"); + // Need to capture array refs + if (jniContext->firstRun || needSync) { + updateNonPrimitiveReferences(jenv, jobj, jniContext ); + if (config->isVerbose()){ + fprintf(stderr, "back from updateNonPrimitiveReferences\n"); + } } - } - int writeEventCount = 0; + int writeEventCount = 0; - // kernelArgPos is used to keep track of the kernel arg position, it can - // differ from "i" due to insertion of javaArrayLength args which are not - // fields read from the kernel object. - int kernelArgPos = 0; + // argPos is used to keep track of the kernel arg position, it can + // differ from "argIdx" due to insertion of javaArrayLength args which are not + // fields read from the kernel object. - for (int i=0; i< jniContext->argc; i++){ - KernelArg *arg = jniContext->args[i]; - arg->syncType(jenv); // make sure that the JNI arg reflects the latest type info from the instance. For example if the buffer is tagged as explicit and needs to be pushed + int argPos=0; + for (int argIdx=0; argIdx< jniContext->argc; argIdx++, argPos++){ + KernelArg *arg = jniContext->args[argIdx]; + arg->syncType(jenv); // make sure that the JNI arg reflects the latest type info from the instance. For example if the buffer is tagged as explicit and needs to be pushed - if (jniContext->isVerbose()){ - fprintf(stderr, "got type for arg %d, %s, type=%08x\n", i, arg->name, arg->type); - } - if (!arg->isPrimitive() && !arg->isLocal()) { - if (jniContext->isProfilingEnabled()){ - arg->value.arrayBuffer->read.valid = false; - arg->value.arrayBuffer->write.valid = false; + if (config->isVerbose()){ + fprintf(stderr, "got type for arg %d, %s, type=%08x\n", argIdx, arg->name, arg->type); } - // pin the arrays so that GC does not move them during the call + if (!arg->isPrimitive() && !arg->isLocal()) { + if (config->isProfilingEnabled()){ + arg->arrayBuffer->read.valid = false; + arg->arrayBuffer->write.valid = false; + } + // pin the arrays so that GC does not move them during the call - // get the C memory address for the region being transferred - // this uses different JNI calls for arrays vs. directBufs - void * prevAddr = arg->value.arrayBuffer->addr; - if (arg->value.arrayBuffer->isArray) { + // get the C memory address for the region being transferred + // this uses different JNI calls for arrays vs. directBufs + void * prevAddr = arg->arrayBuffer->addr; arg->pin(jenv); - } - if (jniContext->isVerbose()){ - fprintf(stderr, "runKernel: arrayOrBuf ref %p, oldAddr=%p, newAddr=%p, ref.mem=%p, isArray=%d\n", - arg->value.arrayBuffer->javaArray, - prevAddr, - arg->value.arrayBuffer->addr, - arg->value.arrayBuffer->mem, - arg->value.arrayBuffer->isArray ); - fprintf(stderr, "at memory addr %p, contents: ", arg->value.arrayBuffer->addr); - unsigned char *pb = (unsigned char *) arg->value.arrayBuffer->addr; - for (int k=0; k<8; k++) { - fprintf(stderr, "%02x ", pb[k]); + if (config->isVerbose()){ + fprintf(stderr, "runKernel: arrayOrBuf ref %p, oldAddr=%p, newAddr=%p, ref.mem=%p\n", + arg->arrayBuffer->javaArray, + prevAddr, + arg->arrayBuffer->addr, + arg->arrayBuffer->mem); + fprintf(stderr, "at memory addr %p, contents: ", arg->arrayBuffer->addr); + unsigned char *pb = (unsigned char *) arg->arrayBuffer->addr; + for (int k=0; k<8; k++) { + fprintf(stderr, "%02x ", pb[k]); + } + fprintf(stderr, "\n" ); } - fprintf(stderr, "\n" ); - } - // record whether object moved - // if we see that isCopy was returned by getPrimitiveArrayCritical, treat that as a move - bool objectMoved = (arg->value.arrayBuffer->addr != prevAddr) || arg->value.arrayBuffer->isCopy; + // record whether object moved + // if we see that isCopy was returned by getPrimitiveArrayCritical, treat that as a move + bool objectMoved = (arg->arrayBuffer->addr != prevAddr) || arg->arrayBuffer->isCopy; - if (jniContext->isVerbose()){ - if (arg->isExplicit() && arg->isExplicitWrite()){ - fprintf(stderr, "explicit write of %s\n", arg->name); + if (config->isVerbose()){ + if (arg->isExplicit() && arg->isExplicitWrite()){ + fprintf(stderr, "explicit write of %s\n", arg->name); + } } - } - if (jniContext->firstRun || (arg->value.arrayBuffer->mem == 0) || objectMoved ){ + if (jniContext->firstRun || (arg->arrayBuffer->mem == 0) || objectMoved ){ - if (arg->value.arrayBuffer->mem != 0 && objectMoved){ - // we need to release the old buffer - if (jniContext->isTrackingOpenCLResources()){ - memList.remove((cl_mem)arg->value.arrayBuffer->mem, __LINE__); + if (arg->arrayBuffer->mem != 0 && objectMoved){ + // we need to release the old buffer + if (config->isTrackingOpenCLResources()){ + memList.remove((cl_mem)arg->arrayBuffer->mem, __LINE__, __FILE__); + } + status = clReleaseMemObject((cl_mem)arg->arrayBuffer->mem); + //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->arrayBuffer->mem); + ASSERT_CL_NO_RETURN("clReleaseMemObject()"); + arg->arrayBuffer->mem = (cl_mem)0; } - status = clReleaseMemObject((cl_mem)arg->value.arrayBuffer->mem); - //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->value.arrayBuffer->mem); - ASSERT_CL_NO_RETURN("clReleaseMemObject()"); - arg->value.arrayBuffer->mem = (cl_mem)0; - } - // if either this is the first run or user changed input array - // or gc moved something, then we create buffers/args - cl_uint mask = CL_MEM_USE_HOST_PTR; - if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE; - else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY; - else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY; - arg->value.arrayBuffer->memMask = mask; - if (jniContext->isVerbose()){ - strcpy(arg->value.arrayBuffer->memSpec,"CL_MEM_USE_HOST_PTR"); - if (mask & CL_MEM_READ_WRITE) strcat(arg->value.arrayBuffer->memSpec,"|CL_MEM_READ_WRITE"); - if (mask & CL_MEM_READ_ONLY) strcat(arg->value.arrayBuffer->memSpec,"|CL_MEM_READ_ONLY"); - if (mask & CL_MEM_WRITE_ONLY) strcat(arg->value.arrayBuffer->memSpec,"|CL_MEM_WRITE_ONLY"); - - fprintf(stderr, "%s %d clCreateBuffer(context, %s, size=%08x bytes, address=%08x, &status)\n", arg->name, - i, arg->value.arrayBuffer->memSpec, arg->sizeInBytes, arg->value.arrayBuffer->addr); - } - arg->value.arrayBuffer->mem = clCreateBuffer(jniContext->context, arg->value.arrayBuffer->memMask, - arg->sizeInBytes, arg->value.arrayBuffer->addr, &status); + // if either this is the first run or user changed input array + // or gc moved something, then we create buffers/args + cl_uint mask = CL_MEM_USE_HOST_PTR; + if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE; + else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY; + else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY; + arg->arrayBuffer->memMask = mask; + if (config->isVerbose()){ + strcpy(arg->arrayBuffer->memSpec,"CL_MEM_USE_HOST_PTR"); + if (mask & CL_MEM_READ_WRITE) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_WRITE"); + if (mask & CL_MEM_READ_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_ONLY"); + if (mask & CL_MEM_WRITE_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_WRITE_ONLY"); + + fprintf(stderr, "%s %d clCreateBuffer(context, %s, size=%08x bytes, address=%08x, &status)\n", arg->name, + argIdx, arg->arrayBuffer->memSpec, arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr); + } + arg->arrayBuffer->mem = clCreateBuffer(jniContext->context, arg->arrayBuffer->memMask, + arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr, &status); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clCreateBuffer"); - jniContext->unpinAll(jenv); - return status; - } - if (jniContext->isTrackingOpenCLResources()){ - memList.add(arg->value.arrayBuffer->mem, __LINE__); - } + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clCreateBuffer"); + jniContext->unpinAll(jenv); + return status; + } + if (config->isTrackingOpenCLResources()){ + memList.add(arg->arrayBuffer->mem, __LINE__, __FILE__); + } - status = clSetKernelArg(jniContext->kernel, kernelArgPos++, sizeof(cl_mem), (void *)&(arg->value.arrayBuffer->mem)); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clSetKernelArg (array)"); - jniContext->unpinAll(jenv); - return status; - } + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(cl_mem), (void *)&(arg->arrayBuffer->mem)); + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clSetKernelArg (array)"); + jniContext->unpinAll(jenv); + return status; + } - // Add the array length if needed - if (arg->usesArrayLength()){ - arg->syncJavaArrayLength(jenv); + // Add the array length if needed + if (arg->usesArrayLength()){ + arg->syncJavaArrayLength(jenv); - status = clSetKernelArg(jniContext->kernel, kernelArgPos++, sizeof(jint), &(arg->value.arrayBuffer->javaArrayLength)); + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &(arg->arrayBuffer->length)); - if (jniContext->isVerbose()){ - fprintf(stderr, "runKernel arg %d %s, javaArrayLength = %d\n", i, arg->name, arg->value.arrayBuffer->javaArrayLength); + if (config->isVerbose()){ + fprintf(stderr, "runKernel arg %d %s, length = %d\n", argIdx, arg->name, arg->arrayBuffer->length); + } + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clSetKernelArg (array length)"); + jniContext->unpinAll(jenv); + return status; + } + argPos++; } - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clSetKernelArg (array length)"); - jniContext->unpinAll(jenv); - return status; + } else { + // Keep the arg position in sync if no updates were required + if (arg->usesArrayLength()){ + argPos++; } } - } else { - // Keep the arg position in sync if no updates were required - kernelArgPos++; - if (arg->usesArrayLength()){ - kernelArgPos++; - } - } - // we only enqueue a write if we know the kernel actually reads the buffer or if there is an explicit write pending - // the default behavior for Constant buffers is also that there is no write enqueued unless explicit + // we only enqueue a write if we know the kernel actually reads the buffer or if there is an explicit write pending + // the default behavior for Constant buffers is also that there is no write enqueued unless explicit + + if (arg->needToEnqueueWrite() && !arg->isConstant()){ + if (config->isVerbose()){ + fprintf(stderr, "%swriting %s%sbuffer argIndex=%d argPos=%d %s\n", + (arg->isExplicit() ? "explicitly " : ""), + (arg->isConstant() ? "constant " : ""), + (arg->isLocal() ? "local " : ""), + argIdx, + argPos, + arg->name); + } + if (config->isProfilingEnabled()) { + jniContext->writeEventArgs[writeEventCount]=argIdx; + } - if (arg->needToEnqueueWrite() && !arg->isConstant()){ - if (jniContext->isVerbose()){ + status = clEnqueueWriteBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0, + arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr, 0, NULL, &(jniContext->writeEvents[writeEventCount])); + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clEnqueueWriteBuffer"); + jniContext->unpinAll(jenv); + return status; + } + if (config->isTrackingOpenCLResources()){ + writeEventList.add(jniContext->writeEvents[writeEventCount],__LINE__, __FILE__); + } + writeEventCount++; if (arg->isExplicit() && arg->isExplicitWrite()){ - fprintf(stderr, "writing explicit buffer %d %s\n", i, arg->name); + if (config->isVerbose()){ + fprintf(stderr, "clearing explicit buffer bit %d %s\n", argIdx, arg->name); + } + arg->clearExplicitBufferBit(jenv); } } - if (jniContext->isVerbose()){ - fprintf(stderr, "%s writing buffer %d %s\n", (arg->isExplicit() ? "explicitly" : ""), - i, arg->name); - } - if (jniContext->isProfilingEnabled()) { - jniContext->writeEventArgs[writeEventCount]=i; - } + } else if (arg->isLocal()){ + if (jniContext->firstRun){ // what if local buffer size has changed? We need a check for resize here. + status = arg->setLocalBufferArg(jenv, argIdx, argPos); + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clSetKernelArg() (local)"); + jniContext->unpinAll(jenv); + return status; + } - if (arg->isConstant()){ - fprintf(stderr, "writing constant buffer %s\n", arg->name); - } + // Add the array length if needed + if (arg->usesArrayLength()){ + arg->syncJavaArrayLength(jenv); - status = clEnqueueWriteBuffer(jniContext->commandQueue, arg->value.arrayBuffer->mem, CL_FALSE, 0, - arg->sizeInBytes, arg->value.arrayBuffer->addr, 0, NULL, &(jniContext->writeEvents[writeEventCount])); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clEnqueueWriteBuffer"); - jniContext->unpinAll(jenv); - return status; - } - if (jniContext->isTrackingOpenCLResources()){ - writeEventList.add(jniContext->writeEvents[writeEventCount],__LINE__); - } - writeEventCount++; - if (arg->isExplicit() && arg->isExplicitWrite()){ - if (jniContext->isVerbose()){ - fprintf(stderr, "clearing explicit buffer bit %d %s\n", i, arg->name); - } - arg->clearExplicitBufferBit(jenv); - } - } - } else if (arg->isLocal()){ - if (jniContext->firstRun){ - int bytes = arg->sizeInBytes; + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &(arg->arrayBuffer->length)); - if (jniContext->isVerbose()){ - fprintf(stderr, "ISLOCAL, clSetKernelArg(jniContext->kernel, %d, %d, NULL);\n", i, bytes); + if (config->isVerbose()){ + fprintf(stderr, "runKernel arg %d %s, javaArrayLength = %d\n", argIdx, arg->name, arg->arrayBuffer->length); + } + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clSetKernelArg (array length)"); + jniContext->unpinAll(jenv); + return status; + } + argPos++; + } + } else { + // Keep the arg position in sync if no updates were required + if (arg->usesArrayLength()){ + argPos++; + } } - status = clSetKernelArg(jniContext->kernel, kernelArgPos++, bytes, NULL); + }else{ // primitive arguments + status = arg->setPrimitiveArg(jenv, argIdx, argPos); if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clSetKernelArg() (local)"); + PRINT_CL_ERR(status, "clSetKernelArg()"); jniContext->unpinAll(jenv); return status; } - } else { - // Keep the arg position in sync if no updates were required - kernelArgPos++; - if (arg->usesArrayLength()){ - kernelArgPos++; - } - } - }else{ // primitive arguments - - // we need to reflectively sync the value out of the kernel object - if (arg->isFloat()){ - if (arg->isStatic){ - arg->value.f = jenv->GetStaticFloatField(jniContext->kernelClass, arg->fieldID); - }else{ - arg->value.f = jenv->GetFloatField(jniContext->kernelObject, arg->fieldID); - } - //fprintf(stderr, "float arg %d\n", arg->value.f); - }else if (arg->isInt()){ - if (arg->isStatic){ - arg->value.i = jenv->GetStaticIntField(jniContext->kernelClass, arg->fieldID); - }else{ - arg->value.i = jenv->GetIntField(jniContext->kernelObject, arg->fieldID); - } - //fprintf(stderr, "int arg %d\n", arg->value.i); - }else if (arg->isBoolean()){ - if (arg->isStatic){ - arg->value.c = jenv->GetStaticBooleanField(jniContext->kernelClass, arg->fieldID); - }else{ - arg->value.c = jenv->GetBooleanField(jniContext->kernelObject, arg->fieldID); - } - //fprintf(stderr, "boolean arg %d\n", arg->value.c); - }else if (arg->isByte()){ - if (arg->isStatic){ - arg->value.c = jenv->GetStaticByteField(jniContext->kernelClass, arg->fieldID); - }else{ - arg->value.c = jenv->GetByteField(jniContext->kernelObject, arg->fieldID); - } - //fprintf(stderr, "byte arg %d\n", arg->value.c); - }else if (arg->isLong()){ - if (arg->isStatic){ - arg->value.j = jenv->GetStaticLongField(jniContext->kernelClass, arg->fieldID); - }else{ - arg->value.j = jenv->GetLongField(jniContext->kernelObject, arg->fieldID); - } - //fprintf(stderr, "long arg %d\n", arg->value.c); - }else if (arg->isDouble()){ - if (arg->isStatic){ - arg->value.d = jenv->GetStaticDoubleField(jniContext->kernelClass, arg->fieldID); - }else{ - arg->value.d = jenv->GetDoubleField(jniContext->kernelObject, arg->fieldID); - } - //fprintf(stderr, "double arg %d\n", arg->value.c); - } - - if (jniContext->isVerbose()){ - fprintf(stderr, "clSetKernelArg %s: %d %d %d 0x%08x\n", arg->name, i, kernelArgPos, - arg->sizeInBytes, arg->value); - } - status = clSetKernelArg(jniContext->kernel, kernelArgPos++, arg->sizeInBytes, &(arg->value)); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clSetKernelArg() (value)"); - jniContext->unpinAll(jenv); - return status; } - } - } // for each arg + } // for each arg - // We will need to revisit the execution of multiple devices. - // POssibly cloning the range per device and mutating each to handle a unique subrange (of global) and - // maybe even pushing the offset into the range class. + // We will need to revisit the execution of multiple devices. + // POssibly cloning the range per device and mutating each to handle a unique subrange (of global) and + // maybe even pushing the offset into the range class. - // size_t globalSize_0AsSizeT = (range.globalDims[0] /jniContext->deviceIdc); - // size_t localSize_0AsSizeT = range.localDims[0]; + // size_t globalSize_0AsSizeT = (range.globalDims[0] /jniContext->deviceIdc); + // size_t localSize_0AsSizeT = range.localDims[0]; - // To support multiple passes we add a 'secret' final arg called 'passid' and just schedule multiple enqueuendrange kernels. Each of which having a separate value of passid - // - // - if (jniContext->exec){ // delete the last set - delete jniContext->exec; - } - jniContext->passes = passes; - jniContext->exec = new ProfileInfo[passes]; + // To support multiple passes we add a 'secret' final arg called 'passid' and just schedule multiple enqueuendrange kernels. Each of which having a separate value of passid + // + // + if (jniContext->exec){ // delete the last set + delete jniContext->exec; + jniContext->exec = NULL; + } + jniContext->passes = passes; + jniContext->exec = new ProfileInfo[passes]; - for (int passid=0; passid<passes; passid++){ + for (int passid=0; passid<passes; passid++){ //size_t offset = 1; // (size_t)((range.globalDims[0]/jniContext->deviceIdc)*dev); - status = clSetKernelArg(jniContext->kernel, kernelArgPos, sizeof(passid), &(passid)); + status = clSetKernelArg(jniContext->kernel, argPos, sizeof(passid), &(passid)); if (status != CL_SUCCESS) { PRINT_CL_ERR(status, "clSetKernelArg() (passid)"); jniContext->unpinAll(jenv); @@ -1159,15 +1089,15 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI) // we block and do supply executeEvents //fprintf(stderr, "setting passid to %d of %d not first not last\n", passid, passes); // - + status = clWaitForEvents(1, &jniContext->executeEvents[0]); if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clWaitForEvents() execute event"); - jniContext->unpinAll(jenv); - return status; + PRINT_CL_ERR(status, "clWaitForEvents() execute event"); + jniContext->unpinAll(jenv); + return status; } - if (jniContext->isTrackingOpenCLResources()){ - executeEventList.remove(jniContext->executeEvents[0],__LINE__); + if (config->isTrackingOpenCLResources()){ + executeEventList.remove(jniContext->executeEvents[0],__LINE__, __FILE__); } status = clReleaseEvent(jniContext->executeEvents[0]); if (status != CL_SUCCESS) { @@ -1176,9 +1106,9 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI) return status; } - + // We must capture any profile info for passid-1 so we must wait for the last execution to complete - if (passid == 1 && jniContext->isProfilingEnabled()) { + if (passid == 1 && config->isProfilingEnabled()) { // Now we can profile info for passid-1 status = profile(&jniContext->exec[passid-1], &jniContext->executeEvents[0], 1, NULL, jniContext->profileBaseTime); if (status != CL_SUCCESS) { @@ -1202,133 +1132,116 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI) if (status != CL_SUCCESS) { PRINT_CL_ERR(status, "clEnqueueNDRangeKernel()"); for(int i = 0; i<range.dims;i++) { - fprintf(stderr, "after clEnqueueNDRangeKernel, globalSize[%d] = %d, localSize[%d] = %d\n", - i, (int)range.globalDims[i], i, (int)range.localDims[i]); + fprintf(stderr, "after clEnqueueNDRangeKernel, globalSize[%d] = %d, localSize[%d] = %d\n", + i, (int)range.globalDims[i], i, (int)range.localDims[i]); } jniContext->unpinAll(jenv); return status; } - if(jniContext->isTrackingOpenCLResources()){ - executeEventList.add(jniContext->executeEvents[0],__LINE__); + if(config->isTrackingOpenCLResources()){ + executeEventList.add(jniContext->executeEvents[0],__LINE__, __FILE__); } - if (0){ // I dont think we need this - if (passid < passes-1){ - // we need to wait for the executions to complete... - status = clWaitForEvents(1, jniContext->executeEvents); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clWaitForEvents() execute events mid pass"); - jniContext->unpinAll(jenv); - return status; + + } + + // We will use readEventCount to track the number of reads. It will never be > jniContext->argc which is the size of readEvents[] and readEventArgs[] + // readEvents[] will be populated with the event's that we will wait on below. + // readArgEvents[] will map the readEvent to the arg that originated it + // So if we had + // arg[0] read_write array + // arg[1] read array + // arg[2] write array + // arg[3] primitive + // arg[4] read array + // At the end of the next loop + // readCount=3 + // readEvent[0] = new read event for arg0 + // readArgEvent[0] = 0 + // readEvent[1] = new read event for arg1 + // readArgEvent[1] = 1 + // readEvent[2] = new read event for arg4 + // readArgEvent[2] = 4 + + int readEventCount = 0; + + for (int i=0; i< jniContext->argc; i++){ + KernelArg *arg = jniContext->args[i]; + + if (arg->needToEnqueueRead()){ + if (arg->isConstant()){ + fprintf(stderr, "reading %s\n", arg->name); + } + if (config->isProfilingEnabled()) { + jniContext->readEventArgs[readEventCount]=i; + } + if (config->isVerbose()){ + fprintf(stderr, "reading buffer %d %s\n", i, arg->name); } - status = clReleaseEvent(jniContext->executeEvents[0]); + status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0, + arg->arrayBuffer->lengthInBytes,arg->arrayBuffer->addr , 1, jniContext->executeEvents, &(jniContext->readEvents[readEventCount])); if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clReleaseEvent() read event"); + PRINT_CL_ERR(status, "clEnqueueReadBuffer()"); jniContext->unpinAll(jenv); return status; } + if (config->isTrackingOpenCLResources()){ + readEventList.add(jniContext->readEvents[readEventCount],__LINE__, __FILE__); + } + readEventCount++; } } - } - // We will use readEventCount to track the number of reads. It will never be > jniContext->argc which is the size of readEvents[] and readEventArgs[] - // readEvents[] will be populated with the event's that we will wait on below. - // readArgEvents[] will map the readEvent to the arg that originated it - // So if we had - // arg[0] read_write array - // arg[1] read array - // arg[2] write array - // arg[3] primitive - // arg[4] read array - // At the end of the next loop - // readCount=3 - // readEvent[0] = new read event for arg0 - // readArgEvent[0] = 0 - // readEvent[1] = new read event for arg1 - // readArgEvent[1] = 1 - // readEvent[2] = new read event for arg4 - // readArgEvent[2] = 4 - - int readEventCount = 0; + // don't change the order here + // We wait for the reads which each depend on the execution, which depends on the writes ;) + // So after the reads have completed, we can release the execute and writes. - for (int i=0; i< jniContext->argc; i++){ - KernelArg *arg = jniContext->args[i]; - - if (arg->needToEnqueueRead()){ - if (arg->isConstant()){ - fprintf(stderr, "reading %s\n", arg->name); - } - if (jniContext->isProfilingEnabled()) { - jniContext->readEventArgs[readEventCount]=i; - } - if (jniContext->isVerbose()){ - fprintf(stderr, "reading buffer %d %s\n", i, arg->name); - } - - status = clEnqueueReadBuffer(jniContext->commandQueue, arg->value.arrayBuffer->mem, CL_FALSE, 0, - arg->sizeInBytes,arg->value.arrayBuffer->addr , 1, jniContext->executeEvents, &(jniContext->readEvents[readEventCount])); + if (readEventCount >0){ + status = clWaitForEvents(readEventCount, jniContext->readEvents); if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clEnqueueReadBuffer()"); + PRINT_CL_ERR(status, "clWaitForEvents() read events"); jniContext->unpinAll(jenv); return status; } - if (jniContext->isTrackingOpenCLResources()){ - readEventList.add(jniContext->readEvents[readEventCount],__LINE__); - } - readEventCount++; - } - } - // don't change the order here - // We wait for the reads which each depend on the execution, which depends on the writes ;) - // So after the reads have completed, we can release the execute and writes. - - if (readEventCount >0){ - status = clWaitForEvents(readEventCount, jniContext->readEvents); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clWaitForEvents() read events"); - jniContext->unpinAll(jenv); - return status; - } - - for (int i=0; i< readEventCount; i++){ - if (jniContext->isProfilingEnabled()) { - status = profile(&jniContext->args[jniContext->readEventArgs[i]]->value.arrayBuffer->read, &jniContext->readEvents[i], 0,jniContext->args[jniContext->readEventArgs[i]]->name, jniContext->profileBaseTime); + for (int i=0; i< readEventCount; i++){ + if (config->isProfilingEnabled()) { + status = profile(&jniContext->args[jniContext->readEventArgs[i]]->arrayBuffer->read, &jniContext->readEvents[i], 0,jniContext->args[jniContext->readEventArgs[i]]->name, jniContext->profileBaseTime); + if (status != CL_SUCCESS) { + jniContext->unpinAll(jenv); + return status; + } + } + status = clReleaseEvent(jniContext->readEvents[i]); if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clReleaseEvent() read event"); jniContext->unpinAll(jenv); return status; } + if (config->isTrackingOpenCLResources()){ + readEventList.remove(jniContext->readEvents[i],__LINE__, __FILE__); + } } - status = clReleaseEvent(jniContext->readEvents[i]); + } else { + // if readEventCount == 0 then we don't need any reads so we just wait for the executions to complete + status = clWaitForEvents(1, jniContext->executeEvents); if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clReleaseEvent() read event"); + PRINT_CL_ERR(status, "clWaitForEvents() execute event"); jniContext->unpinAll(jenv); return status; } - if (jniContext->isTrackingOpenCLResources()){ - readEventList.remove(jniContext->readEvents[i],__LINE__); - } } - } else { - // if readEventCount == 0 then we don't need any reads so we just wait for the executions to complete - status = clWaitForEvents(1, jniContext->executeEvents); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clWaitForEvents() execute event"); - jniContext->unpinAll(jenv); - return status; - } - } - if (jniContext->isTrackingOpenCLResources()){ - executeEventList.remove(jniContext->executeEvents[0],__LINE__); + if (config->isTrackingOpenCLResources()){ + executeEventList.remove(jniContext->executeEvents[0],__LINE__, __FILE__); } - if (jniContext->isProfilingEnabled()) { - status = profile(&jniContext->exec[passes-1], &jniContext->executeEvents[0], 1, NULL, jniContext->profileBaseTime); // multi gpu ? - if (status != CL_SUCCESS) { - jniContext->unpinAll(jenv); - return status; - } + if (config->isProfilingEnabled()) { + status = profile(&jniContext->exec[passes-1], &jniContext->executeEvents[0], 1, NULL, jniContext->profileBaseTime); // multi gpu ? + if (status != CL_SUCCESS) { + jniContext->unpinAll(jenv); + return status; + } } - // extract the execution status from the executeEvent + // extract the execution status from the executeEvent cl_int executeStatus; status = clGetEventInfo(jniContext->executeEvents[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &executeStatus, NULL); if (status != CL_SUCCESS) { @@ -1337,7 +1250,7 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI) return status; } if (executeStatus != CL_SUCCESS) { - // it should definitely not be negative, but since we did a wait above, it had better be CL_COMPLETE==CL_SUCCESS + // it should definitely not be negative, but since we did a wait above, it had better be CL_COMPLETE==CL_SUCCESS PRINT_CL_ERR(executeStatus, "Execution status of execute event"); jniContext->unpinAll(jenv); return executeStatus; @@ -1349,258 +1262,214 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI) return status; } - for (int i=0; i< writeEventCount; i++){ - if (jniContext->isProfilingEnabled()) { - profile(&jniContext->args[jniContext->writeEventArgs[i]]->value.arrayBuffer->write, &jniContext->writeEvents[i], 2, jniContext->args[jniContext->writeEventArgs[i]]->name, jniContext->profileBaseTime); + for (int i=0; i< writeEventCount; i++){ + if (config->isProfilingEnabled()) { + profile(&jniContext->args[jniContext->writeEventArgs[i]]->arrayBuffer->write, &jniContext->writeEvents[i], 2, jniContext->args[jniContext->writeEventArgs[i]]->name, jniContext->profileBaseTime); + } + status = clReleaseEvent(jniContext->writeEvents[i]); + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clReleaseEvent() write event"); + jniContext->unpinAll(jenv); + return status; + } + if (config->isTrackingOpenCLResources()){ + writeEventList.remove(jniContext->writeEvents[i],__LINE__, __FILE__); + } } - status = clReleaseEvent(jniContext->writeEvents[i]); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clReleaseEvent() write event"); - jniContext->unpinAll(jenv); - return status; + + jniContext->unpinAll(jenv); + + if (config->isProfilingCSVEnabled()) { + writeProfileInfo(jniContext); } - if (jniContext->isTrackingOpenCLResources()){ - writeEventList.remove(jniContext->writeEvents[i],__LINE__); + if (config->isTrackingOpenCLResources()){ + fprintf(stderr, "following execution of kernel{\n"); + commandQueueList.report(stderr); + memList.report(stderr); + readEventList.report(stderr); + executeEventList.report(stderr); + writeEventList.report(stderr); + fprintf(stderr, "}\n"); } - } - jniContext->unpinAll(jenv); + jniContext->firstRun = false; - if (jniContext->isProfilingCSVEnabled()) { - writeProfileInfo(jniContext); - } - if (jniContext->isTrackingOpenCLResources()){ - fprintf(stderr, "following execution of kernel{\n"); - commandQueueList.report(stderr); - memList.report(stderr); - readEventList.report(stderr); - executeEventList.report(stderr); - writeEventList.report(stderr); - fprintf(stderr, "}\n"); + //fprintf(stderr, "About to return %d from exec\n", status); + return(status); } - jniContext->firstRun = false; - - //fprintf(stderr, "About to return %d from exec\n", status); - return(status); -} - // we return the JNIContext from here JNI_JAVA(jlong, KernelRunner, initJNI) (JNIEnv *jenv, jclass clazz, jobject kernelObject, jobject openCLDeviceObject, jint flags) { - //fprintf(stdout, "init()\n"); - cl_int status = CL_SUCCESS; - JNIContext* jniContext = new JNIContext(jenv, kernelObject, openCLDeviceObject, flags); + if (config== NULL){ + config = new Config(jenv); + } + cl_int status = CL_SUCCESS; + JNIContext* jniContext = new JNIContext(jenv, kernelObject, openCLDeviceObject, flags); - if (jniContext->isValid()){ - - return((jlong)jniContext); - }else{ - return(0L); + if (jniContext->isValid()){ + + return((jlong)jniContext); + }else{ + return(0L); + } } -} JNI_JAVA(jlong, KernelRunner, buildProgramJNI) (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source) { - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext == NULL){ - return 0; - } + JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); + if (jniContext == NULL){ + return 0; + } - cl_int status = CL_SUCCESS; - - jniContext->program = CLHelper::compile(jenv, jniContext->context, 1, &jniContext->deviceId, source, NULL, &status); + cl_int status = CL_SUCCESS; - if(status == CL_BUILD_PROGRAM_FAILURE) { - return(0); - } + jniContext->program = CLHelper::compile(jenv, jniContext->context, 1, &jniContext->deviceId, source, NULL, &status); - jniContext->kernel = clCreateKernel(jniContext->program, "run", &status); - ASSERT_CL("clCreateKernel()"); + if(status == CL_BUILD_PROGRAM_FAILURE) { + return(0); + } - cl_command_queue_properties queue_props = 0; - if (jniContext->isProfilingEnabled()) { - queue_props |= CL_QUEUE_PROFILING_ENABLE; - } + jniContext->kernel = clCreateKernel(jniContext->program, "run", &status); + ASSERT_CL("clCreateKernel()"); - jniContext->commandQueue= clCreateCommandQueue(jniContext->context, (cl_device_id)jniContext->deviceId, - queue_props, - &status); - ASSERT_CL("clCreateCommandQueue()"); + cl_command_queue_properties queue_props = 0; + if (config->isProfilingEnabled()) { + queue_props |= CL_QUEUE_PROFILING_ENABLE; + } + + jniContext->commandQueue= clCreateCommandQueue(jniContext->context, (cl_device_id)jniContext->deviceId, + queue_props, + &status); + ASSERT_CL("clCreateCommandQueue()"); - commandQueueList.add(jniContext->commandQueue, __LINE__); + commandQueueList.add(jniContext->commandQueue, __LINE__, __FILE__); - if (jniContext->isProfilingCSVEnabled()) { - // compute profile filename + if (config->isProfilingCSVEnabled()) { + // compute profile filename #if defined (_WIN32) - jint pid = GetCurrentProcessId(); + jint pid = GetCurrentProcessId(); #else - pid_t pid = getpid(); + pid_t pid = getpid(); #endif - // indicate cpu or gpu - // timestamp - // kernel name + // indicate cpu or gpu + // timestamp + // kernel name - jclass classMethodAccess = jenv->FindClass("java/lang/Class"); - jmethodID getNameID=jenv->GetMethodID(classMethodAccess,"getName","()Ljava/lang/String;"); - jstring className = (jstring)jenv->CallObjectMethod(jniContext->kernelClass, getNameID); - const char *classNameChars = jenv->GetStringUTFChars(className, NULL); + jclass classMethodAccess = jenv->FindClass("java/lang/Class"); + jmethodID getNameID=jenv->GetMethodID(classMethodAccess,"getName","()Ljava/lang/String;"); + jstring className = (jstring)jenv->CallObjectMethod(jniContext->kernelClass, getNameID); + const char *classNameChars = jenv->GetStringUTFChars(className, NULL); #define TIME_STR_LEN 200 - char timeStr[TIME_STR_LEN]; - struct tm *tmp; - time_t t = time(NULL); - tmp = localtime(&t); - if (tmp == NULL) { - perror("localtime"); - } - //strftime(timeStr, TIME_STR_LEN, "%F.%H%M%S", tmp); %F seemed to cause a core dump - strftime(timeStr, TIME_STR_LEN, "%H%M%S", tmp); + char timeStr[TIME_STR_LEN]; + struct tm *tmp; + time_t t = time(NULL); + tmp = localtime(&t); + if (tmp == NULL) { + perror("localtime"); + } + //strftime(timeStr, TIME_STR_LEN, "%F.%H%M%S", tmp); %F seemed to cause a core dump + strftime(timeStr, TIME_STR_LEN, "%H%M%S", tmp); - char* fnameStr = new char[strlen(classNameChars) + strlen(timeStr) + 128]; + char* fnameStr = new char[strlen(classNameChars) + strlen(timeStr) + 128]; - //sprintf(fnameStr, "%s.%s.%d.%llx\n", classNameChars, timeStr, pid, jniContext); - sprintf(fnameStr, "aparapiprof.%s.%d.%016lx", timeStr, pid, (unsigned long)jniContext); - jenv->ReleaseStringUTFChars(className, classNameChars); + //sprintf(fnameStr, "%s.%s.%d.%llx\n", classNameChars, timeStr, pid, jniContext); + sprintf(fnameStr, "aparapiprof.%s.%d.%016lx", timeStr, pid, (unsigned long)jniContext); + jenv->ReleaseStringUTFChars(className, classNameChars); - FILE* profileFile = fopen(fnameStr, "w"); - if (profileFile != NULL) { - jniContext->profileFile = profileFile; - } else { - jniContext->profileFile = stderr; - fprintf(stderr, "Could not open profile data file %s, reverting to stderr\n", fnameStr); + FILE* profileFile = fopen(fnameStr, "w"); + if (profileFile != NULL) { + jniContext->profileFile = profileFile; + } else { + jniContext->profileFile = stderr; + fprintf(stderr, "Could not open profile data file %s, reverting to stderr\n", fnameStr); + } + delete []fnameStr; } - delete []fnameStr; - } - return((jlong)jniContext); -} + return((jlong)jniContext); + } // this is called once when the arg list is first determined for this kernel JNI_JAVA(jint, KernelRunner, setArgsJNI) (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobjectArray argArray, jint argc) { - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - cl_int status = CL_SUCCESS; - if (jniContext != NULL){ - jniContext->argc = argc; - jniContext->args = new KernelArg*[jniContext->argc]; - jniContext->firstRun = true; - - // Step through the array of KernelArg's to capture the type data for the Kernel's data members. - for (jint i=0; i<jniContext->argc; i++){ - + if (config== NULL){ + config = new Config(jenv); + } + JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); + cl_int status = CL_SUCCESS; + if (jniContext != NULL){ + jniContext->argc = argc; + jniContext->args = new KernelArg*[jniContext->argc]; + jniContext->firstRun = true; - jobject argObj = jenv->GetObjectArrayElement(argArray, i); - KernelArg* arg = jniContext->args[i] = new KernelArg(jenv, jniContext, argObj); - if (jniContext->isVerbose()){ - if (arg->isExplicit()){ - fprintf(stderr, "%s is explicit!\n", arg->name); + // Step through the array of KernelArg's to capture the type data for the Kernel's data members. + for (jint i=0; i<jniContext->argc; i++){ + jobject argObj = jenv->GetObjectArrayElement(argArray, i); + KernelArg* arg = jniContext->args[i] = new KernelArg(jenv, jniContext, argObj); + if (config->isVerbose()){ + if (arg->isExplicit()){ + fprintf(stderr, "%s is explicit!\n", arg->name); + } } - } - if (arg->isPrimitive()) { - // for primitives, we cache the fieldID for that field in the kernel's arg object - if (arg->isFloat()){ - if (arg->isStatic){ - arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "F"); - }else{ - arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "F"); - } - arg->sizeInBytes = sizeof(jfloat); - }else if (arg->isInt()){ - if (arg->isStatic){ - arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "I"); - }else{ - arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "I"); - } - arg->sizeInBytes = sizeof(jint); - }else if (arg->isByte()){ - if (arg->isStatic){ - arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "B"); - }else{ - arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "B"); - } - arg->sizeInBytes = sizeof(jbyte); - }else if (arg->isBoolean()){ - if (arg->isStatic){ - arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "Z"); + if (config->isVerbose()){ + fprintf(stderr, "in setArgs arg %d %s type %08x\n", i, arg->name, arg->type); + if (arg->isLocal()){ + fprintf(stderr, "in setArgs arg %d %s is local\n", i, arg->name); + }else if (arg->isConstant()){ + fprintf(stderr, "in setArgs arg %d %s is constant\n", i, arg->name); }else{ - arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "Z"); + fprintf(stderr, "in setArgs arg %d %s is *not* local\n", i, arg->name); } - arg->sizeInBytes = sizeof(jboolean); - }else if (arg->isLong()){ - if (arg->isStatic){ - arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "J"); - }else{ - arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "J"); - } - arg->sizeInBytes = sizeof(jlong); - }else if (arg->isDouble()){ - if (arg->isStatic){ - arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "D"); - }else{ - arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "D"); - } - arg->sizeInBytes = sizeof(jdouble); } - }else{ // we will use an array - arg->value.arrayBuffer->mem = (cl_mem) 0; - arg->value.arrayBuffer->javaArray = 0; - arg->sizeInBytes = 0; - } - if (jniContext->isVerbose()){ - fprintf(stderr, "in setArgs arg %d %s type %08x\n", i, arg->name, arg->type); - if (arg->isLocal()){ - fprintf(stderr, "in setArgs arg %d %s is local\n", i, arg->name); - }else if (arg->isConstant()){ - fprintf(stderr, "in setArgs arg %d %s is constant\n", i, arg->name); - }else{ - fprintf(stderr, "in setArgs arg %d %s is *not* local\n", i, arg->name); + + //If an error occurred, return early so we report the first problem, not the last + if (jenv->ExceptionCheck() == JNI_TRUE) { + jniContext->argc = -1; + delete[] jniContext->args; + jniContext->args = NULL; + jniContext->firstRun = true; + return (status); } - } - //If an error occurred, return early so we report the first problem, not the last - if (jenv->ExceptionCheck() == JNI_TRUE) { - jniContext->argc = -1; - delete[] jniContext->args; - jniContext->args = NULL; - jniContext->firstRun = true; - return (status); } + // we will need an executeEvent buffer for all devices + jniContext->executeEvents = new cl_event[1]; + // We will need *at most* jniContext->argc read/write events + jniContext->readEvents = new cl_event[jniContext->argc]; + if (config->isProfilingEnabled()) { + jniContext->readEventArgs = new jint[jniContext->argc]; + } + jniContext->writeEvents = new cl_event[jniContext->argc]; + if (config->isProfilingEnabled()) { + jniContext->writeEventArgs = new jint[jniContext->argc]; + } } - // we will need an executeEvent buffer for all devices - jniContext->executeEvents = new cl_event[1]; - - // We will need *at most* jniContext->argc read/write events - jniContext->readEvents = new cl_event[jniContext->argc]; - if (jniContext->isProfilingEnabled()) { - jniContext->readEventArgs = new jint[jniContext->argc]; - } - jniContext->writeEvents = new cl_event[jniContext->argc]; - if (jniContext->isProfilingEnabled()) { - jniContext->writeEventArgs = new jint[jniContext->argc]; - } + return(status); } - return(status); -} JNI_JAVA(jstring, KernelRunner, getExtensionsJNI) - (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) { - jstring jextensions = NULL; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext != NULL){ - cl_int status = CL_SUCCESS; - jextensions = CLHelper::getExtensions(jenv, jniContext->deviceId, &status); + (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) { + if (config== NULL){ + config = new Config(jenv); + } + jstring jextensions = NULL; + JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); + if (jniContext != NULL){ + cl_int status = CL_SUCCESS; + jextensions = CLHelper::getExtensions(jenv, jniContext->deviceId, &status); + } + return jextensions; } - return jextensions; -} KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer) { cl_int status = CL_SUCCESS; @@ -1610,21 +1479,21 @@ KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer) for (jint i=0; returnArg == NULL && i<jniContext->argc; i++){ KernelArg *arg= jniContext->args[i]; if (arg->isArray()){ - jboolean isSame = jenv->IsSameObject(buffer, arg->value.arrayBuffer->javaArray); + jboolean isSame = jenv->IsSameObject(buffer, arg->arrayBuffer->javaArray); if (isSame){ - if (jniContext->isVerbose()){ + if (config->isVerbose()){ fprintf(stderr, "matched arg '%s'\n", arg->name); } returnArg = arg; }else{ - if (jniContext->isVerbose()){ + if (config->isVerbose()){ fprintf(stderr, "unmatched arg '%s'\n", arg->name); } - } + } } } if (returnArg==NULL){ - if (jniContext->isVerbose()){ + if (config->isVerbose()){ fprintf(stderr, "attempt to get arg for buffer that does not appear to be referenced from kernel\n"); } } @@ -1635,148 +1504,95 @@ KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer) // Called as a result of Kernel.get(someArray) JNI_JAVA(jint, KernelRunner, getJNI) (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobject buffer) { - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext != NULL){ - KernelArg *arg= getArgForBuffer(jenv, jniContext, buffer); - if (arg != NULL){ - if (jniContext->isVerbose()){ - fprintf(stderr, "explicitly reading buffer %s\n", arg->name); - } - arg->pin(jenv); + if (config== NULL){ + config = new Config(jenv); + } + cl_int status = CL_SUCCESS; + JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); + if (jniContext != NULL){ + KernelArg *arg= getArgForBuffer(jenv, jniContext, buffer); + if (arg != NULL){ + if (config->isVerbose()){ + fprintf(stderr, "explicitly reading buffer %s\n", arg->name); + } + arg->pin(jenv); - status = clEnqueueReadBuffer(jniContext->commandQueue, arg->value.arrayBuffer->mem, CL_FALSE, 0, - arg->sizeInBytes,arg->value.arrayBuffer->addr , 0, NULL, &jniContext->readEvents[0]); - if (jniContext->isVerbose()){ - fprintf(stderr, "explicitly read %s ptr=%lx len=%d\n", arg->name, arg->value.arrayBuffer->addr,arg->sizeInBytes ); - } - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clEnqueueReadBuffer()"); - return status; - } - status = clWaitForEvents(1, jniContext->readEvents); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clWaitForEvents"); - return status; - } - if (jniContext->isProfilingEnabled()){ - status = profile(&arg->value.arrayBuffer->read, &jniContext->readEvents[0], 0,arg->name, jniContext->profileBaseTime); + status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0, + arg->arrayBuffer->lengthInBytes,arg->arrayBuffer->addr , 0, NULL, &jniContext->readEvents[0]); + if (config->isVerbose()){ + fprintf(stderr, "explicitly read %s ptr=%lx len=%d\n", arg->name, arg->arrayBuffer->addr,arg->arrayBuffer->lengthInBytes ); + } if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "profile "); + PRINT_CL_ERR(status, "clEnqueueReadBuffer()"); return status; } - } + status = clWaitForEvents(1, jniContext->readEvents); + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clWaitForEvents"); + return status; + } + if (config->isProfilingEnabled()){ + status = profile(&arg->arrayBuffer->read, &jniContext->readEvents[0], 0,arg->name, jniContext->profileBaseTime); + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "profile "); + return status; + } + } - clReleaseEvent(jniContext->readEvents[0]); - if (status != CL_SUCCESS) { - PRINT_CL_ERR(status, "clReleaseEvent() read event"); - return status; - } - // since this is an explicit buffer get, we expect the buffer to have changed so we commit - arg->unpin(jenv); // was unpinCommit + clReleaseEvent(jniContext->readEvents[0]); + if (status != CL_SUCCESS) { + PRINT_CL_ERR(status, "clReleaseEvent() read event"); + return status; + } + // since this is an explicit buffer get, we expect the buffer to have changed so we commit + arg->unpin(jenv); // was unpinCommit - }else{ - if (jniContext->isVerbose()){ - fprintf(stderr, "attempt to request to get a buffer that does not appear to be referenced from kernel\n"); + }else{ + if (config->isVerbose()){ + fprintf(stderr, "attempt to request to get a buffer that does not appear to be referenced from kernel\n"); + } } } + return 0; } - return 0; -} - - -jobject createProfileInfo(JNIEnv *jenv, ProfileInfo &profileInfo){ - jobject profileInstance = JNIHelper::createInstance(jenv, ProfileInfoClass , ArgsVoidReturn(StringClassArg IntArg LongArg LongArg LongArg LongArg), - ((jstring)(profileInfo.name==NULL?NULL:jenv->NewStringUTF(profileInfo.name))), - ((jint)profileInfo.type), - ((jlong)profileInfo.start), - ((jlong)profileInfo.end), - ((jlong)profileInfo.queued), - ((jlong)profileInfo.submit)); - return(profileInstance); -} JNI_JAVA(jobject, KernelRunner, getProfileInfoJNI) (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) { - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - jobject returnList = NULL; - if (jniContext != NULL){ - returnList = JNIHelper::createInstance(jenv, ArrayListClass, VoidReturn ); - if (jniContext->isProfilingEnabled()){ - - for (jint i=0; i<jniContext->argc; i++){ - KernelArg *arg= jniContext->args[i]; - if (arg->isArray()){ - if (arg->isMutableByKernel() && arg->value.arrayBuffer->write.valid){ - jobject writeProfileInfo = createProfileInfo(jenv, arg->value.arrayBuffer->write); - JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), writeProfileInfo); + if (config== NULL){ + config = new Config(jenv); + } + cl_int status = CL_SUCCESS; + JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); + jobject returnList = NULL; + if (jniContext != NULL){ + returnList = JNIHelper::createInstance(jenv, ArrayListClass, VoidReturn ); + if (config->isProfilingEnabled()){ + + for (jint i=0; i<jniContext->argc; i++){ + KernelArg *arg= jniContext->args[i]; + if (arg->isArray()){ + if (arg->isMutableByKernel() && arg->arrayBuffer->write.valid){ + jobject writeProfileInfo = arg->arrayBuffer->write.createProfileInfoInstance(jenv); + JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), writeProfileInfo); + } } } - } - for (jint pass=0; pass<jniContext->passes; pass++){ - jobject executeProfileInfo = createProfileInfo(jenv, jniContext->exec[pass]); - JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), executeProfileInfo); - } + for (jint pass=0; pass<jniContext->passes; pass++){ + jobject executeProfileInfo = jniContext->exec[pass].createProfileInfoInstance(jenv); + JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), executeProfileInfo); + } - for (jint i=0; i<jniContext->argc; i++){ - KernelArg *arg= jniContext->args[i]; - if (arg->isArray()){ - if (arg->isReadByKernel() && arg->value.arrayBuffer->read.valid){ - jobject readProfileInfo = createProfileInfo(jenv, arg->value.arrayBuffer->read); - JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), readProfileInfo); + for (jint i=0; i<jniContext->argc; i++){ + KernelArg *arg= jniContext->args[i]; + if (arg->isArray()){ + if (arg->isReadByKernel() && arg->arrayBuffer->read.valid){ + jobject readProfileInfo = arg->arrayBuffer->read.createProfileInfoInstance(jenv); + JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), readProfileInfo); + } } } } } + return returnList; } - return returnList; -} - - -/* -JNI_JAVA(jint, KernelRunner, getMaxComputeUnitsJNI) - (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) { - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext != NULL){ - return(jniContext->maxComputeUnits); - }else{ - return(0); - } -} - -JNI_JAVA(jint, KernelRunner, getMaxWorkItemDimensionsJNI) - (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) { - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext != NULL){ - return(jniContext->maxWorkItemDimensions); - }else{ - return(0); - } -} - -JNI_JAVA(jint, KernelRunner, getMaxWorkGroupSizeJNI) - (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) { - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext != NULL){ - return(jniContext->maxWorkGroupSize); - }else{ - return(0); - } -} - -JNI_JAVA(jint, KernelRunner, getMaxWorkItemSizeJNI) - (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jint _index) { - cl_int status = CL_SUCCESS; - JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle); - if (jniContext != NULL && _index >=0 && _index <= (int)(jniContext->maxWorkItemDimensions)){ - return(jniContext->maxWorkItemSizes[_index]); - }else{ - return(0); - } -} -*/ diff --git a/com.amd.aparapi.jni/src/cpp/arrayBuffer.cpp b/com.amd.aparapi.jni/src/cpp/arrayBuffer.cpp new file mode 100644 index 0000000000000000000000000000000000000000..179a82cd95efe49de2e4aafe67bcf69822ef5029 --- /dev/null +++ b/com.amd.aparapi.jni/src/cpp/arrayBuffer.cpp @@ -0,0 +1,64 @@ +/* + Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following + disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 + through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of + the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce + Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export + Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in + Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) + export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced + direct product is subject to national security controls as identified on the Commerce Control List (currently + found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional + information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry + and Security?s website at http://www.bis.doc.gov/. + */ +#define ARRAYBUFFER_SOURCE +#include "arrayBuffer.h" + +ArrayBuffer::ArrayBuffer(): + javaArray((jobject) 0), + length(0), + lengthInBytes(0), + mem((cl_mem) 0), + addr(NULL), + memMask((cl_uint)0), + isCopy(false), + isPinned(false){ + } + +void ArrayBuffer::unpinAbort(JNIEnv *jenv){ + jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr,JNI_ABORT); + isPinned = JNI_FALSE; +} +void ArrayBuffer::unpinCommit(JNIEnv *jenv){ + jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr, 0); + isPinned = JNI_FALSE; +} +void ArrayBuffer::pin(JNIEnv *jenv){ + void *ptr = addr; + addr = jenv->GetPrimitiveArrayCritical((jarray)javaArray,&isCopy); + isPinned = JNI_TRUE; +} diff --git a/com.amd.aparapi.jni/src/cpp/arrayBuffer.h b/com.amd.aparapi.jni/src/cpp/arrayBuffer.h new file mode 100644 index 0000000000000000000000000000000000000000..b51f53311ae36a496dc4a33c31a5ed8cacc51700 --- /dev/null +++ b/com.amd.aparapi.jni/src/cpp/arrayBuffer.h @@ -0,0 +1,64 @@ +/* + Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following + disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 + through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of + the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce + Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export + Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in + Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) + export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced + direct product is subject to national security controls as identified on the Commerce Control List (currently + found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional + information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry + and Security?s website at http://www.bis.doc.gov/. + */ + +#ifndef ARRAYBUFFER_H +#define ARRAYBUFFER_H +#include "common.h" +#include "profileInfo.h" + +class ArrayBuffer{ + public: + jobject javaArray; // The java array that this arg is mapped to + cl_uint length; // the number of elements for arrays (used only when ARRAYLENGTH bit is set for this arg) + jint lengthInBytes; // bytes in the array or directBuf + cl_mem mem; // the opencl buffer + void *addr; // the last address where we saw this java array object + cl_uint memMask; // the mask used for createBuffer + jboolean isCopy; + jboolean isPinned; + char memSpec[128]; // The string form of the mask we used for create buffer. for debugging + ProfileInfo read; + ProfileInfo write; + + ArrayBuffer(); + void unpinAbort(JNIEnv *jenv); + void unpinCommit(JNIEnv *jenv); + void pin(JNIEnv *jenv); +}; + +#endif // ARRAYBUFFER_H diff --git a/com.amd.aparapi.jni/src/cpp/clHelper.cpp b/com.amd.aparapi.jni/src/cpp/clHelper.cpp index bb3f0d209ec7f0646bb25e04a656beb94c8343a1..a276750937abf6dbb241a067792d17d04bb50aa1 100644 --- a/com.amd.aparapi.jni/src/cpp/clHelper.cpp +++ b/com.amd.aparapi.jni/src/cpp/clHelper.cpp @@ -152,4 +152,3 @@ jstring CLHelper::getExtensions(JNIEnv *jenv, cl_device_id deviceId, cl_int *sta } - diff --git a/com.amd.aparapi.jni/src/cpp/clHelper.h b/com.amd.aparapi.jni/src/cpp/clHelper.h index d4ec57ab7923690a50c222b6cc1a9d934bce879e..7f5f18e4cf321dfbdd8f1203487305741553dd81 100644 --- a/com.amd.aparapi.jni/src/cpp/clHelper.h +++ b/com.amd.aparapi.jni/src/cpp/clHelper.h @@ -68,5 +68,90 @@ class CLHelper{ static jstring getExtensions(JNIEnv *jenv, cl_device_id deviceId, cl_int *status); }; +template <typename T> class List; // forward + +template <typename T> class Ref{ + private: + T value; + int line; + char *fileName; + Ref<T> *next; + friend class List<T>; + public: + Ref(T _value, int _line, char* _fileName); +}; + +template <typename T> class List{ + private: + char *name; + Ref<T> *head; + int count; + public: + List(char *_name); + void add(T _value, int _line, char *_fileName); + void remove(T _value, int _line, char *_fileName); + void report(FILE *stream); +}; + +template <typename T> Ref<T>::Ref(T _value, int _line, char* _fileName): + value(_value), + line(_line), + fileName(_fileName), + next(NULL){ + } + +template <typename T> List<T>::List(char *_name): + head(NULL), + count(0), + name(_name){ + } + +template <typename T> void List<T>::add(T _value, int _line, char *_fileName){ + Ref<T> *handle = new Ref<T>(_value, _line, _fileName); + handle->next = head; + head = handle; + count++; +} + +template <typename T> void List<T>::remove(T _value, int _line, char *_fileName){ + for (Ref<T> *ptr = head, *last=NULL; ptr != NULL; last=ptr, ptr = ptr->next){ + if (ptr->value == _value){ + if (last == NULL){ // head + head = ptr->next; + }else{ // !head + last->next = ptr->next; + } + delete ptr; + count--; + return; + } + } + fprintf(stderr, "FILE %s LINE %d failed to find %s to remove %0lx\n", _fileName, _line, name, _value); +} + +template <typename T> void List<T>::report(FILE *stream){ + if (head != NULL){ + fprintf(stream, "Resource report %d resources of type %s still in play ", count, name); + for (Ref<T> *ptr = head; ptr != NULL; ptr = ptr->next){ + fprintf(stream, " %0lx(%d)", ptr->value, ptr->line); + } + fprintf(stream, "\n"); + } +} + +#ifdef CLHELPER_SOURCE +List<cl_command_queue> commandQueueList("cl_command_queue"); +List<cl_mem> memList("cl_mem"); +List<cl_event> readEventList("cl_event (read)"); +List<cl_event> executeEventList("cl_event (exec)"); +List<cl_event> writeEventList("cl_event (write)"); +#else +extern List<cl_command_queue> commandQueueList; +extern List<cl_mem> memList; +extern List<cl_event> readEventList; +extern List<cl_event> executeEventList; +extern List<cl_event> writeEventList; +#endif + #endif // CLHELPER_H diff --git a/com.amd.aparapi.jni/src/cpp/config.cpp b/com.amd.aparapi.jni/src/cpp/config.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a56334c559aa77cb0b7c837ac90bbe7637d656ad --- /dev/null +++ b/com.amd.aparapi.jni/src/cpp/config.cpp @@ -0,0 +1,77 @@ +/* + Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following + disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 + through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of + the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce + Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export + Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in + Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) + export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced + direct product is subject to national security controls as identified on the Commerce Control List (currently + found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional + information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry + and Security?s website at http://www.bis.doc.gov/. + */ +#define CONFIG_SOURCE +#include "config.h" + +jboolean Config::getBoolean(JNIEnv *jenv, char *fieldName){ + jfieldID fieldID = jenv->GetStaticFieldID(configClass, fieldName, "Z"); + return(jenv->GetStaticBooleanField(configClass, fieldID)); +} + +Config::Config(JNIEnv *jenv){ + enableVerboseJNI = false; + configClass = jenv->FindClass("com/amd/aparapi/Config"); + if (configClass == NULL || jenv->ExceptionCheck()) { + jenv->ExceptionDescribe(); + jenv->ExceptionClear(); + fprintf(stderr, "bummer! getting Config from instance\n"); + }else{ + enableVerboseJNI = getBoolean(jenv, "enableVerboseJNI"); + enableVerboseJNIOpenCLResourceTracking = getBoolean(jenv, "enableVerboseJNIOpenCLResourceTracking"); + enableProfiling = getBoolean(jenv, "enableProfiling"); + enableProfilingCSV = getBoolean(jenv, "enableProfilingCSV"); + } + fprintf(stderr, "Config::enableVerboseJNI=%s\n",enableVerboseJNI?"true":"false"); + fprintf(stderr, "Config::enableVerboseJNIOpenCLResourceTracking=%s\n",enableVerboseJNIOpenCLResourceTracking?"true":"false"); + fprintf(stderr, "Config::enableProfiling=%s\n",enableProfiling?"true":"false"); + fprintf(stderr, "Config::enableProfilingCSV=%s\n",enableProfilingCSV?"true":"false"); +} + +jboolean Config::isVerbose(){ + return enableVerboseJNI; +} + +jboolean Config::isProfilingCSVEnabled(){ + return enableProfilingCSV; +} +jboolean Config::isTrackingOpenCLResources(){ + return enableVerboseJNIOpenCLResourceTracking; +} +jboolean Config::isProfilingEnabled(){ + return enableProfiling; +} diff --git a/com.amd.aparapi.jni/src/cpp/config.h b/com.amd.aparapi.jni/src/cpp/config.h new file mode 100644 index 0000000000000000000000000000000000000000..ccebf6be5d54639132cc57846503ca1428006b66 --- /dev/null +++ b/com.amd.aparapi.jni/src/cpp/config.h @@ -0,0 +1,64 @@ +/* + Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following + disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 + through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of + the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce + Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export + Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in + Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) + export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced + direct product is subject to national security controls as identified on the Commerce Control List (currently + found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional + information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry + and Security?s website at http://www.bis.doc.gov/. + */ +#ifndef CONFIG_H +#define CONFIG_H +#include "common.h" + +class Config{ + public: + jboolean configured; + jclass configClass; + jboolean enableVerboseJNI; + jboolean enableVerboseJNIOpenCLResourceTracking; + jboolean enableProfiling; + jboolean enableProfilingCSV; + + jboolean getBoolean(JNIEnv *jenv, char *fieldName); + Config(JNIEnv *jenv); + jboolean isVerbose(); + jboolean isProfilingCSVEnabled(); + jboolean isTrackingOpenCLResources(); + jboolean isProfilingEnabled(); +}; + +#ifdef CONFIG_SOURCE +Config *config=NULL; +#else +extern Config *config; +#endif +#endif diff --git a/com.amd.aparapi.jni/src/cpp/profileInfo.cpp b/com.amd.aparapi.jni/src/cpp/profileInfo.cpp new file mode 100644 index 0000000000000000000000000000000000000000..284d33a52ba8cb44d59490f16128ec42bca810a0 --- /dev/null +++ b/com.amd.aparapi.jni/src/cpp/profileInfo.cpp @@ -0,0 +1,60 @@ +/* + Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following + disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 + through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of + the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce + Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export + Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in + Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) + export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced + direct product is subject to national security controls as identified on the Commerce Control List (currently + found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional + information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry + and Security?s website at http://www.bis.doc.gov/. + */ +#define PROFILEINFO_SOURCE +#include "profileInfo.h" + +ProfileInfo::ProfileInfo(): + valid(false), + type(-1), //-1 unknown, 0 write, 1 execute, 2 read + name(NULL), + queued((cl_ulong)0L), + submit((cl_ulong)0L), + start((cl_ulong)0L), + end((cl_ulong)0L) { +} + +jobject ProfileInfo::createProfileInfoInstance(JNIEnv *jenv){ + jobject profileInstance = JNIHelper::createInstance(jenv, ProfileInfoClass , ArgsVoidReturn(StringClassArg IntArg LongArg LongArg LongArg LongArg), + ((jstring)(name==NULL?NULL:jenv->NewStringUTF(name))), + ((jint)type), + ((jlong)start), + ((jlong)end), + ((jlong)queued), + ((jlong)submit)); + return(profileInstance); +} diff --git a/com.amd.aparapi.jni/src/cpp/profileInfo.h b/com.amd.aparapi.jni/src/cpp/profileInfo.h new file mode 100644 index 0000000000000000000000000000000000000000..a100922b51f1becc79549cdda0321060a4a19e96 --- /dev/null +++ b/com.amd.aparapi.jni/src/cpp/profileInfo.h @@ -0,0 +1,56 @@ +/* + Copyright (c) 2010-2011, Advanced Micro Devices, Inc. + All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, are permitted provided that the + following conditions are met: + + Redistributions of source code must retain the above copyright notice, this list of conditions and the following + disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided with the distribution. + + Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export + laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 + through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000. Further, pursuant to Section 740.6 of + the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce + Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export + Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in + Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) + export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced + direct product is subject to national security controls as identified on the Commerce Control List (currently + found in Supplement 1 to Part 774 of EAR). For the most current Country Group listings, or for additional + information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry + and Security?s website at http://www.bis.doc.gov/. + */ +#ifndef PROFILEINFO_H +#define PROFILEINFO_H +#include "common.h" +#include "jniHelper.h" + +class ProfileInfo{ + public: + jboolean valid; + jint type; //0 write, 1 execute, 2 read + char *name; + cl_ulong queued; + cl_ulong submit; + cl_ulong start; + cl_ulong end; + ProfileInfo(); + jobject createProfileInfoInstance(JNIEnv *jenv); +}; + +#endif diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java index 9436bff005e0a10d1520fa5d47f27bb073168144..06d0399360659b0a902d59424ea1917f7bfc2da4 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java @@ -41,6 +41,8 @@ import java.util.logging.Handler; import java.util.logging.Level; import java.util.logging.Logger; +import com.amd.aparapi.KernelRunner.UsedByJNICode; + /** * A central location for holding all runtime configurable properties as well as logging configuration. * @@ -71,7 +73,7 @@ class Config{ * Usage -Dcom.amd.aparapi.enableProfiling={true|false} * */ - static final boolean enableProfiling = Boolean.getBoolean(propPkgName + ".enableProfiling"); + @UsedByJNICode static final boolean enableProfiling = Boolean.getBoolean(propPkgName + ".enableProfiling"); /** * Allows the user to turn on OpenCL profiling for the JNI/OpenCL layer, this information will be written to CSV file @@ -79,7 +81,7 @@ class Config{ * Usage -Dcom.amd.aparapi.enableProfiling={true|false} * */ - static final boolean enableProfilingCSV = Boolean.getBoolean(propPkgName + ".enableProfilingCSV"); + @UsedByJNICode static final boolean enableProfilingCSV = Boolean.getBoolean(propPkgName + ".enableProfilingCSV"); /** * Allows the user to request that verbose JNI messages be dumped to stderr. @@ -87,7 +89,7 @@ class Config{ * Usage -Dcom.amd.aparapi.enableVerboseJNI={true|false} * */ - static final boolean enableVerboseJNI = Boolean.getBoolean(propPkgName + ".enableVerboseJNI"); + @UsedByJNICode static final boolean enableVerboseJNI = Boolean.getBoolean(propPkgName + ".enableVerboseJNI"); /** * Allows the user to request tracking of opencl resources. @@ -97,7 +99,7 @@ class Config{ * Usage -Dcom.amd.aparapi.enableOpenCLResourceTracking={true|false} * */ - static final boolean enableVerboseJNIOpenCLResourceTracking = Boolean.getBoolean(propPkgName + ".enableVerboseJNIOpenCLResourceTracking"); + @UsedByJNICode static final boolean enableVerboseJNIOpenCLResourceTracking = Boolean.getBoolean(propPkgName + ".enableVerboseJNIOpenCLResourceTracking"); /** * Allows the user to request that the execution mode of each kernel invocation be reported to stdout. * @@ -179,10 +181,7 @@ class Config{ e.printStackTrace(); } - if (dumpFlags) { - logger.fine("executionMode = " + executionMode); - - } + }; public interface InstructionListener{ @@ -213,6 +212,21 @@ class Config{ e.printStackTrace(); } } + if (dumpFlags) { + + System.out.println(propPkgName+".executionMode{GPU|CPU|JTP|SEQ}="+executionMode); + System.out.println(propPkgName+".logLevel{OFF|FINEST|FINER|FINE|WARNING|SEVERE|ALL}="+logger.getLevel()); + System.out.println(propPkgName+".enableProfiling{true|false}="+enableProfiling); + System.out.println(propPkgName+".enableProfilingCSV{true|false}="+enableProfilingCSV); + System.out.println(propPkgName+".enableVerboseJNI{true|false}="+enableVerboseJNI); + System.out.println(propPkgName+".enableVerboseJNIOpenCLResourceTracking{true|false}="+enableVerboseJNIOpenCLResourceTracking); + System.out.println(propPkgName+".enableShowGeneratedOpenCL{true|false}="+enableShowGeneratedOpenCL); + System.out.println(propPkgName+".enableExecutionModeReporting{true|false}="+enableExecutionModeReporting); + System.out.println(propPkgName+".enableInstructionDecodeViewer{true|false}="+enableInstructionDecodeViewer); + System.out.println(propPkgName+".instructionListenerClassName{<class name which extends com.amd.aparapi.Config.InstructionListener>}="+instructionListenerClassName); + + + } } } diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java index fc4bdc9fbc59f3af6390309903e1305c5c4ece1c..75e5a9731f7c2c9e0eafe0cb07eb393c8b5d936c 100644 --- a/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java +++ b/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java @@ -321,6 +321,17 @@ class KernelRunner{ * @author rlamothe */ @UsedByJNICode public static final int ARG_CHAR = 1 << 21; + + /** + * This 'bit' indicates that a particular <code>KernelArg</code> represents a <code>static</code> field (array or primitive). + * + * + * @see com.amd.aparapi.annotations.UsedByJNICode + * @see com.amd.aparapi.KernelRunner.KernelArg + * + * @author gfrost + */ + @UsedByJNICode public static final int ARG_STATIC = 1 << 22; static final String CL_KHR_FP64 = "cl_khr_fp64"; @@ -354,7 +365,7 @@ class KernelRunner{ * * @author gfrost */ - @UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING = 1 << 0; + //@UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING = 1 << 0; /** * This 'bit' indicates that we wish to store profiling information in a CSV file from JNI code. @@ -364,7 +375,7 @@ class KernelRunner{ * * @author gfrost */ - @UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING_CSV = 1 << 1; + // @UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING_CSV = 1 << 1; /** * This 'bit' indicates that we want to execute on the GPU. @@ -388,7 +399,7 @@ class KernelRunner{ * @author gfrost */ - @UsedByJNICode public static final int JNI_FLAG_ENABLE_VERBOSE_JNI = 1 << 3; + // @UsedByJNICode public static final int JNI_FLAG_ENABLE_VERBOSE_JNI = 1 << 3; /** * This 'bit' indicates that we wish to enable OpenCL resource tracking by JNI layer to be written to stderr.<br/> @@ -399,7 +410,7 @@ class KernelRunner{ * @author gfrost */ - @UsedByJNICode @Annotations.Experimental public static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4; + // @UsedByJNICode @Annotations.Experimental public static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4; /** @@ -441,11 +452,7 @@ class KernelRunner{ */ @UsedByJNICode public int type; - /** - * True if the field is static - */ - @UsedByJNICode public boolean isStatic; - + /** * Name of the field */ @@ -1391,10 +1398,10 @@ class KernelRunner{ } } - jniFlags |= (Config.enableProfiling ? JNI_FLAG_ENABLE_PROFILING : 0); - jniFlags |= (Config.enableProfilingCSV ? JNI_FLAG_ENABLE_PROFILING_CSV | JNI_FLAG_ENABLE_PROFILING : 0); - jniFlags |= (Config.enableVerboseJNI ? JNI_FLAG_ENABLE_VERBOSE_JNI : 0); - jniFlags |= (Config.enableVerboseJNIOpenCLResourceTracking ? JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING :0); + // jniFlags |= (Config.enableProfiling ? JNI_FLAG_ENABLE_PROFILING : 0); + // jniFlags |= (Config.enableProfilingCSV ? JNI_FLAG_ENABLE_PROFILING_CSV | JNI_FLAG_ENABLE_PROFILING : 0); + // jniFlags |= (Config.enableVerboseJNI ? JNI_FLAG_ENABLE_VERBOSE_JNI : 0); + // jniFlags |= (Config.enableVerboseJNIOpenCLResourceTracking ? JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING :0); // jniFlags |= (kernel.getExecutionMode().equals(EXECUTION_MODE.GPU) ? JNI_FLAG_USE_GPU : 0); // Init the device to check capabilities before emitting the // code that requires the capabilities. @@ -1461,7 +1468,10 @@ class KernelRunner{ args[i] = new KernelArg(); args[i].name = field.getName(); args[i].field = field; - args[i].isStatic = (field.getModifiers() & Modifier.STATIC) == Modifier.STATIC; + if ((field.getModifiers() & Modifier.STATIC)== Modifier.STATIC){ + args[i].type |= ARG_STATIC; + } + Class<?> type = field.getType(); if (type.isArray()) {