diff --git a/com.amd.aparapi.jni/build.xml b/com.amd.aparapi.jni/build.xml
index 32e0f7c8adf52eb98efebc206169dd599b256fa8..04d13122e2652e47f6c3d22f20f9b5ca1bc6d0b7 100644
--- a/com.amd.aparapi.jni/build.xml
+++ b/com.amd.aparapi.jni/build.xml
@@ -475,6 +475,9 @@ First consider editing the properties in build.properties
          <arg value="-o" />
          <arg value="${basedir}/dist/libaparapi_${x86_or_x86_64}.so" />
          <arg value="src/cpp/aparapi.cpp" />
+         <arg value="src/cpp/config.cpp" />
+         <arg value="src/cpp/profileInfo.cpp" />
+         <arg value="src/cpp/arrayBuffer.cpp" />
          <arg value="src/cpp/opencljni.cpp" />
          <arg value="src/cpp/jniHelper.cpp" />
          <arg value="src/cpp/clHelper.cpp" />
@@ -497,6 +500,9 @@ First consider editing the properties in build.properties
          <arg value="-o" />
          <arg value="${basedir}/dist/libaparapi_${x86_or_x86_64}.dylib" />
          <arg value="src/cpp/aparapi.cpp" />
+         <arg value="src/cpp/config.cpp" />
+         <arg value="src/cpp/profileInfo.cpp" />
+         <arg value="src/cpp/arrayBuffer.cpp" />
          <arg value="src/cpp/opencljni.cpp" />
          <arg value="src/cpp/jniHelper.cpp" />
          <arg value="src/cpp/clHelper.cpp" />
@@ -520,6 +526,9 @@ First consider editing the properties in build.properties
          <arg value="/Iinclude" />
          <arg value="/I${amd.app.sdk.dir}\include" />
          <arg value="src\cpp\aparapi.cpp" />
+         <arg value="src/cpp/config.cpp" />
+         <arg value="src/cpp/profileInfo.cpp" />
+         <arg value="src/cpp/arrayBuffer.cpp" />
          <arg value="src\cpp\opencljni.cpp" />
          <arg value="src\cpp\jniHelper.cpp" />
          <arg value="src/cpp/clHelper.cpp" />
@@ -549,6 +558,9 @@ First consider editing the properties in build.properties
          <arg value="-o" />
          <arg value="${basedir}\dist\aparapi_${x86_or_x86_64}.dll" />
          <arg value="src\cpp\aparapi.cpp" />
+         <arg value="src/cpp/config.cpp" />
+         <arg value="src/cpp/profileInfo.cpp" />
+         <arg value="src/cpp/arrayBuffer.cpp" />
          <arg value="src\cpp\opencljni.cpp" />
          <arg value="src/cpp/jniHelper.cpp" />
          <arg value="src/cpp/clHelper.cpp" />
diff --git a/com.amd.aparapi.jni/src/cpp/aparapi.cpp b/com.amd.aparapi.jni/src/cpp/aparapi.cpp
index a84afbab49e5c3a6c039381dee96670ba051d95c..97378246ec3fb201aa04c926d87c55eb0df3f650 100644
--- a/com.amd.aparapi.jni/src/cpp/aparapi.cpp
+++ b/com.amd.aparapi.jni/src/cpp/aparapi.cpp
@@ -36,14 +36,15 @@
    and Security?s website at http://www.bis.doc.gov/. 
    */
 #include "common.h"
-#include "jniHelper.h"
+#include "config.h"
+#include "profileInfo.h"
+#include "arrayBuffer.h"
 #include "clHelper.h"
 #define APARAPI_SOURCE
 #include "aparapi.h"
 #include "com_amd_aparapi_KernelRunner.h"
 #include "opencljni.h"
 
-
 class Range{
    public:
       static jclass rangeClazz;
@@ -129,125 +130,41 @@ jfieldID  Range::localSize_2_FieldID=0;
 jfieldID  Range::dimsFieldID=0;
 jfieldID  Range::localIsDerivedFieldID=0; 
 
-class ProfileInfo{
-   public:
-      jboolean valid;
-      jint type; //0 write, 1 execute, 2 read
-      char *name;
-      cl_ulong queued;
-      cl_ulong submit;
-      cl_ulong start;
-      cl_ulong end;
-};
-
 class JNIContext ; // forward reference
 
-class ArrayBuffer{
-   public:
-      JNIContext *jniContext;
-      jobject javaArray;        // The java array or direct buffer that this arg is mapped to 
-      bool  isArray;            // true if above is an array
-      cl_uint javaArrayLength;  // the number of elements for arrays (used only when ARRAYLENGTH bit is set for this arg)
-      cl_mem mem;               // the opencl buffer 
-      void *addr;               // we use this temporarily whilst we pin the primitive array
-      cl_uint memMask;          // the mask we used for createBuffer
-      jboolean isCopy;
-      jboolean isPinned;
-      char memSpec[128];       // The string form of the mask we used for create buffer. for debugging
-      ProfileInfo read;
-      ProfileInfo write;
-
-      ArrayBuffer(JNIContext *jniContext):
-         jniContext(jniContext){
-      }
-
-      void unpinAbort(JNIEnv *jenv){
-         jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr,JNI_ABORT);
-         isPinned = JNI_FALSE;
-      }
-      void unpinCommit(JNIEnv *jenv){
-         jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr, 0);
-         isPinned = JNI_FALSE;
-      }
-      void pin(JNIEnv *jenv){
-		   void *ptr = addr;
-         addr = jenv->GetPrimitiveArrayCritical((jarray)javaArray,&isCopy);
-         isPinned = JNI_TRUE;
-      }
-};
-
-
 class KernelArg{
    private:
       static jclass argClazz;
       static jfieldID nameFieldID;
       static jfieldID typeFieldID; 
-      static jfieldID isStaticFieldID; 
       static jfieldID sizeInBytesFieldID;
-      static jfieldID numElementsFieldID; 
+      static jfieldID numElementsFieldID;
    public:
-      JNIContext *jniContext;
       static jfieldID javaArrayFieldID; 
-      jobject argObj;
+   public:
+      JNIContext *jniContext;  
+      jobject argObj;    // the Java KernelRunner.KernelArg object that we are mirroring.
+      jobject javaArg;   // global reference to the corresponding java KernelArg object we grabbed our own global reference so that the object won't be collected until we dispose!
       char *name;        // used for debugging printfs
-      jfieldID fieldID;  // The field that this arg represents in the kernel (java), used only for primitive updates
       jint type;         // a bit mask determining the type of this arg
-      jboolean isStatic; // A flag indicating if the value is static
-      jint sizeInBytes;  // bytes in the array or directBuf
-      jobject javaArg;   // global reference to the corresponding java KernelArg object
-      union{
-         cl_char c;
-         cl_double d;
-         cl_float f;
-         cl_int i;
-         cl_long j;
-         ArrayBuffer *arrayBuffer;
-      } value;
-
-      KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj):
-         jniContext(jniContext),
-         argObj(argObj){
-            javaArg = jenv->NewGlobalRef(argObj);   // save a global ref to the java Arg Object
-            if (argClazz == 0){
-               jclass c = jenv->GetObjectClass(argObj); 
-               nameFieldID = jenv->GetFieldID(c, "name", "Ljava/lang/String;"); ASSERT_FIELD(name);
-               typeFieldID = jenv->GetFieldID(c, "type", "I"); ASSERT_FIELD(type);
-               isStaticFieldID = jenv->GetFieldID(c, "isStatic", "Z"); ASSERT_FIELD(isStatic);
-               javaArrayFieldID = jenv->GetFieldID(c, "javaArray", "Ljava/lang/Object;"); ASSERT_FIELD(javaArray);
-               sizeInBytesFieldID = jenv->GetFieldID(c, "sizeInBytes", "I"); ASSERT_FIELD(sizeInBytes);
-               numElementsFieldID = jenv->GetFieldID(c, "numElements", "I"); ASSERT_FIELD(numElements);
-               argClazz  = c;
-            }
-            type = jenv->GetIntField(argObj, typeFieldID);
-            isStatic = jenv->GetBooleanField(argObj, isStaticFieldID);
-            jstring nameString  = (jstring)jenv->GetObjectField(argObj, nameFieldID);
-            const char *nameChars = jenv->GetStringUTFChars(nameString, NULL);
-#ifdef _WIN32
-            name=_strdup(nameChars);
-#else
-            name=strdup(nameChars);
-#endif
-            jenv->ReleaseStringUTFChars(nameString, nameChars);
-            if (isArray()){
-               value.arrayBuffer= new ArrayBuffer(jniContext);
-            }else{
-               value.arrayBuffer=NULL;
-            }
-         }
+
+      ArrayBuffer *arrayBuffer;
+
+      KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj); // Uses JNIContext so cant inline here see below
 
       ~KernelArg(){
       }
 
       void unpinAbort(JNIEnv *jenv){
-         value.arrayBuffer->unpinAbort(jenv);
+         arrayBuffer->unpinAbort(jenv);
       }
       void unpinCommit(JNIEnv *jenv){
-         value.arrayBuffer->unpinCommit(jenv);
+         arrayBuffer->unpinCommit(jenv);
       }
       void unpin(JNIEnv *jenv){
-		 //if  (value.ref.isPinned == JNI_FALSE){		 
-       //     fprintf(stdout, "why are we unpinning buffer %s! isPinned = JNI_TRUE\n", name);
-		 //}
+         //if  (value.ref.isPinned == JNI_FALSE){		 
+         //     fprintf(stdout, "why are we unpinning buffer %s! isPinned = JNI_TRUE\n", name);
+         //}
          if (isMutableByKernel()){
             // we only need to commit if the buffer has been written to
             // we use mode=0 in that case (rather than JNI_COMMIT) because that frees any copy buffer if it exists
@@ -259,7 +176,7 @@ class KernelArg{
          }
       }
       void pin(JNIEnv *jenv){
-         value.arrayBuffer->pin(jenv);
+         arrayBuffer->pin(jenv);
       }
 
       int isArray(){
@@ -313,15 +230,15 @@ class KernelArg{
       int isLocal(){
          return (type&com_amd_aparapi_KernelRunner_ARG_LOCAL);
       }
+      int isStatic(){
+         return (type&com_amd_aparapi_KernelRunner_ARG_STATIC);
+      }
       int isConstant(){
          return (type&com_amd_aparapi_KernelRunner_ARG_CONSTANT);
       }
       int isAparapiBuf(){
          return (type&com_amd_aparapi_KernelRunner_ARG_APARAPI_BUF);
       }
-      //int isAparapiBufHasArray(){
-      //   return (type&com_amd_aparapi_KernelRunner_ARG_APARAPI_BUF_HAS_ARRAY);
-      //}
       int isBackedByArray(){
          return ( (isArray() && (isGlobal() || isConstant())));
       }
@@ -329,111 +246,42 @@ class KernelArg{
          return(((isArray() && isGlobal()) || ((isAparapiBuf()&&isGlobal()))) && (isImplicit()&&isMutableByKernel()));
       }
       int needToEnqueueWrite(){
-		//  if (isExplicitWrite()){
-		//	  fprintf(stderr, "%s isExplicitWrite()\n", name);
-		//  }
-		 //  if (isExplicit()){
-			//  fprintf(stderr, "%s isExplicit()\n", name);
-		//  }
-		//  fprintf(stderr, "%s neetToEnqueueWrite = %d\n", name, returnValue);
          return ((isImplicit()&&isReadByKernel())||(isExplicit()&&isExplicitWrite()));
       }
       void syncType(JNIEnv* jenv){
          type = jenv->GetIntField(javaArg, typeFieldID);
       }
       void syncSizeInBytes(JNIEnv* jenv){
-         sizeInBytes = jenv->GetIntField(javaArg, sizeInBytesFieldID);
+         arrayBuffer->lengthInBytes = jenv->GetIntField(javaArg, sizeInBytesFieldID);
       }
       void syncJavaArrayLength(JNIEnv* jenv){
-         value.arrayBuffer->javaArrayLength = jenv->GetIntField(javaArg, numElementsFieldID);
+         arrayBuffer->length = jenv->GetIntField(javaArg, numElementsFieldID);
       }
       void clearExplicitBufferBit(JNIEnv* jenv){
          type &= ~com_amd_aparapi_KernelRunner_ARG_EXPLICIT_WRITE;
          jenv->SetIntField(javaArg, typeFieldID,type );
       }
+
+      void syncValue(JNIEnv *jenv); // Uses JNIContext so can't inline here we below.  
+      cl_int setLocalBufferArg(JNIEnv *jenv, int argIdx, int argPos); // Uses JNIContext so can't inline here we below.  
+      cl_int setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos ); // Uses JNIContext so can't inline here we below.  
 };
 
 jclass KernelArg::argClazz=(jclass)0;
 jfieldID KernelArg::nameFieldID=0;
 jfieldID KernelArg::typeFieldID=0; 
-jfieldID KernelArg::isStaticFieldID=0; 
 jfieldID KernelArg::javaArrayFieldID=0; 
 jfieldID KernelArg::sizeInBytesFieldID=0;
 jfieldID KernelArg::numElementsFieldID=0; 
 
-template <class T> class List; // forward
-
-template <class T> class Handle{
-   private:
-       T value;
-       int line;
-       Handle<T> *next;
-       friend class List<T>;
-   public:
-       Handle(T _value, int _line): value(_value), line(_line),next(NULL){
-       }
-};
-
-template <class T> class List{
-   private:
-       char *name;
-       Handle<T> *head;
-       int count;
-   public:
-       List(char *_name): head(NULL), count(0), name(_name){
-       }
-       void add(T _value, int _line){
-          Handle<T> *handle = new Handle<T>(_value, _line);
-          handle->next = head; 
-          head = handle;
-          count++;
-          //fprintf(stdout, "LINE %d added %s %0lx\n", _line, name, _value);
-       }
-       void remove(T _value, int _line){
-          for (Handle<T> *ptr = head, *last=NULL; ptr != NULL; last=ptr, ptr = ptr->next){
-             if (ptr->value == _value){
-                if (last == NULL){ // head 
-                   head = ptr->next;
-                }else{ // !head
-                   last->next = ptr->next;
-                }
-                delete ptr;
-                count--;
-                //fprintf(stdout, "LINE %d removed %s %0lx\n", _line, name, _value);
-                return;
-             }
-          }
-          fprintf(stderr, "LINE %d failed to find %s to remove %0lx\n", _line, name, _value);
-       }
-       void report(FILE *stream){
-          if (head != NULL){
-             fprintf(stream, "Resource report %d resources of type %s still in play ", count, name);
-             for (Handle<T> *ptr = head; ptr != NULL; ptr = ptr->next){
-                fprintf(stream, " %0lx(%d)", ptr->value, ptr->line);
-             }
-             fprintf(stream, "\n");
-          }
-       }
-};
-
-List<cl_command_queue> commandQueueList("cl_command_queue");
-List<cl_mem> memList("cl_mem");
-List<cl_event> readEventList("cl_event (read)");
-List<cl_event> executeEventList("cl_event (exec)");
-List<cl_event> writeEventList("cl_event (write)");
-
 class JNIContext{
    private: 
       jint flags;
       jboolean valid;
-      //cl_platform_id platform;
-      //cl_platform_id* platforms;
-      //cl_uint platformc;
    public:
       jobject kernelObject;
       jobject openCLDeviceObject;
       jclass kernelClass;
-      //cl_uint deviceIdc;
       cl_device_id deviceId;
       cl_int deviceType;
       cl_context context;
@@ -452,13 +300,6 @@ class JNIContext{
       jint passes;
       ProfileInfo *exec;
       FILE* profileFile;
-      // these map to camelCase form of CL_DEVICE_XXX_XXX  For example CL_DEVICE_MAX_COMPUTE_UNITS == maxComputeUnits
-     // cl_uint maxComputeUnits;
-     // cl_uint maxWorkItemDimensions;
-     // size_t *maxWorkItemSizes;
-     // size_t maxWorkGroupSize;
-     // cl_ulong globalMemSize;
-     // cl_ulong localMemSize;
 
       static JNIContext* getJNIContext(jlong jniContextHandle){
          return((JNIContext*)jniContextHandle);
@@ -469,7 +310,6 @@ class JNIContext{
          kernelClass((jclass)jenv->NewGlobalRef(jenv->GetObjectClass(_kernelObject))), 
          openCLDeviceObject(jenv->NewGlobalRef(_openCLDeviceObject)),
          flags(_flags),
-       //  platform(NULL),
          profileBaseTime(0),
          passes(0),
          exec(NULL),
@@ -477,164 +317,297 @@ class JNIContext{
          profileFile(NULL), 
          valid(JNI_FALSE){
             cl_int status = CL_SUCCESS;
-            // The device is passed to us.  So just extract the device Id, platform Id etc and create a context.
-            //
             jobject platformInstance = OpenCLDevice::getPlatformInstance(jenv, openCLDeviceObject);
             cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance);
             deviceId = OpenCLDevice::getDeviceId(jenv, openCLDeviceObject);
             cl_device_type returnedDeviceType;
             clGetDeviceInfo(deviceId, CL_DEVICE_TYPE,  sizeof(returnedDeviceType), &returnedDeviceType, NULL);
-      //fprintf(stderr, "device[%d] CL_DEVICE_TYPE = %x\n", deviceId, returnedDeviceType);
+            //fprintf(stderr, "device[%d] CL_DEVICE_TYPE = %x\n", deviceId, returnedDeviceType);
 
 
-      cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformId, 0 };
-      cl_context_properties* cprops = (NULL == platformId) ? NULL : cps;
-      context = clCreateContextFromType( cprops, returnedDeviceType, NULL, NULL, &status); 
+            cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platformId, 0 };
+            cl_context_properties* cprops = (NULL == platformId) ? NULL : cps;
+            context = clCreateContextFromType( cprops, returnedDeviceType, NULL, NULL, &status); 
             ASSERT_CL_NO_RETURN("clCreateContextFromType()");
             if (status == CL_SUCCESS){
                valid = JNI_TRUE;
             }
          }
 
-jboolean isValid(){
-   return(valid);
-}
-jboolean isProfilingCSVEnabled(){
-   return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING_CSV)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING_CSV?JNI_TRUE:JNI_FALSE);
-}
-jboolean isTrackingOpenCLResources(){
-   return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING?JNI_TRUE:JNI_FALSE);
-}
-jboolean isProfilingEnabled(){
-   return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_PROFILING?JNI_TRUE:JNI_FALSE);
-}
-jboolean isUsingGPU(){
-   return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU)==com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU?JNI_TRUE:JNI_FALSE);
-}
-jboolean isVerbose(){
-   return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI)==com_amd_aparapi_KernelRunner_JNI_FLAG_ENABLE_VERBOSE_JNI?JNI_TRUE:JNI_FALSE);
-}
-
-~JNIContext(){
-}
+      jboolean isValid(){
+         return(valid);
+      }
+      jboolean isUsingGPU(){
+         return((flags&com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU)==com_amd_aparapi_KernelRunner_JNI_FLAG_USE_GPU?JNI_TRUE:JNI_FALSE);
+      }
+      ~JNIContext(){
+      }
 
-void dispose(JNIEnv *jenv){
-   //fprintf(stdout, "dispose()\n");
-   cl_int status = CL_SUCCESS;
-   jenv->DeleteGlobalRef(kernelObject);
-   jenv->DeleteGlobalRef(kernelClass);
-   if (context != 0){
-      status = clReleaseContext(context);
-      //fprintf(stdout, "dispose context %0lx\n", context);
-      ASSERT_CL_NO_RETURN("clReleaseContext()");
-      context = (cl_context)0;
-   }
-   if (commandQueue != 0){
-      if (isTrackingOpenCLResources()){
-          commandQueueList.remove((cl_command_queue)commandQueue, __LINE__);
-      }
-      status = clReleaseCommandQueue((cl_command_queue)commandQueue);
-      //fprintf(stdout, "dispose commandQueue %0lx\n", commandQueue);
-      ASSERT_CL_NO_RETURN("clReleaseCommandQueue()");
-      commandQueue = (cl_command_queue)0;
-   }
-   if (program != 0){
-      status = clReleaseProgram((cl_program)program);
-      //fprintf(stdout, "dispose program %0lx\n", program);
-      ASSERT_CL_NO_RETURN("clReleaseProgram()");
-      program = (cl_program)0;
-   }
-   if (kernel != 0){
-      status = clReleaseKernel((cl_kernel)kernel);
-      //fprintf(stdout, "dispose kernel %0lx\n", kernel);
-      ASSERT_CL_NO_RETURN("clReleaseKernel()");
-      kernel = (cl_kernel)0;
-   }
-   if (argc> 0){
-      for (int i=0; i< argc; i++){
-         KernelArg *arg = args[i];
-         if (!arg->isPrimitive()){
-            if (arg->value.arrayBuffer != NULL){
-               if (arg->value.arrayBuffer->mem != 0){
-                  if (isTrackingOpenCLResources()){
-                     memList.remove((cl_mem)arg->value.arrayBuffer->mem, __LINE__);
+      void dispose(JNIEnv *jenv){
+         //fprintf(stdout, "dispose()\n");
+         cl_int status = CL_SUCCESS;
+         jenv->DeleteGlobalRef(kernelObject);
+         jenv->DeleteGlobalRef(kernelClass);
+         if (context != 0){
+            status = clReleaseContext(context);
+            //fprintf(stdout, "dispose context %0lx\n", context);
+            ASSERT_CL_NO_RETURN("clReleaseContext()");
+            context = (cl_context)0;
+         }
+         if (commandQueue != 0){
+            if (config->isTrackingOpenCLResources()){
+               commandQueueList.remove((cl_command_queue)commandQueue, __LINE__, __FILE__);
+            }
+            status = clReleaseCommandQueue((cl_command_queue)commandQueue);
+            //fprintf(stdout, "dispose commandQueue %0lx\n", commandQueue);
+            ASSERT_CL_NO_RETURN("clReleaseCommandQueue()");
+            commandQueue = (cl_command_queue)0;
+         }
+         if (program != 0){
+            status = clReleaseProgram((cl_program)program);
+            //fprintf(stdout, "dispose program %0lx\n", program);
+            ASSERT_CL_NO_RETURN("clReleaseProgram()");
+            program = (cl_program)0;
+         }
+         if (kernel != 0){
+            status = clReleaseKernel((cl_kernel)kernel);
+            //fprintf(stdout, "dispose kernel %0lx\n", kernel);
+            ASSERT_CL_NO_RETURN("clReleaseKernel()");
+            kernel = (cl_kernel)0;
+         }
+         if (argc> 0){
+            for (int i=0; i< argc; i++){
+               KernelArg *arg = args[i];
+               if (!arg->isPrimitive()){
+                  if (arg->arrayBuffer != NULL){
+                     if (arg->arrayBuffer->mem != 0){
+                        if (config->isTrackingOpenCLResources()){
+                           memList.remove((cl_mem)arg->arrayBuffer->mem, __LINE__, __FILE__);
+                        }
+                        status = clReleaseMemObject((cl_mem)arg->arrayBuffer->mem);
+                        //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->arrayBuffer->mem);
+                        ASSERT_CL_NO_RETURN("clReleaseMemObject()");
+                        arg->arrayBuffer->mem = (cl_mem)0;
+                     }
+                     if (arg->arrayBuffer->javaArray != NULL)  {
+                        jenv->DeleteWeakGlobalRef((jweak) arg->arrayBuffer->javaArray);
+                     }
+                     delete arg->arrayBuffer;
+                     arg->arrayBuffer = NULL;
                   }
-                  status = clReleaseMemObject((cl_mem)arg->value.arrayBuffer->mem);
-                  //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->value.arrayBuffer->mem);
-                  ASSERT_CL_NO_RETURN("clReleaseMemObject()");
-                  arg->value.arrayBuffer->mem = (cl_mem)0;
                }
-               if (arg->value.arrayBuffer->javaArray != NULL)  {
-                  jenv->DeleteWeakGlobalRef((jweak) arg->value.arrayBuffer->javaArray);
+               if (arg->name != NULL){
+                  free(arg->name); arg->name = NULL;
                }
-               delete arg->value.arrayBuffer;
-               arg->value.arrayBuffer = NULL;
+               if (arg->javaArg != NULL ) {
+                  jenv->DeleteGlobalRef((jobject) arg->javaArg);
+               }
+               delete arg; arg=args[i]=NULL;
             }
+            delete[] args; args=NULL;
+
+            // do we need to call clReleaseEvent on any of these that are still retained....
+            delete []readEvents; readEvents =NULL;
+            delete []writeEvents; writeEvents = NULL;
+            delete []executeEvents; executeEvents = NULL;
+
+            if (config->isProfilingEnabled()) {
+               if (config->isProfilingCSVEnabled()) {
+                  if (profileFile != NULL && profileFile != stderr) {
+                     fclose(profileFile);
+                  }
+               }
+               delete[] readEventArgs; readEventArgs=0;
+               delete[] writeEventArgs; writeEventArgs=0;
+            } 
          }
-         if (arg->name != NULL){
-            free(arg->name); arg->name = NULL;
-         }
-         if (arg->javaArg != NULL ) {
-            jenv->DeleteGlobalRef((jobject) arg->javaArg);
+         if (config->isTrackingOpenCLResources()){
+            fprintf(stderr, "after dispose{ \n");
+            commandQueueList.report(stderr);
+            memList.report(stderr); 
+            readEventList.report(stderr); 
+            executeEventList.report(stderr); 
+            writeEventList.report(stderr); 
+            fprintf(stderr, "}\n");
          }
-         delete arg; arg=args[i]=NULL;
       }
-      delete[] args; args=NULL;
-
-      // do we need to call clReleaseEvent on any of these that are still retained....
-      delete []readEvents; readEvents =NULL;
-      delete []writeEvents; writeEvents = NULL;
-      delete []executeEvents; executeEvents = NULL;
 
-      if (isProfilingEnabled()) {
-         if (isProfilingCSVEnabled()) {
-            if (profileFile != NULL && profileFile != stderr) {
-               fclose(profileFile);
+      /*
+         Release JNI critical pinned arrays before returning to java code
+         */
+      void unpinAll(JNIEnv* jenv) {
+         for (int i=0; i< argc; i++){
+            KernelArg *arg = args[i];
+            if (arg->isBackedByArray()) {
+               arg->unpin(jenv);
             }
          }
-         delete[] readEventArgs; readEventArgs=0;
-         delete[] writeEventArgs; writeEventArgs=0;
-      } 
-   }
-   if (isTrackingOpenCLResources()){
-   fprintf(stderr, "after dispose{ \n");
-   commandQueueList.report(stderr);
-   memList.report(stderr); 
-   readEventList.report(stderr); 
-   executeEventList.report(stderr); 
-   writeEventList.report(stderr); 
-   fprintf(stderr, "}\n");
-   }
-}
-
-/*
-   Release JNI critical pinned arrays before returning to java code
-   */
-void unpinAll(JNIEnv* jenv) {
-   for (int i=0; i< argc; i++){
-      KernelArg *arg = args[i];
-      if (arg->isBackedByArray()) {
-         arg->unpin(jenv);
       }
-   }
-}
 
 
 };
 
+KernelArg::KernelArg(JNIEnv *jenv, JNIContext *jniContext, jobject argObj):
+   jniContext(jniContext),
+   argObj(argObj){
+      javaArg = jenv->NewGlobalRef(argObj);   // save a global ref to the java Arg Object
+      if (argClazz == 0){
+         jclass c = jenv->GetObjectClass(argObj); 
+         nameFieldID = jenv->GetFieldID(c, "name", "Ljava/lang/String;"); ASSERT_FIELD(name);
+         typeFieldID = jenv->GetFieldID(c, "type", "I"); ASSERT_FIELD(type);
+         javaArrayFieldID = jenv->GetFieldID(c, "javaArray", "Ljava/lang/Object;"); ASSERT_FIELD(javaArray);
+         sizeInBytesFieldID = jenv->GetFieldID(c, "sizeInBytes", "I"); ASSERT_FIELD(sizeInBytes);
+         numElementsFieldID = jenv->GetFieldID(c, "numElements", "I"); ASSERT_FIELD(numElements);
+         argClazz  = c;
+      }
+      type = jenv->GetIntField(argObj, typeFieldID);
+      jstring nameString  = (jstring)jenv->GetObjectField(argObj, nameFieldID);
+      const char *nameChars = jenv->GetStringUTFChars(nameString, NULL);
+#ifdef _WIN32
+      name=_strdup(nameChars);
+#else
+      name=strdup(nameChars);
+#endif
+      jenv->ReleaseStringUTFChars(nameString, nameChars);
+      if (isArray()){
+         arrayBuffer= new ArrayBuffer();
+      }
+   }
 
+cl_int KernelArg::setLocalBufferArg(JNIEnv *jenv, int argIdx, int argPos){
+   if (config->isVerbose()){
+       fprintf(stderr, "ISLOCAL, clSetKernelArg(jniContext->kernel, %d, %d, NULL);\n", argIdx, (int) arrayBuffer->lengthInBytes);
+   }
+   return(clSetKernelArg(jniContext->kernel, argPos, (int)arrayBuffer->lengthInBytes, NULL));
+}
+cl_int KernelArg::setPrimitiveArg(JNIEnv *jenv, int argIdx, int argPos){
+   cl_int status = CL_SUCCESS;
+   if (isFloat()){
+      if (isStatic()){
+         jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "F");
+         jfloat f = jenv->GetStaticFloatField(jniContext->kernelClass, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg static primitive float '%s' index=%d pos=%d value=%f\n",
+                 name, argIdx, argPos, f); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jfloat), &f);
+      }else{
+         jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "F");
+         jfloat f = jenv->GetFloatField(jniContext->kernelObject, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg primitive float '%s' index=%d pos=%d value=%f\n",
+                 name, argIdx, argPos, f); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jfloat), &f);
+      }
+   }else if (isInt()){
+      if (isStatic()){
+         jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "I");
+         jint i = jenv->GetStaticIntField(jniContext->kernelClass, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg static primitive int '%s' index=%d pos=%d value=%d\n",
+                 name, argIdx, argPos, i); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &i);
+      }else{
+         jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "I");
+         jint i = jenv->GetIntField(jniContext->kernelObject, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg primitive int '%s' index=%d pos=%d value=%d\n",
+                 name, argIdx, argPos, i); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &i);
+      }
+   }else if (isBoolean()){
+      if (isStatic()){
+         jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "Z");
+         jboolean z = jenv->GetStaticBooleanField(jniContext->kernelClass, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg static primitive boolean '%s' index=%d pos=%d value=%d\n",
+                 name, argIdx, argPos, z); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jboolean), &z);
+      }else{
+         jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "Z");
+         jboolean z = jenv->GetBooleanField(jniContext->kernelObject, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg primitive boolean '%s' index=%d pos=%d value=%d\n",
+                 name, argIdx, argPos, z); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jboolean), &z);
+      }
+   }else if (isByte()){
+      if (isStatic()){
+         jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "B");
+         jbyte b = jenv->GetStaticByteField(jniContext->kernelClass, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg static primitive byte '%s' index=%d pos=%d value=%d\n",
+                 name, argIdx, argPos, b); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jbyte), &b);
+      }else{
+         jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "B");
+         jbyte b = jenv->GetByteField(jniContext->kernelObject, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg primitive byte '%s' index=%d pos=%d value=%d\n",
+                 name, argIdx, argPos, b); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jbyte), &b);
+      }
+   }else if (isLong()){
+      if (isStatic()){
+         jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "J");
+         jlong j = jenv->GetStaticLongField(jniContext->kernelClass, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg static primitive long '%s' index=%d pos=%d value=%ld\n",
+                 name, argIdx, argPos, j); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jlong), &j);
+      }else{
+         jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "J");
+         jlong j = jenv->GetLongField(jniContext->kernelObject, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg primitive long '%s' index=%d pos=%d value=%ld\n",
+                 name, argIdx, argPos, j); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jlong), &j);
+      }
+   }else if (isDouble()){
+      if (isStatic()){
+         jfieldID fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, name, "D");
+         jdouble d  = jenv->GetStaticDoubleField(jniContext->kernelClass, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg static primitive long '%s' index=%d pos=%d value=%lf\n",
+                 name, argIdx, argPos, d); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jdouble), &d);
+      }else{
+         jfieldID fieldID = jenv->GetFieldID(jniContext->kernelClass, name, "D");
+         jdouble d = jenv->GetDoubleField(jniContext->kernelObject, fieldID);
+         if (config->isVerbose()){
+            fprintf(stderr, "clSetKernelArg primitive long '%s' index=%d pos=%d value=%lf\n",
+                 name, argIdx, argPos, d); 
+         }
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jdouble), &d);
+      }
+   }
+   return status;
+}
 
 JNI_JAVA(jint, KernelRunner, disposeJNI)
    (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) {
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext != NULL){
-      jniContext->dispose(jenv);
-      delete jniContext;
-      jniContext = NULL;
+      if (config== NULL){
+         config = new Config(jenv);
+      }
+      cl_int status = CL_SUCCESS;
+      JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+      if (jniContext != NULL){
+         jniContext->dispose(jenv);
+         delete jniContext;
+         jniContext = NULL;
+      }
+      return(status);
    }
-   return(status);
-}
 
 void idump(char *str, void *ptr, int size){
    int * iptr = (int *)ptr;
@@ -666,15 +639,15 @@ jint writeProfileInfo(JNIContext* jniContext){
 
          // Initialize the base time for this sample
          if (currSampleBaseTime == -1) {
-            currSampleBaseTime = arg->value.arrayBuffer->write.queued;
+            currSampleBaseTime = arg->arrayBuffer->write.queued;
          } 
          fprintf(jniContext->profileFile, "%d write %s,", pos++, arg->name);
 
          fprintf(jniContext->profileFile, "%lu,%lu,%lu,%lu,",  
-               (arg->value.arrayBuffer->write.queued - currSampleBaseTime)/1000, 
-               (arg->value.arrayBuffer->write.submit - currSampleBaseTime)/1000, 
-               (arg->value.arrayBuffer->write.start - currSampleBaseTime)/1000, 
-               (arg->value.arrayBuffer->write.end - currSampleBaseTime)/1000);
+               (arg->arrayBuffer->write.queued - currSampleBaseTime)/1000, 
+               (arg->arrayBuffer->write.submit - currSampleBaseTime)/1000, 
+               (arg->arrayBuffer->write.start - currSampleBaseTime)/1000, 
+               (arg->arrayBuffer->write.end - currSampleBaseTime)/1000);
       }
    }
 
@@ -705,16 +678,16 @@ jint writeProfileInfo(JNIContext* jniContext){
 
             // Initialize the base time for this sample
             if (currSampleBaseTime == -1) {
-               currSampleBaseTime = arg->value.arrayBuffer->read.queued;
+               currSampleBaseTime = arg->arrayBuffer->read.queued;
             }
 
             fprintf(jniContext->profileFile, "%d read %s,", pos++, arg->name);
 
             fprintf(jniContext->profileFile, "%lu,%lu,%lu,%lu,",  
-                  (arg->value.arrayBuffer->read.queued - currSampleBaseTime)/1000, 
-                  (arg->value.arrayBuffer->read.submit - currSampleBaseTime)/1000, 
-                  (arg->value.arrayBuffer->read.start - currSampleBaseTime)/1000, 
-                  (arg->value.arrayBuffer->read.end - currSampleBaseTime)/1000);
+                  (arg->arrayBuffer->read.queued - currSampleBaseTime)/1000, 
+                  (arg->arrayBuffer->read.submit - currSampleBaseTime)/1000, 
+                  (arg->arrayBuffer->read.start - currSampleBaseTime)/1000, 
+                  (arg->arrayBuffer->read.end - currSampleBaseTime)/1000);
          }
       }
    }
@@ -755,60 +728,59 @@ jint updateNonPrimitiveReferences(JNIEnv *jenv, jobject jobj, JNIContext* jniCon
          KernelArg *arg=jniContext->args[i];
          arg->syncType(jenv); // make sure that the JNI arg reflects the latest type info from the instance.  For example if the buffer is tagged as explicit and needs to be pushed
 
-         if (jniContext->isVerbose()){
+         if (config->isVerbose()){
             fprintf(stderr, "got type for %s: %08x\n", arg->name, arg->type);
          }
          if (!arg->isPrimitive()) {
             // Following used for all primitive arrays, object arrays and nio Buffers
             jarray newRef = (jarray)jenv->GetObjectField(arg->javaArg, KernelArg::javaArrayFieldID);
-            if (jniContext->isVerbose()){
-               fprintf(stderr, "testing for Resync javaArray %s: old=%p, new=%p\n", arg->name, arg->value.arrayBuffer->javaArray, newRef);         
+            if (config->isVerbose()){
+               fprintf(stderr, "testing for Resync javaArray %s: old=%p, new=%p\n", arg->name, arg->arrayBuffer->javaArray, newRef);         
             }
 
-            if (!jenv->IsSameObject(newRef, arg->value.arrayBuffer->javaArray)) {
-               if (jniContext->isVerbose()){
-                  fprintf(stderr, "Resync javaArray for %s: %p  %p\n", arg->name, newRef, arg->value.arrayBuffer->javaArray);         
+            if (!jenv->IsSameObject(newRef, arg->arrayBuffer->javaArray)) {
+               if (config->isVerbose()){
+                  fprintf(stderr, "Resync javaArray for %s: %p  %p\n", arg->name, newRef, arg->arrayBuffer->javaArray);         
                }
                // Free previous ref if any
-               if (arg->value.arrayBuffer->javaArray != NULL) {
-                  jenv->DeleteWeakGlobalRef((jweak) arg->value.arrayBuffer->javaArray);
-                  if (jniContext->isVerbose()){
-                     fprintf(stderr, "DeleteWeakGlobalRef for %s: %p\n", arg->name, arg->value.arrayBuffer->javaArray);         
+               if (arg->arrayBuffer->javaArray != NULL) {
+                  jenv->DeleteWeakGlobalRef((jweak) arg->arrayBuffer->javaArray);
+                  if (config->isVerbose()){
+                     fprintf(stderr, "DeleteWeakGlobalRef for %s: %p\n", arg->name, arg->arrayBuffer->javaArray);         
                   }
                }
 
                // need to free opencl buffers, run will reallocate later
-               if (arg->value.arrayBuffer->mem != 0) {
+               if (arg->arrayBuffer->mem != 0) {
                   //fprintf(stderr, "-->releaseMemObject[%d]\n", i);
-                  if (jniContext->isTrackingOpenCLResources()){
-                     memList.remove(arg->value.arrayBuffer->mem,__LINE__);
+                  if (config->isTrackingOpenCLResources()){
+                     memList.remove(arg->arrayBuffer->mem,__LINE__, __FILE__);
                   }
-                  status = clReleaseMemObject((cl_mem)arg->value.arrayBuffer->mem);
+                  status = clReleaseMemObject((cl_mem)arg->arrayBuffer->mem);
                   //fprintf(stderr, "<--releaseMemObject[%d]\n", i);
                   ASSERT_CL("clReleaseMemObject()");
-                  arg->value.arrayBuffer->mem = (cl_mem)0;
+                  arg->arrayBuffer->mem = (cl_mem)0;
                }
 
-               arg->value.arrayBuffer->addr = NULL;
+               arg->arrayBuffer->addr = NULL;
 
                // Capture new array ref from the kernel arg object
 
                if (newRef != NULL) {
-                  arg->value.arrayBuffer->javaArray = (jarray)jenv->NewWeakGlobalRef((jarray)newRef);
-                  if (jniContext->isVerbose()){
+                  arg->arrayBuffer->javaArray = (jarray)jenv->NewWeakGlobalRef((jarray)newRef);
+                  if (config->isVerbose()){
                      fprintf(stderr, "NewWeakGlobalRef for %s, set to %p\n", arg->name,
-                           arg->value.arrayBuffer->javaArray);         
+                           arg->arrayBuffer->javaArray);         
                   }
                } else {
-                  arg->value.arrayBuffer->javaArray = NULL;
+                  arg->arrayBuffer->javaArray = NULL;
                }
-               arg->value.arrayBuffer->isArray = true;
 
-               // Save the sizeInBytes which was set on the java side
+               // Save the lengthInBytes which was set on the java side
                arg->syncSizeInBytes(jenv);
 
-               if (jniContext->isVerbose()){
-                  fprintf(stderr, "updateNonPrimitiveReferences, args[%d].sizeInBytes=%d\n", i, arg->sizeInBytes);
+               if (config->isVerbose()){
+                  fprintf(stderr, "updateNonPrimitiveReferences, args[%d].lengthInBytes=%d\n", i, arg->arrayBuffer->lengthInBytes);
                }
             } // object has changed
          }
@@ -821,14 +793,17 @@ jint updateNonPrimitiveReferences(JNIEnv *jenv, jobject jobj, JNIContext* jniCon
 
 JNI_JAVA(jint, KernelRunner, runKernelJNI)
    (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobject _range, jboolean needSync, jint passes) {
+      if (config== NULL){
+         config = new Config(jenv);
+      }
 
-   Range range(jenv, _range);
+      Range range(jenv, _range);
 
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+      cl_int status = CL_SUCCESS;
+      JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
 
 
-    if (jniContext->firstRun && jniContext->isProfilingEnabled()){
+      if (jniContext->firstRun && config->isProfilingEnabled()){
          cl_event firstEvent;
 #ifdef CL_VERSION_1_2
          status = clEnqueueMarkerWithWaitList(jniContext->commandQueue, 0, NULL, &firstEvent);
@@ -847,7 +822,7 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI)
          }
          status = clGetEventProfilingInfo(firstEvent, CL_PROFILING_COMMAND_QUEUED, sizeof(jniContext->profileBaseTime), &(jniContext->profileBaseTime), NULL);
          if (status != CL_SUCCESS) {
-			 PRINT_CL_ERR(status, "clGetEventProfilingInfo#1");
+            PRINT_CL_ERR(status, "clGetEventProfilingInfo#1");
             return 0L;
          }
          clReleaseEvent(firstEvent);
@@ -855,282 +830,237 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI)
             PRINT_CL_ERR(status, "clReleaseEvent() read event");
             return 0L;
          }
-		 if (jniContext->isVerbose()){
+         if (config->isVerbose()){
             fprintf(stderr, "profileBaseTime %lu \n", jniContext->profileBaseTime);
-		 }
+         }
       }
 
 
-   // Need to capture array refs
-   if (jniContext->firstRun || needSync) {
-      updateNonPrimitiveReferences(jenv, jobj, jniContext );
-      if (jniContext->isVerbose()){
-         fprintf(stderr, "back from updateNonPrimitiveReferences\n");
+      // Need to capture array refs
+      if (jniContext->firstRun || needSync) {
+         updateNonPrimitiveReferences(jenv, jobj, jniContext );
+         if (config->isVerbose()){
+            fprintf(stderr, "back from updateNonPrimitiveReferences\n");
+         }
       }
-   }
 
-   int writeEventCount = 0;
+      int writeEventCount = 0;
 
-   // kernelArgPos is used to keep track of the kernel arg position, it can 
-   // differ from "i" due to insertion of javaArrayLength args which are not
-   // fields read from the kernel object.
-   int kernelArgPos = 0;
+      // argPos is used to keep track of the kernel arg position, it can 
+      // differ from "argIdx" due to insertion of javaArrayLength args which are not
+      // fields read from the kernel object.
 
-   for (int i=0; i< jniContext->argc; i++){
-      KernelArg *arg = jniContext->args[i];
-      arg->syncType(jenv); // make sure that the JNI arg reflects the latest type info from the instance.  For example if the buffer is tagged as explicit and needs to be pushed
+      int argPos=0;
+      for (int argIdx=0; argIdx< jniContext->argc; argIdx++, argPos++){
+         KernelArg *arg = jniContext->args[argIdx];
+         arg->syncType(jenv); // make sure that the JNI arg reflects the latest type info from the instance.  For example if the buffer is tagged as explicit and needs to be pushed
 
-      if (jniContext->isVerbose()){
-         fprintf(stderr, "got type for arg %d, %s, type=%08x\n", i, arg->name, arg->type);
-      }
-      if (!arg->isPrimitive() && !arg->isLocal()) {
-         if (jniContext->isProfilingEnabled()){
-            arg->value.arrayBuffer->read.valid = false;
-            arg->value.arrayBuffer->write.valid = false;
+         if (config->isVerbose()){
+            fprintf(stderr, "got type for arg %d, %s, type=%08x\n", argIdx, arg->name, arg->type);
          }
-         // pin the arrays so that GC does not move them during the call
+         if (!arg->isPrimitive() && !arg->isLocal()) {
+            if (config->isProfilingEnabled()){
+               arg->arrayBuffer->read.valid = false;
+               arg->arrayBuffer->write.valid = false;
+            }
+            // pin the arrays so that GC does not move them during the call
 
-         // get the C memory address for the region being transferred
-         // this uses different JNI calls for arrays vs. directBufs
-         void * prevAddr =  arg->value.arrayBuffer->addr;
-         if (arg->value.arrayBuffer->isArray) {
+            // get the C memory address for the region being transferred
+            // this uses different JNI calls for arrays vs. directBufs
+            void * prevAddr =  arg->arrayBuffer->addr;
             arg->pin(jenv);
-         }
 
-         if (jniContext->isVerbose()){
-            fprintf(stderr, "runKernel: arrayOrBuf ref %p, oldAddr=%p, newAddr=%p, ref.mem=%p, isArray=%d\n",
-                  arg->value.arrayBuffer->javaArray, 
-                  prevAddr,
-                  arg->value.arrayBuffer->addr,
-                  arg->value.arrayBuffer->mem,
-                  arg->value.arrayBuffer->isArray );
-            fprintf(stderr, "at memory addr %p, contents: ", arg->value.arrayBuffer->addr);
-            unsigned char *pb = (unsigned char *) arg->value.arrayBuffer->addr;
-            for (int k=0; k<8; k++) {
-               fprintf(stderr, "%02x ", pb[k]);
+            if (config->isVerbose()){
+               fprintf(stderr, "runKernel: arrayOrBuf ref %p, oldAddr=%p, newAddr=%p, ref.mem=%p\n",
+                     arg->arrayBuffer->javaArray, 
+                     prevAddr,
+                     arg->arrayBuffer->addr,
+                     arg->arrayBuffer->mem);
+               fprintf(stderr, "at memory addr %p, contents: ", arg->arrayBuffer->addr);
+               unsigned char *pb = (unsigned char *) arg->arrayBuffer->addr;
+               for (int k=0; k<8; k++) {
+                  fprintf(stderr, "%02x ", pb[k]);
+               }
+               fprintf(stderr, "\n" );
             }
-            fprintf(stderr, "\n" );
-         }
-         // record whether object moved 
-         // if we see that isCopy was returned by getPrimitiveArrayCritical, treat that as a move
-         bool objectMoved = (arg->value.arrayBuffer->addr != prevAddr) || arg->value.arrayBuffer->isCopy;
+            // record whether object moved 
+            // if we see that isCopy was returned by getPrimitiveArrayCritical, treat that as a move
+            bool objectMoved = (arg->arrayBuffer->addr != prevAddr) || arg->arrayBuffer->isCopy;
 
-         if (jniContext->isVerbose()){
-            if (arg->isExplicit() && arg->isExplicitWrite()){
-               fprintf(stderr, "explicit write of %s\n",  arg->name);
+            if (config->isVerbose()){
+               if (arg->isExplicit() && arg->isExplicitWrite()){
+                  fprintf(stderr, "explicit write of %s\n",  arg->name);
+               }
             }
-         }
 
-         if (jniContext->firstRun || (arg->value.arrayBuffer->mem == 0) || objectMoved ){
+            if (jniContext->firstRun || (arg->arrayBuffer->mem == 0) || objectMoved ){
 
-            if (arg->value.arrayBuffer->mem != 0 && objectMoved){
-               // we need to release the old buffer 
-               if (jniContext->isTrackingOpenCLResources()){
-                  memList.remove((cl_mem)arg->value.arrayBuffer->mem, __LINE__);
+               if (arg->arrayBuffer->mem != 0 && objectMoved){
+                  // we need to release the old buffer 
+                  if (config->isTrackingOpenCLResources()){
+                     memList.remove((cl_mem)arg->arrayBuffer->mem, __LINE__, __FILE__);
+                  }
+                  status = clReleaseMemObject((cl_mem)arg->arrayBuffer->mem);
+                  //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->arrayBuffer->mem);
+                  ASSERT_CL_NO_RETURN("clReleaseMemObject()");
+                  arg->arrayBuffer->mem = (cl_mem)0;
                }
-               status = clReleaseMemObject((cl_mem)arg->value.arrayBuffer->mem);
-               //fprintf(stdout, "dispose arg %d %0lx\n", i, arg->value.arrayBuffer->mem);
-               ASSERT_CL_NO_RETURN("clReleaseMemObject()");
-               arg->value.arrayBuffer->mem = (cl_mem)0;
-            }
-            // if either this is the first run or user changed input array
-            // or gc moved something, then we create buffers/args
-            cl_uint mask = CL_MEM_USE_HOST_PTR;
-            if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE;
-            else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY;
-            else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY;
-            arg->value.arrayBuffer->memMask = mask;
-            if (jniContext->isVerbose()){
-               strcpy(arg->value.arrayBuffer->memSpec,"CL_MEM_USE_HOST_PTR");
-               if (mask & CL_MEM_READ_WRITE) strcat(arg->value.arrayBuffer->memSpec,"|CL_MEM_READ_WRITE");
-               if (mask & CL_MEM_READ_ONLY) strcat(arg->value.arrayBuffer->memSpec,"|CL_MEM_READ_ONLY");
-               if (mask & CL_MEM_WRITE_ONLY) strcat(arg->value.arrayBuffer->memSpec,"|CL_MEM_WRITE_ONLY");
-
-               fprintf(stderr, "%s %d clCreateBuffer(context, %s, size=%08x bytes, address=%08x, &status)\n", arg->name, 
-                     i, arg->value.arrayBuffer->memSpec, arg->sizeInBytes, arg->value.arrayBuffer->addr);
-            }
-            arg->value.arrayBuffer->mem = clCreateBuffer(jniContext->context, arg->value.arrayBuffer->memMask, 
-                  arg->sizeInBytes, arg->value.arrayBuffer->addr, &status);
+               // if either this is the first run or user changed input array
+               // or gc moved something, then we create buffers/args
+               cl_uint mask = CL_MEM_USE_HOST_PTR;
+               if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE;
+               else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY;
+               else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY;
+               arg->arrayBuffer->memMask = mask;
+               if (config->isVerbose()){
+                  strcpy(arg->arrayBuffer->memSpec,"CL_MEM_USE_HOST_PTR");
+                  if (mask & CL_MEM_READ_WRITE) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_WRITE");
+                  if (mask & CL_MEM_READ_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_ONLY");
+                  if (mask & CL_MEM_WRITE_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_WRITE_ONLY");
+
+                  fprintf(stderr, "%s %d clCreateBuffer(context, %s, size=%08x bytes, address=%08x, &status)\n", arg->name, 
+                        argIdx, arg->arrayBuffer->memSpec, arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr);
+               }
+               arg->arrayBuffer->mem = clCreateBuffer(jniContext->context, arg->arrayBuffer->memMask, 
+                     arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr, &status);
 
-            if (status != CL_SUCCESS) {
-               PRINT_CL_ERR(status, "clCreateBuffer");
-               jniContext->unpinAll(jenv);
-               return status;
-            }
-            if (jniContext->isTrackingOpenCLResources()){
-               memList.add(arg->value.arrayBuffer->mem, __LINE__);
-            }
+               if (status != CL_SUCCESS) {
+                  PRINT_CL_ERR(status, "clCreateBuffer");
+                  jniContext->unpinAll(jenv);
+                  return status;
+               }
+               if (config->isTrackingOpenCLResources()){
+                  memList.add(arg->arrayBuffer->mem, __LINE__, __FILE__);
+               }
 
-            status = clSetKernelArg(jniContext->kernel, kernelArgPos++, sizeof(cl_mem), (void *)&(arg->value.arrayBuffer->mem));                  
-            if (status != CL_SUCCESS) {
-               PRINT_CL_ERR(status, "clSetKernelArg (array)");
-               jniContext->unpinAll(jenv);
-               return status;
-            }
+               status = clSetKernelArg(jniContext->kernel, argPos, sizeof(cl_mem), (void *)&(arg->arrayBuffer->mem));                  
+               if (status != CL_SUCCESS) {
+                  PRINT_CL_ERR(status, "clSetKernelArg (array)");
+                  jniContext->unpinAll(jenv);
+                  return status;
+               }
 
-            // Add the array length if needed
-            if (arg->usesArrayLength()){
-               arg->syncJavaArrayLength(jenv);
+               // Add the array length if needed
+               if (arg->usesArrayLength()){
+                  arg->syncJavaArrayLength(jenv);
 
-               status = clSetKernelArg(jniContext->kernel, kernelArgPos++, sizeof(jint), &(arg->value.arrayBuffer->javaArrayLength));
+                  status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &(arg->arrayBuffer->length));
 
-               if (jniContext->isVerbose()){
-                  fprintf(stderr, "runKernel arg %d %s, javaArrayLength = %d\n", i, arg->name, arg->value.arrayBuffer->javaArrayLength);
+                  if (config->isVerbose()){
+                     fprintf(stderr, "runKernel arg %d %s, length = %d\n", argIdx, arg->name, arg->arrayBuffer->length);
+                  }
+                  if (status != CL_SUCCESS) {
+                     PRINT_CL_ERR(status, "clSetKernelArg (array length)");
+                     jniContext->unpinAll(jenv);
+                     return status;
+                  }
+                  argPos++;
                }
-               if (status != CL_SUCCESS) {
-                  PRINT_CL_ERR(status, "clSetKernelArg (array length)");
-                  jniContext->unpinAll(jenv);
-                  return status;
+            } else {
+               // Keep the arg position in sync if no updates were required
+               if (arg->usesArrayLength()){
+                  argPos++;
                }
             }
-         } else {
-            // Keep the arg position in sync if no updates were required
-            kernelArgPos++;
-            if (arg->usesArrayLength()){
-               kernelArgPos++;
-            }
-         }
 
-         // we only enqueue a write if we know the kernel actually reads the buffer or if there is an explicit write pending
-         // the default behavior for Constant buffers is also that there is no write enqueued unless explicit
+            // we only enqueue a write if we know the kernel actually reads the buffer or if there is an explicit write pending
+            // the default behavior for Constant buffers is also that there is no write enqueued unless explicit
+
+            if (arg->needToEnqueueWrite() && !arg->isConstant()){
+               if (config->isVerbose()){
+                  fprintf(stderr, "%swriting %s%sbuffer argIndex=%d argPos=%d %s\n",  
+                        (arg->isExplicit() ? "explicitly " : ""), 
+                        (arg->isConstant() ? "constant " : ""), 
+                        (arg->isLocal() ? "local " : ""), 
+                        argIdx,
+                        argPos,
+                        arg->name);
+               }
+               if (config->isProfilingEnabled()) {
+                  jniContext->writeEventArgs[writeEventCount]=argIdx;
+               }
 
-         if (arg->needToEnqueueWrite() && !arg->isConstant()){
-            if (jniContext->isVerbose()){
+               status = clEnqueueWriteBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0, 
+                     arg->arrayBuffer->lengthInBytes, arg->arrayBuffer->addr, 0, NULL, &(jniContext->writeEvents[writeEventCount]));
+               if (status != CL_SUCCESS) {
+                  PRINT_CL_ERR(status, "clEnqueueWriteBuffer");
+                  jniContext->unpinAll(jenv);
+                  return status;
+               }
+               if (config->isTrackingOpenCLResources()){
+                  writeEventList.add(jniContext->writeEvents[writeEventCount],__LINE__, __FILE__);
+               }
+               writeEventCount++;
                if (arg->isExplicit() && arg->isExplicitWrite()){
-                  fprintf(stderr, "writing explicit buffer %d %s\n", i, arg->name);
+                  if (config->isVerbose()){
+                     fprintf(stderr, "clearing explicit buffer bit %d %s\n", argIdx, arg->name);
+                  }
+                  arg->clearExplicitBufferBit(jenv);
                }
             }
-            if (jniContext->isVerbose()){
-               fprintf(stderr, "%s writing buffer %d %s\n",  (arg->isExplicit() ? "explicitly" : ""), 
-                     i, arg->name);
-            }
-            if (jniContext->isProfilingEnabled()) {
-               jniContext->writeEventArgs[writeEventCount]=i;
-            }
+         } else if (arg->isLocal()){
+            if (jniContext->firstRun){ // what if local buffer size has changed?  We need a check for resize here.
+               status = arg->setLocalBufferArg(jenv, argIdx, argPos);
+               if (status != CL_SUCCESS) {
+                  PRINT_CL_ERR(status, "clSetKernelArg() (local)");
+                  jniContext->unpinAll(jenv);
+                  return status;
+               }
 
-            if (arg->isConstant()){
-               fprintf(stderr, "writing constant buffer %s\n", arg->name);
-            }
+                // Add the array length if needed
+               if (arg->usesArrayLength()){
+                  arg->syncJavaArrayLength(jenv);
 
-            status = clEnqueueWriteBuffer(jniContext->commandQueue, arg->value.arrayBuffer->mem, CL_FALSE, 0, 
-                  arg->sizeInBytes, arg->value.arrayBuffer->addr, 0, NULL, &(jniContext->writeEvents[writeEventCount]));
-            if (status != CL_SUCCESS) {
-               PRINT_CL_ERR(status, "clEnqueueWriteBuffer");
-               jniContext->unpinAll(jenv);
-               return status;
-            }
-            if (jniContext->isTrackingOpenCLResources()){
-               writeEventList.add(jniContext->writeEvents[writeEventCount],__LINE__);
-            }
-            writeEventCount++;
-            if (arg->isExplicit() && arg->isExplicitWrite()){
-               if (jniContext->isVerbose()){
-                  fprintf(stderr, "clearing explicit buffer bit %d %s\n", i, arg->name);
-               }
-               arg->clearExplicitBufferBit(jenv);
-            }
-         }
-      } else if (arg->isLocal()){
-         if (jniContext->firstRun){
-            int bytes = arg->sizeInBytes;
+                  status = clSetKernelArg(jniContext->kernel, argPos, sizeof(jint), &(arg->arrayBuffer->length));
 
-            if (jniContext->isVerbose()){
-               fprintf(stderr, "ISLOCAL, clSetKernelArg(jniContext->kernel, %d, %d, NULL);\n", i, bytes);
+                  if (config->isVerbose()){
+                     fprintf(stderr, "runKernel arg %d %s, javaArrayLength = %d\n", argIdx, arg->name, arg->arrayBuffer->length);
+                  }
+                  if (status != CL_SUCCESS) {
+                     PRINT_CL_ERR(status, "clSetKernelArg (array length)");
+                     jniContext->unpinAll(jenv);
+                     return status;
+                  }
+                  argPos++;
+               }
+            } else {
+               // Keep the arg position in sync if no updates were required
+               if (arg->usesArrayLength()){
+                  argPos++;
+               }
             }
-            status = clSetKernelArg(jniContext->kernel, kernelArgPos++, bytes, NULL);
+         }else{  // primitive arguments
+            status = arg->setPrimitiveArg(jenv, argIdx, argPos);
             if (status != CL_SUCCESS) {
-               PRINT_CL_ERR(status, "clSetKernelArg() (local)");
+               PRINT_CL_ERR(status, "clSetKernelArg()");
                jniContext->unpinAll(jenv);
                return status;
             }
-         } else {
-            // Keep the arg position in sync if no updates were required
-            kernelArgPos++;
-            if (arg->usesArrayLength()){
-               kernelArgPos++;
-            }
-         }
-      }else{  // primitive arguments
-
-         // we need to reflectively sync the value out of the kernel object
-         if (arg->isFloat()){
-            if (arg->isStatic){
-               arg->value.f = jenv->GetStaticFloatField(jniContext->kernelClass, arg->fieldID);
-            }else{
-               arg->value.f = jenv->GetFloatField(jniContext->kernelObject, arg->fieldID);
-            }
-            //fprintf(stderr, "float arg %d\n", arg->value.f); 
-         }else if (arg->isInt()){
-            if (arg->isStatic){
-               arg->value.i = jenv->GetStaticIntField(jniContext->kernelClass, arg->fieldID);
-            }else{
-               arg->value.i = jenv->GetIntField(jniContext->kernelObject, arg->fieldID);
-            }
-            //fprintf(stderr, "int arg %d\n", arg->value.i); 
-         }else if (arg->isBoolean()){
-            if (arg->isStatic){
-               arg->value.c = jenv->GetStaticBooleanField(jniContext->kernelClass, arg->fieldID);
-            }else{
-               arg->value.c = jenv->GetBooleanField(jniContext->kernelObject, arg->fieldID);
-            }
-            //fprintf(stderr, "boolean arg %d\n", arg->value.c); 
-         }else if (arg->isByte()){
-            if (arg->isStatic){
-               arg->value.c = jenv->GetStaticByteField(jniContext->kernelClass, arg->fieldID);
-            }else{
-               arg->value.c = jenv->GetByteField(jniContext->kernelObject, arg->fieldID);
-            }
-            //fprintf(stderr, "byte arg %d\n", arg->value.c); 
-         }else if (arg->isLong()){
-            if (arg->isStatic){
-               arg->value.j = jenv->GetStaticLongField(jniContext->kernelClass, arg->fieldID);
-            }else{
-               arg->value.j = jenv->GetLongField(jniContext->kernelObject, arg->fieldID);
-            }
-            //fprintf(stderr, "long arg %d\n", arg->value.c); 
-         }else if (arg->isDouble()){
-            if (arg->isStatic){
-               arg->value.d = jenv->GetStaticDoubleField(jniContext->kernelClass, arg->fieldID);
-            }else{
-               arg->value.d = jenv->GetDoubleField(jniContext->kernelObject, arg->fieldID);
-            }
-            //fprintf(stderr, "double arg %d\n", arg->value.c); 
-         }
-
-         if (jniContext->isVerbose()){
-            fprintf(stderr, "clSetKernelArg %s: %d %d %d 0x%08x\n", arg->name, i, kernelArgPos, 
-                  arg->sizeInBytes, arg->value);         
-         }
-         status = clSetKernelArg(jniContext->kernel, kernelArgPos++, arg->sizeInBytes, &(arg->value));
-         if (status != CL_SUCCESS) {
-            PRINT_CL_ERR(status, "clSetKernelArg() (value)");
-            jniContext->unpinAll(jenv);
-            return status;
          }
-      }
-   }  // for each arg
+      }  // for each arg
 
 
 
-   // We will need to revisit the execution of multiple devices.  
-   // POssibly cloning the range per device and mutating each to handle a unique subrange (of global) and
-   // maybe even pushing the offset into the range class.
+      // We will need to revisit the execution of multiple devices.  
+      // POssibly cloning the range per device and mutating each to handle a unique subrange (of global) and
+      // maybe even pushing the offset into the range class.
 
-   //   size_t globalSize_0AsSizeT = (range.globalDims[0] /jniContext->deviceIdc);
-   //   size_t localSize_0AsSizeT = range.localDims[0];
+      //   size_t globalSize_0AsSizeT = (range.globalDims[0] /jniContext->deviceIdc);
+      //   size_t localSize_0AsSizeT = range.localDims[0];
 
-   // To support multiple passes we add a 'secret' final arg called 'passid' and just schedule multiple enqueuendrange kernels.  Each of which having a separate value of passid
-   //
-   //
-   if (jniContext->exec){      // delete the last set
-      delete jniContext->exec;
-   } 
-   jniContext->passes = passes;
-   jniContext->exec = new ProfileInfo[passes];
+      // To support multiple passes we add a 'secret' final arg called 'passid' and just schedule multiple enqueuendrange kernels.  Each of which having a separate value of passid
+      //
+      //
+      if (jniContext->exec){      // delete the last set
+         delete jniContext->exec;
+         jniContext->exec = NULL;
+      } 
+      jniContext->passes = passes;
+      jniContext->exec = new ProfileInfo[passes];
 
-   for (int passid=0; passid<passes; passid++){
+      for (int passid=0; passid<passes; passid++){
          //size_t offset = 1; // (size_t)((range.globalDims[0]/jniContext->deviceIdc)*dev);
-         status = clSetKernelArg(jniContext->kernel, kernelArgPos, sizeof(passid), &(passid));
+         status = clSetKernelArg(jniContext->kernel, argPos, sizeof(passid), &(passid));
          if (status != CL_SUCCESS) {
             PRINT_CL_ERR(status, "clSetKernelArg() (passid)");
             jniContext->unpinAll(jenv);
@@ -1159,15 +1089,15 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI)
             // we block and do supply executeEvents 
             //fprintf(stderr, "setting passid to %d of %d not first not last\n", passid, passes);
             //
-            
+
             status = clWaitForEvents(1, &jniContext->executeEvents[0]);
             if (status != CL_SUCCESS) {
-                PRINT_CL_ERR(status, "clWaitForEvents() execute event");
-                jniContext->unpinAll(jenv);
-                return status;
+               PRINT_CL_ERR(status, "clWaitForEvents() execute event");
+               jniContext->unpinAll(jenv);
+               return status;
             }
-            if (jniContext->isTrackingOpenCLResources()){
-               executeEventList.remove(jniContext->executeEvents[0],__LINE__);
+            if (config->isTrackingOpenCLResources()){
+               executeEventList.remove(jniContext->executeEvents[0],__LINE__, __FILE__);
             }
             status = clReleaseEvent(jniContext->executeEvents[0]);
             if (status != CL_SUCCESS) {
@@ -1176,9 +1106,9 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI)
                return status;
             }
 
-            
+
             // We must capture any profile info for passid-1  so we must wait for the last execution to complete
-            if (passid == 1 && jniContext->isProfilingEnabled()) {
+            if (passid == 1 && config->isProfilingEnabled()) {
                // Now we can profile info for passid-1 
                status = profile(&jniContext->exec[passid-1], &jniContext->executeEvents[0], 1, NULL, jniContext->profileBaseTime);
                if (status != CL_SUCCESS) {
@@ -1202,133 +1132,116 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI)
          if (status != CL_SUCCESS) {
             PRINT_CL_ERR(status, "clEnqueueNDRangeKernel()");
             for(int i = 0; i<range.dims;i++) {
-                fprintf(stderr, "after clEnqueueNDRangeKernel, globalSize[%d] = %d, localSize[%d] = %d\n",
-                       i, (int)range.globalDims[i], i, (int)range.localDims[i]);
+               fprintf(stderr, "after clEnqueueNDRangeKernel, globalSize[%d] = %d, localSize[%d] = %d\n",
+                     i, (int)range.globalDims[i], i, (int)range.localDims[i]);
             }
             jniContext->unpinAll(jenv);
             return status;
          }
-         if(jniContext->isTrackingOpenCLResources()){
-            executeEventList.add(jniContext->executeEvents[0],__LINE__);
+         if(config->isTrackingOpenCLResources()){
+            executeEventList.add(jniContext->executeEvents[0],__LINE__, __FILE__);
          }
-      if (0){  // I dont think we need this
-         if (passid < passes-1){
-            // we need to wait for the executions to complete...
-            status = clWaitForEvents(1,  jniContext->executeEvents);
-            if (status != CL_SUCCESS) {
-               PRINT_CL_ERR(status, "clWaitForEvents() execute events mid pass");
-               jniContext->unpinAll(jenv);
-               return status;
+       
+      }
+
+      // We will use readEventCount to track the number of reads. It will never be > jniContext->argc which is the size of readEvents[] and readEventArgs[]
+      // readEvents[] will be populated with the event's that we will wait on below.  
+      // readArgEvents[] will map the readEvent to the arg that originated it
+      // So if we had
+      //    arg[0]  read_write array
+      //    arg[1]  read array
+      //    arg[2]  write array
+      //    arg[3]  primitive
+      //    arg[4]  read array
+      // At the end of the next loop 
+      //    readCount=3
+      //    readEvent[0] = new read event for arg0
+      //    readArgEvent[0] = 0
+      //    readEvent[1] = new read event for arg1
+      //    readArgEvent[1] = 1
+      //    readEvent[2] = new read event for arg4
+      //    readArgEvent[2] = 4
+
+      int readEventCount = 0; 
+
+      for (int i=0; i< jniContext->argc; i++){
+         KernelArg *arg = jniContext->args[i];
+
+         if (arg->needToEnqueueRead()){
+            if (arg->isConstant()){
+               fprintf(stderr, "reading %s\n", arg->name);
+            }
+            if (config->isProfilingEnabled()) {
+               jniContext->readEventArgs[readEventCount]=i;
+            }
+            if (config->isVerbose()){
+               fprintf(stderr, "reading buffer %d %s\n", i, arg->name);
             }
 
-            status = clReleaseEvent(jniContext->executeEvents[0]);
+            status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0, 
+                  arg->arrayBuffer->lengthInBytes,arg->arrayBuffer->addr , 1, jniContext->executeEvents, &(jniContext->readEvents[readEventCount]));
             if (status != CL_SUCCESS) {
-               PRINT_CL_ERR(status, "clReleaseEvent() read event");
+               PRINT_CL_ERR(status, "clEnqueueReadBuffer()");
                jniContext->unpinAll(jenv);
                return status;
             }
+            if (config->isTrackingOpenCLResources()){
+               readEventList.add(jniContext->readEvents[readEventCount],__LINE__, __FILE__);
+            }
+            readEventCount++;
          }
       }
-   }
 
-   // We will use readEventCount to track the number of reads. It will never be > jniContext->argc which is the size of readEvents[] and readEventArgs[]
-   // readEvents[] will be populated with the event's that we will wait on below.  
-   // readArgEvents[] will map the readEvent to the arg that originated it
-   // So if we had
-   //    arg[0]  read_write array
-   //    arg[1]  read array
-   //    arg[2]  write array
-   //    arg[3]  primitive
-   //    arg[4]  read array
-   // At the end of the next loop 
-   //    readCount=3
-   //    readEvent[0] = new read event for arg0
-   //    readArgEvent[0] = 0
-   //    readEvent[1] = new read event for arg1
-   //    readArgEvent[1] = 1
-   //    readEvent[2] = new read event for arg4
-   //    readArgEvent[2] = 4
-
-   int readEventCount = 0; 
+      // don't change the order here
+      // We wait for the reads which each depend on the execution, which depends on the writes ;)
+      // So after the reads have completed, we can release the execute and writes.
 
-   for (int i=0; i< jniContext->argc; i++){
-      KernelArg *arg = jniContext->args[i];
-
-      if (arg->needToEnqueueRead()){
-         if (arg->isConstant()){
-            fprintf(stderr, "reading %s\n", arg->name);
-         }
-         if (jniContext->isProfilingEnabled()) {
-            jniContext->readEventArgs[readEventCount]=i;
-         }
-         if (jniContext->isVerbose()){
-            fprintf(stderr, "reading buffer %d %s\n", i, arg->name);
-         }
-
-         status = clEnqueueReadBuffer(jniContext->commandQueue, arg->value.arrayBuffer->mem, CL_FALSE, 0, 
-               arg->sizeInBytes,arg->value.arrayBuffer->addr , 1, jniContext->executeEvents, &(jniContext->readEvents[readEventCount]));
+      if (readEventCount >0){
+         status = clWaitForEvents(readEventCount, jniContext->readEvents);
          if (status != CL_SUCCESS) {
-            PRINT_CL_ERR(status, "clEnqueueReadBuffer()");
+            PRINT_CL_ERR(status, "clWaitForEvents() read events");
             jniContext->unpinAll(jenv);
             return status;
          }
-         if (jniContext->isTrackingOpenCLResources()){
-            readEventList.add(jniContext->readEvents[readEventCount],__LINE__);
-         }
-         readEventCount++;
-      }
-   }
 
-   // don't change the order here
-   // We wait for the reads which each depend on the execution, which depends on the writes ;)
-   // So after the reads have completed, we can release the execute and writes.
-
-   if (readEventCount >0){
-      status = clWaitForEvents(readEventCount, jniContext->readEvents);
-      if (status != CL_SUCCESS) {
-         PRINT_CL_ERR(status, "clWaitForEvents() read events");
-         jniContext->unpinAll(jenv);
-         return status;
-      }
-
-      for (int i=0; i< readEventCount; i++){
-         if (jniContext->isProfilingEnabled()) {
-            status = profile(&jniContext->args[jniContext->readEventArgs[i]]->value.arrayBuffer->read, &jniContext->readEvents[i], 0,jniContext->args[jniContext->readEventArgs[i]]->name, jniContext->profileBaseTime);
+         for (int i=0; i< readEventCount; i++){
+            if (config->isProfilingEnabled()) {
+               status = profile(&jniContext->args[jniContext->readEventArgs[i]]->arrayBuffer->read, &jniContext->readEvents[i], 0,jniContext->args[jniContext->readEventArgs[i]]->name, jniContext->profileBaseTime);
+               if (status != CL_SUCCESS) {
+                  jniContext->unpinAll(jenv);
+                  return status;
+               }
+            }
+            status = clReleaseEvent(jniContext->readEvents[i]);
             if (status != CL_SUCCESS) {
+               PRINT_CL_ERR(status, "clReleaseEvent() read event");
                jniContext->unpinAll(jenv);
                return status;
             }
+            if (config->isTrackingOpenCLResources()){
+               readEventList.remove(jniContext->readEvents[i],__LINE__, __FILE__);
+            }
          }
-         status = clReleaseEvent(jniContext->readEvents[i]);
+      } else {
+         // if readEventCount == 0 then we don't need any reads so we just wait for the executions to complete
+         status = clWaitForEvents(1, jniContext->executeEvents);
          if (status != CL_SUCCESS) {
-            PRINT_CL_ERR(status, "clReleaseEvent() read event");
+            PRINT_CL_ERR(status, "clWaitForEvents() execute event");
             jniContext->unpinAll(jenv);
             return status;
          }
-         if (jniContext->isTrackingOpenCLResources()){
-            readEventList.remove(jniContext->readEvents[i],__LINE__);
-         }
       }
-   } else {
-      // if readEventCount == 0 then we don't need any reads so we just wait for the executions to complete
-      status = clWaitForEvents(1, jniContext->executeEvents);
-      if (status != CL_SUCCESS) {
-         PRINT_CL_ERR(status, "clWaitForEvents() execute event");
-         jniContext->unpinAll(jenv);
-         return status;
-      }
-   }
-      if (jniContext->isTrackingOpenCLResources()){
-         executeEventList.remove(jniContext->executeEvents[0],__LINE__);
+      if (config->isTrackingOpenCLResources()){
+         executeEventList.remove(jniContext->executeEvents[0],__LINE__, __FILE__);
       }
-      if (jniContext->isProfilingEnabled()) {
-        status = profile(&jniContext->exec[passes-1], &jniContext->executeEvents[0], 1, NULL, jniContext->profileBaseTime); // multi gpu ?
-        if (status != CL_SUCCESS) {
-           jniContext->unpinAll(jenv);
-           return status;
-        }
+      if (config->isProfilingEnabled()) {
+         status = profile(&jniContext->exec[passes-1], &jniContext->executeEvents[0], 1, NULL, jniContext->profileBaseTime); // multi gpu ?
+         if (status != CL_SUCCESS) {
+            jniContext->unpinAll(jenv);
+            return status;
+         }
       }
-       // extract the execution status from the executeEvent
+      // extract the execution status from the executeEvent
       cl_int executeStatus;
       status = clGetEventInfo(jniContext->executeEvents[0], CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &executeStatus, NULL);
       if (status != CL_SUCCESS) {
@@ -1337,7 +1250,7 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI)
          return status;
       }
       if (executeStatus != CL_SUCCESS) {
-          // it should definitely not be negative, but since we did a wait above, it had better be CL_COMPLETE==CL_SUCCESS
+         // it should definitely not be negative, but since we did a wait above, it had better be CL_COMPLETE==CL_SUCCESS
          PRINT_CL_ERR(executeStatus, "Execution status of execute event");
          jniContext->unpinAll(jenv);
          return executeStatus;
@@ -1349,258 +1262,214 @@ JNI_JAVA(jint, KernelRunner, runKernelJNI)
          return status;
       }
 
-   for (int i=0; i< writeEventCount; i++){
-      if (jniContext->isProfilingEnabled()) {
-         profile(&jniContext->args[jniContext->writeEventArgs[i]]->value.arrayBuffer->write, &jniContext->writeEvents[i], 2, jniContext->args[jniContext->writeEventArgs[i]]->name, jniContext->profileBaseTime);
+      for (int i=0; i< writeEventCount; i++){
+         if (config->isProfilingEnabled()) {
+            profile(&jniContext->args[jniContext->writeEventArgs[i]]->arrayBuffer->write, &jniContext->writeEvents[i], 2, jniContext->args[jniContext->writeEventArgs[i]]->name, jniContext->profileBaseTime);
+         }
+         status = clReleaseEvent(jniContext->writeEvents[i]);
+         if (status != CL_SUCCESS) {
+            PRINT_CL_ERR(status, "clReleaseEvent() write event");
+            jniContext->unpinAll(jenv);
+            return status;
+         }
+         if (config->isTrackingOpenCLResources()){
+            writeEventList.remove(jniContext->writeEvents[i],__LINE__, __FILE__);
+         }
       }
-      status = clReleaseEvent(jniContext->writeEvents[i]);
-      if (status != CL_SUCCESS) {
-         PRINT_CL_ERR(status, "clReleaseEvent() write event");
-         jniContext->unpinAll(jenv);
-         return status;
+
+      jniContext->unpinAll(jenv);
+
+      if (config->isProfilingCSVEnabled()) {
+         writeProfileInfo(jniContext);
       }
-      if (jniContext->isTrackingOpenCLResources()){
-         writeEventList.remove(jniContext->writeEvents[i],__LINE__);
+      if (config->isTrackingOpenCLResources()){
+         fprintf(stderr, "following execution of kernel{\n");
+         commandQueueList.report(stderr);
+         memList.report(stderr); 
+         readEventList.report(stderr); 
+         executeEventList.report(stderr); 
+         writeEventList.report(stderr); 
+         fprintf(stderr, "}\n");
       }
-   }
 
-   jniContext->unpinAll(jenv);
+      jniContext->firstRun = false;
 
-   if (jniContext->isProfilingCSVEnabled()) {
-      writeProfileInfo(jniContext);
-   }
-   if (jniContext->isTrackingOpenCLResources()){
-      fprintf(stderr, "following execution of kernel{\n");
-      commandQueueList.report(stderr);
-      memList.report(stderr); 
-      readEventList.report(stderr); 
-      executeEventList.report(stderr); 
-      writeEventList.report(stderr); 
-   fprintf(stderr, "}\n");
+      //fprintf(stderr, "About to return %d from exec\n", status);
+      return(status);
    }
 
-   jniContext->firstRun = false;
-
-   //fprintf(stderr, "About to return %d from exec\n", status);
-   return(status);
-}
-
 
 // we return the JNIContext from here 
 JNI_JAVA(jlong, KernelRunner, initJNI)
    (JNIEnv *jenv, jclass clazz, jobject kernelObject, jobject openCLDeviceObject, jint flags) {
-   //fprintf(stdout, "init()\n");
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = new JNIContext(jenv, kernelObject, openCLDeviceObject, flags);
+      if (config== NULL){
+         config = new Config(jenv);
+      }
+      cl_int status = CL_SUCCESS;
+      JNIContext* jniContext = new JNIContext(jenv, kernelObject, openCLDeviceObject, flags);
 
-   if (jniContext->isValid()){
-     
-      return((jlong)jniContext);
-   }else{
-      return(0L);
+      if (jniContext->isValid()){
+
+         return((jlong)jniContext);
+      }else{
+         return(0L);
+      }
    }
-}
 
 
 JNI_JAVA(jlong, KernelRunner, buildProgramJNI)
    (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source) {
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext == NULL){
-      return 0;
-   }
+      JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+      if (jniContext == NULL){
+         return 0;
+      }
 
-   cl_int status = CL_SUCCESS;
-   
-   jniContext->program = CLHelper::compile(jenv, jniContext->context,  1, &jniContext->deviceId, source, NULL, &status);
+      cl_int status = CL_SUCCESS;
 
-   if(status == CL_BUILD_PROGRAM_FAILURE) {
-      return(0);
-   }
+      jniContext->program = CLHelper::compile(jenv, jniContext->context,  1, &jniContext->deviceId, source, NULL, &status);
 
-   jniContext->kernel = clCreateKernel(jniContext->program, "run", &status);
-   ASSERT_CL("clCreateKernel()");
+      if(status == CL_BUILD_PROGRAM_FAILURE) {
+         return(0);
+      }
 
-   cl_command_queue_properties queue_props = 0;
-   if (jniContext->isProfilingEnabled()) {
-      queue_props |= CL_QUEUE_PROFILING_ENABLE;
-   }
+      jniContext->kernel = clCreateKernel(jniContext->program, "run", &status);
+      ASSERT_CL("clCreateKernel()");
 
-   jniContext->commandQueue= clCreateCommandQueue(jniContext->context, (cl_device_id)jniContext->deviceId,
-       queue_props,
-       &status);
-   ASSERT_CL("clCreateCommandQueue()");
+      cl_command_queue_properties queue_props = 0;
+      if (config->isProfilingEnabled()) {
+         queue_props |= CL_QUEUE_PROFILING_ENABLE;
+      }
+
+      jniContext->commandQueue= clCreateCommandQueue(jniContext->context, (cl_device_id)jniContext->deviceId,
+            queue_props,
+            &status);
+      ASSERT_CL("clCreateCommandQueue()");
 
-   commandQueueList.add(jniContext->commandQueue, __LINE__);
+      commandQueueList.add(jniContext->commandQueue, __LINE__, __FILE__);
 
-   if (jniContext->isProfilingCSVEnabled()) {
-      // compute profile filename
+      if (config->isProfilingCSVEnabled()) {
+         // compute profile filename
 #if defined (_WIN32)
-      jint pid = GetCurrentProcessId();
+         jint pid = GetCurrentProcessId();
 #else
-      pid_t pid = getpid();
+         pid_t pid = getpid();
 #endif
-      // indicate cpu or gpu
-      // timestamp
-      // kernel name
+         // indicate cpu or gpu
+         // timestamp
+         // kernel name
 
-      jclass classMethodAccess = jenv->FindClass("java/lang/Class"); 
-      jmethodID getNameID=jenv->GetMethodID(classMethodAccess,"getName","()Ljava/lang/String;");
-      jstring className = (jstring)jenv->CallObjectMethod(jniContext->kernelClass, getNameID);
-      const char *classNameChars = jenv->GetStringUTFChars(className, NULL);
+         jclass classMethodAccess = jenv->FindClass("java/lang/Class"); 
+         jmethodID getNameID=jenv->GetMethodID(classMethodAccess,"getName","()Ljava/lang/String;");
+         jstring className = (jstring)jenv->CallObjectMethod(jniContext->kernelClass, getNameID);
+         const char *classNameChars = jenv->GetStringUTFChars(className, NULL);
 
 #define TIME_STR_LEN 200
 
-      char timeStr[TIME_STR_LEN];
-      struct tm *tmp;
-      time_t t = time(NULL);
-      tmp = localtime(&t);
-      if (tmp == NULL) {
-         perror("localtime");
-      }
-      //strftime(timeStr, TIME_STR_LEN, "%F.%H%M%S", tmp);  %F seemed to cause a core dump
-      strftime(timeStr, TIME_STR_LEN, "%H%M%S", tmp);
+         char timeStr[TIME_STR_LEN];
+         struct tm *tmp;
+         time_t t = time(NULL);
+         tmp = localtime(&t);
+         if (tmp == NULL) {
+            perror("localtime");
+         }
+         //strftime(timeStr, TIME_STR_LEN, "%F.%H%M%S", tmp);  %F seemed to cause a core dump
+         strftime(timeStr, TIME_STR_LEN, "%H%M%S", tmp);
 
-      char* fnameStr = new char[strlen(classNameChars) + strlen(timeStr) + 128];
+         char* fnameStr = new char[strlen(classNameChars) + strlen(timeStr) + 128];
 
-      //sprintf(fnameStr, "%s.%s.%d.%llx\n", classNameChars, timeStr, pid, jniContext);
-      sprintf(fnameStr, "aparapiprof.%s.%d.%016lx", timeStr, pid, (unsigned long)jniContext);
-      jenv->ReleaseStringUTFChars(className, classNameChars);
+         //sprintf(fnameStr, "%s.%s.%d.%llx\n", classNameChars, timeStr, pid, jniContext);
+         sprintf(fnameStr, "aparapiprof.%s.%d.%016lx", timeStr, pid, (unsigned long)jniContext);
+         jenv->ReleaseStringUTFChars(className, classNameChars);
 
-      FILE* profileFile = fopen(fnameStr, "w");
-      if (profileFile != NULL) {
-         jniContext->profileFile = profileFile;
-      } else {
-         jniContext->profileFile = stderr;
-         fprintf(stderr, "Could not open profile data file %s, reverting to stderr\n", fnameStr);
+         FILE* profileFile = fopen(fnameStr, "w");
+         if (profileFile != NULL) {
+            jniContext->profileFile = profileFile;
+         } else {
+            jniContext->profileFile = stderr;
+            fprintf(stderr, "Could not open profile data file %s, reverting to stderr\n", fnameStr);
+         }
+         delete []fnameStr;
       }
-      delete []fnameStr;
-   }
 
-   return((jlong)jniContext);
-}
+      return((jlong)jniContext);
+   }
 
 
 // this is called once when the arg list is first determined for this kernel
 JNI_JAVA(jint, KernelRunner, setArgsJNI)
    (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobjectArray argArray, jint argc) {
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   cl_int status = CL_SUCCESS;
-   if (jniContext != NULL){      
-      jniContext->argc = argc;
-      jniContext->args = new KernelArg*[jniContext->argc];
-      jniContext->firstRun = true;
-
-      // Step through the array of KernelArg's to capture the type data for the Kernel's data members.
-      for (jint i=0; i<jniContext->argc; i++){ 
-
+      if (config== NULL){
+         config = new Config(jenv);
+      }
+      JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+      cl_int status = CL_SUCCESS;
+      if (jniContext != NULL){      
+         jniContext->argc = argc;
+         jniContext->args = new KernelArg*[jniContext->argc];
+         jniContext->firstRun = true;
 
-         jobject argObj = jenv->GetObjectArrayElement(argArray, i);
-         KernelArg* arg = jniContext->args[i] = new KernelArg(jenv, jniContext, argObj);
-         if (jniContext->isVerbose()){
-            if (arg->isExplicit()){
-               fprintf(stderr, "%s is explicit!\n", arg->name);
+         // Step through the array of KernelArg's to capture the type data for the Kernel's data members.
+         for (jint i=0; i<jniContext->argc; i++){ 
+            jobject argObj = jenv->GetObjectArrayElement(argArray, i);
+            KernelArg* arg = jniContext->args[i] = new KernelArg(jenv, jniContext, argObj);
+            if (config->isVerbose()){
+               if (arg->isExplicit()){
+                  fprintf(stderr, "%s is explicit!\n", arg->name);
+               }
             }
-         }
 
-         if (arg->isPrimitive()) {
-            // for primitives, we cache the fieldID for that field in the kernel's arg object
-            if (arg->isFloat()){
-               if (arg->isStatic){
-                  arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "F");
-               }else{
-                  arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "F");
-               }
-               arg->sizeInBytes = sizeof(jfloat);
-            }else if (arg->isInt()){
-               if (arg->isStatic){
-                  arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "I");
-               }else{
-                  arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "I");
-               }
-               arg->sizeInBytes = sizeof(jint);
-            }else if (arg->isByte()){
-               if (arg->isStatic){
-                  arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "B");
-               }else{
-                  arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "B");
-               }
-               arg->sizeInBytes = sizeof(jbyte);
-            }else if (arg->isBoolean()){
-               if (arg->isStatic){
-                  arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "Z");
+            if (config->isVerbose()){
+               fprintf(stderr, "in setArgs arg %d %s type %08x\n", i, arg->name, arg->type);
+               if (arg->isLocal()){
+                  fprintf(stderr, "in setArgs arg %d %s is local\n", i, arg->name);
+               }else if (arg->isConstant()){
+                  fprintf(stderr, "in setArgs arg %d %s is constant\n", i, arg->name);
                }else{
-                  arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "Z");
+                  fprintf(stderr, "in setArgs arg %d %s is *not* local\n", i, arg->name);
                }
-               arg->sizeInBytes = sizeof(jboolean);
-            }else if (arg->isLong()){
-               if (arg->isStatic){
-                  arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "J");
-               }else{
-                  arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "J");
-               }
-               arg->sizeInBytes = sizeof(jlong);
-            }else if (arg->isDouble()){
-               if (arg->isStatic){
-                  arg->fieldID = jenv->GetStaticFieldID(jniContext->kernelClass, arg->name, "D");
-               }else{
-                  arg->fieldID = jenv->GetFieldID(jniContext->kernelClass, arg->name, "D");
-               }
-               arg->sizeInBytes = sizeof(jdouble);
             }
-         }else{ // we will use an array
-            arg->value.arrayBuffer->mem = (cl_mem) 0;
-            arg->value.arrayBuffer->javaArray = 0;
-            arg->sizeInBytes = 0;
-         }
-         if (jniContext->isVerbose()){
-            fprintf(stderr, "in setArgs arg %d %s type %08x\n", i, arg->name, arg->type);
-            if (arg->isLocal()){
-               fprintf(stderr, "in setArgs arg %d %s is local\n", i, arg->name);
-            }else if (arg->isConstant()){
-               fprintf(stderr, "in setArgs arg %d %s is constant\n", i, arg->name);
-            }else{
-               fprintf(stderr, "in setArgs arg %d %s is *not* local\n", i, arg->name);
+
+            //If an error occurred, return early so we report the first problem, not the last
+            if (jenv->ExceptionCheck() == JNI_TRUE) {
+               jniContext->argc = -1;
+               delete[] jniContext->args;
+               jniContext->args = NULL;
+               jniContext->firstRun = true;
+               return (status);
             }
-         }
 
-         //If an error occurred, return early so we report the first problem, not the last
-         if (jenv->ExceptionCheck() == JNI_TRUE) {
-            jniContext->argc = -1;
-            delete[] jniContext->args;
-            jniContext->args = NULL;
-            jniContext->firstRun = true;
-            return (status);
          }
+         // we will need an executeEvent buffer for all devices
+         jniContext->executeEvents = new cl_event[1];
 
+         // We will need *at most* jniContext->argc read/write events
+         jniContext->readEvents = new cl_event[jniContext->argc];
+         if (config->isProfilingEnabled()) {
+            jniContext->readEventArgs = new jint[jniContext->argc];
+         }
+         jniContext->writeEvents = new cl_event[jniContext->argc];
+         if (config->isProfilingEnabled()) {
+            jniContext->writeEventArgs = new jint[jniContext->argc];
+         }
       }
-      // we will need an executeEvent buffer for all devices
-      jniContext->executeEvents = new cl_event[1];
-
-      // We will need *at most* jniContext->argc read/write events
-      jniContext->readEvents = new cl_event[jniContext->argc];
-      if (jniContext->isProfilingEnabled()) {
-         jniContext->readEventArgs = new jint[jniContext->argc];
-      }
-      jniContext->writeEvents = new cl_event[jniContext->argc];
-      if (jniContext->isProfilingEnabled()) {
-         jniContext->writeEventArgs = new jint[jniContext->argc];
-      }
+      return(status);
    }
-   return(status);
-}
 
 
 
 JNI_JAVA(jstring, KernelRunner, getExtensionsJNI)
-    (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) {
-   jstring jextensions = NULL;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext != NULL){
-      cl_int status = CL_SUCCESS;
-      jextensions = CLHelper::getExtensions(jenv, jniContext->deviceId, &status);
+   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) {
+      if (config== NULL){
+         config = new Config(jenv);
+      }
+      jstring jextensions = NULL;
+      JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+      if (jniContext != NULL){
+         cl_int status = CL_SUCCESS;
+         jextensions = CLHelper::getExtensions(jenv, jniContext->deviceId, &status);
+      }
+      return jextensions;
    }
-   return jextensions;
-}
 
 KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer) {
    cl_int status = CL_SUCCESS;
@@ -1610,21 +1479,21 @@ KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer)
       for (jint i=0; returnArg == NULL && i<jniContext->argc; i++){ 
          KernelArg *arg= jniContext->args[i];
          if (arg->isArray()){
-            jboolean isSame = jenv->IsSameObject(buffer, arg->value.arrayBuffer->javaArray);
+            jboolean isSame = jenv->IsSameObject(buffer, arg->arrayBuffer->javaArray);
             if (isSame){
-		       if (jniContext->isVerbose()){
+               if (config->isVerbose()){
                   fprintf(stderr, "matched arg '%s'\n", arg->name);
                }
                returnArg = arg;
             }else{
-				 if (jniContext->isVerbose()){
+               if (config->isVerbose()){
                   fprintf(stderr, "unmatched arg '%s'\n", arg->name);
                }
-			}
+            }
          }
       }
       if (returnArg==NULL){
-         if (jniContext->isVerbose()){
+         if (config->isVerbose()){
             fprintf(stderr, "attempt to get arg for buffer that does not appear to be referenced from kernel\n");
          }
       }
@@ -1635,148 +1504,95 @@ KernelArg* getArgForBuffer(JNIEnv* jenv, JNIContext* jniContext, jobject buffer)
 // Called as a result of Kernel.get(someArray)
 JNI_JAVA(jint, KernelRunner, getJNI)
    (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jobject buffer) {
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext != NULL){
-      KernelArg *arg= getArgForBuffer(jenv, jniContext, buffer);
-      if (arg != NULL){
-         if (jniContext->isVerbose()){
-           fprintf(stderr, "explicitly reading buffer %s\n", arg->name);
-         }
-         arg->pin(jenv);
+      if (config== NULL){
+         config = new Config(jenv);
+      }
+      cl_int status = CL_SUCCESS;
+      JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+      if (jniContext != NULL){
+         KernelArg *arg= getArgForBuffer(jenv, jniContext, buffer);
+         if (arg != NULL){
+            if (config->isVerbose()){
+               fprintf(stderr, "explicitly reading buffer %s\n", arg->name);
+            }
+            arg->pin(jenv);
 
-         status = clEnqueueReadBuffer(jniContext->commandQueue, arg->value.arrayBuffer->mem, CL_FALSE, 0, 
-               arg->sizeInBytes,arg->value.arrayBuffer->addr , 0, NULL, &jniContext->readEvents[0]);
-		 if (jniContext->isVerbose()){
-		    fprintf(stderr, "explicitly read %s ptr=%lx len=%d\n", arg->name, arg->value.arrayBuffer->addr,arg->sizeInBytes );
-		 }
-         if (status != CL_SUCCESS) {
-            PRINT_CL_ERR(status, "clEnqueueReadBuffer()");
-            return status;
-         }
-         status = clWaitForEvents(1, jniContext->readEvents);
-         if (status != CL_SUCCESS) {
-            PRINT_CL_ERR(status, "clWaitForEvents");
-            return status;
-         }
-         if (jniContext->isProfilingEnabled()){
-            status = profile(&arg->value.arrayBuffer->read, &jniContext->readEvents[0], 0,arg->name, jniContext->profileBaseTime);
+            status = clEnqueueReadBuffer(jniContext->commandQueue, arg->arrayBuffer->mem, CL_FALSE, 0, 
+                  arg->arrayBuffer->lengthInBytes,arg->arrayBuffer->addr , 0, NULL, &jniContext->readEvents[0]);
+            if (config->isVerbose()){
+               fprintf(stderr, "explicitly read %s ptr=%lx len=%d\n", arg->name, arg->arrayBuffer->addr,arg->arrayBuffer->lengthInBytes );
+            }
             if (status != CL_SUCCESS) {
-               PRINT_CL_ERR(status, "profile ");
+               PRINT_CL_ERR(status, "clEnqueueReadBuffer()");
                return status;
             }
-         }
+            status = clWaitForEvents(1, jniContext->readEvents);
+            if (status != CL_SUCCESS) {
+               PRINT_CL_ERR(status, "clWaitForEvents");
+               return status;
+            }
+            if (config->isProfilingEnabled()){
+               status = profile(&arg->arrayBuffer->read, &jniContext->readEvents[0], 0,arg->name, jniContext->profileBaseTime);
+               if (status != CL_SUCCESS) {
+                  PRINT_CL_ERR(status, "profile ");
+                  return status;
+               }
+            }
 
-         clReleaseEvent(jniContext->readEvents[0]);
-         if (status != CL_SUCCESS) {
-            PRINT_CL_ERR(status, "clReleaseEvent() read event");
-            return status;
-         }
-         // since this is an explicit buffer get, we expect the buffer to have changed so we commit
-         arg->unpin(jenv); // was unpinCommit
+            clReleaseEvent(jniContext->readEvents[0]);
+            if (status != CL_SUCCESS) {
+               PRINT_CL_ERR(status, "clReleaseEvent() read event");
+               return status;
+            }
+            // since this is an explicit buffer get, we expect the buffer to have changed so we commit
+            arg->unpin(jenv); // was unpinCommit
 
-      }else{
-         if (jniContext->isVerbose()){
-            fprintf(stderr, "attempt to request to get a buffer that does not appear to be referenced from kernel\n");
+         }else{
+            if (config->isVerbose()){
+               fprintf(stderr, "attempt to request to get a buffer that does not appear to be referenced from kernel\n");
+            }
          }
       }
+      return 0;
    }
-   return 0;
-}
-
-
-jobject createProfileInfo(JNIEnv *jenv, ProfileInfo &profileInfo){
-   jobject profileInstance = JNIHelper::createInstance(jenv, ProfileInfoClass , ArgsVoidReturn(StringClassArg IntArg LongArg LongArg LongArg LongArg), 
-         ((jstring)(profileInfo.name==NULL?NULL:jenv->NewStringUTF(profileInfo.name))),
-         ((jint)profileInfo.type), 
-         ((jlong)profileInfo.start),
-         ((jlong)profileInfo.end),
-         ((jlong)profileInfo.queued),
-         ((jlong)profileInfo.submit));
-   return(profileInstance);
-}
 
 JNI_JAVA(jobject, KernelRunner, getProfileInfoJNI)
    (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) {
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   jobject returnList = NULL;
-   if (jniContext != NULL){
-      returnList = JNIHelper::createInstance(jenv, ArrayListClass, VoidReturn );
-      if (jniContext->isProfilingEnabled()){
-
-         for (jint i=0; i<jniContext->argc; i++){ 
-            KernelArg *arg= jniContext->args[i];
-            if (arg->isArray()){
-               if (arg->isMutableByKernel() && arg->value.arrayBuffer->write.valid){
-                  jobject writeProfileInfo = createProfileInfo(jenv,  arg->value.arrayBuffer->write);
-                  JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), writeProfileInfo);
+      if (config== NULL){
+         config = new Config(jenv);
+      }
+      cl_int status = CL_SUCCESS;
+      JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
+      jobject returnList = NULL;
+      if (jniContext != NULL){
+         returnList = JNIHelper::createInstance(jenv, ArrayListClass, VoidReturn );
+         if (config->isProfilingEnabled()){
+
+            for (jint i=0; i<jniContext->argc; i++){ 
+               KernelArg *arg= jniContext->args[i];
+               if (arg->isArray()){
+                  if (arg->isMutableByKernel() && arg->arrayBuffer->write.valid){
+                     jobject writeProfileInfo = arg->arrayBuffer->write.createProfileInfoInstance(jenv);
+                     JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), writeProfileInfo);
+                  }
                }
             }
-         }
 
-         for (jint pass=0; pass<jniContext->passes; pass++){
-            jobject executeProfileInfo = createProfileInfo(jenv, jniContext->exec[pass]);
-            JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), executeProfileInfo);
-         }
+            for (jint pass=0; pass<jniContext->passes; pass++){
+               jobject executeProfileInfo = jniContext->exec[pass].createProfileInfoInstance(jenv);
+               JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), executeProfileInfo);
+            }
 
-         for (jint i=0; i<jniContext->argc; i++){ 
-            KernelArg *arg= jniContext->args[i];
-            if (arg->isArray()){
-               if (arg->isReadByKernel() && arg->value.arrayBuffer->read.valid){
-                  jobject readProfileInfo = createProfileInfo(jenv,  arg->value.arrayBuffer->read);
-                  JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), readProfileInfo);
+            for (jint i=0; i<jniContext->argc; i++){ 
+               KernelArg *arg= jniContext->args[i];
+               if (arg->isArray()){
+                  if (arg->isReadByKernel() && arg->arrayBuffer->read.valid){
+                     jobject readProfileInfo = arg->arrayBuffer->read.createProfileInfoInstance(jenv);
+                     JNIHelper::callVoid(jenv, returnList, "add", ArgsBooleanReturn(ObjectClassArg), readProfileInfo);
+                  }
                }
             }
          }
       }
+      return returnList;
    }
-   return returnList;
-}
-
-
-/*
-JNI_JAVA(jint, KernelRunner, getMaxComputeUnitsJNI)
-   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) {
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext != NULL){
-      return(jniContext->maxComputeUnits);
-   }else{
-      return(0);
-   }
-}
-
-JNI_JAVA(jint, KernelRunner, getMaxWorkItemDimensionsJNI)
-   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) {
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext != NULL){
-      return(jniContext->maxWorkItemDimensions);
-   }else{
-      return(0);
-   }
-}
-
-JNI_JAVA(jint, KernelRunner, getMaxWorkGroupSizeJNI)
-   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle) {
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext != NULL){
-      return(jniContext->maxWorkGroupSize);
-   }else{
-      return(0);
-   }
-}
-
-JNI_JAVA(jint, KernelRunner, getMaxWorkItemSizeJNI)
-   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jint _index) {
-   cl_int status = CL_SUCCESS;
-   JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
-   if (jniContext != NULL && _index >=0 && _index <= (int)(jniContext->maxWorkItemDimensions)){
-      return(jniContext->maxWorkItemSizes[_index]);
-   }else{
-      return(0);
-   }
-}
-*/
diff --git a/com.amd.aparapi.jni/src/cpp/arrayBuffer.cpp b/com.amd.aparapi.jni/src/cpp/arrayBuffer.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..179a82cd95efe49de2e4aafe67bcf69822ef5029
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/arrayBuffer.cpp
@@ -0,0 +1,64 @@
+/*
+   Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+   following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+   disclaimer. 
+
+   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided with the distribution. 
+
+   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+   laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 
+   through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000.  Further, pursuant to Section 740.6 of
+   the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+   Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export 
+   Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in 
+   Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) 
+   export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+   direct product is subject to national security controls as identified on the Commerce Control List (currently 
+   found in Supplement 1 to Part 774 of EAR).  For the most current Country Group listings, or for additional 
+   information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+   and Security?s website at http://www.bis.doc.gov/. 
+   */
+#define ARRAYBUFFER_SOURCE
+#include "arrayBuffer.h"
+
+ArrayBuffer::ArrayBuffer():
+   javaArray((jobject) 0),
+   length(0),
+   lengthInBytes(0),
+   mem((cl_mem) 0),
+   addr(NULL),
+   memMask((cl_uint)0),
+   isCopy(false),
+   isPinned(false){
+   }
+
+void ArrayBuffer::unpinAbort(JNIEnv *jenv){
+   jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr,JNI_ABORT);
+   isPinned = JNI_FALSE;
+}
+void ArrayBuffer::unpinCommit(JNIEnv *jenv){
+   jenv->ReleasePrimitiveArrayCritical((jarray)javaArray, addr, 0);
+   isPinned = JNI_FALSE;
+}
+void ArrayBuffer::pin(JNIEnv *jenv){
+   void *ptr = addr;
+   addr = jenv->GetPrimitiveArrayCritical((jarray)javaArray,&isCopy);
+   isPinned = JNI_TRUE;
+}
diff --git a/com.amd.aparapi.jni/src/cpp/arrayBuffer.h b/com.amd.aparapi.jni/src/cpp/arrayBuffer.h
new file mode 100644
index 0000000000000000000000000000000000000000..b51f53311ae36a496dc4a33c31a5ed8cacc51700
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/arrayBuffer.h
@@ -0,0 +1,64 @@
+/*
+   Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+   following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+   disclaimer. 
+
+   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided with the distribution. 
+
+   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+   laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 
+   through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000.  Further, pursuant to Section 740.6 of
+   the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+   Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export 
+   Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in 
+   Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) 
+   export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+   direct product is subject to national security controls as identified on the Commerce Control List (currently 
+   found in Supplement 1 to Part 774 of EAR).  For the most current Country Group listings, or for additional 
+   information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+   and Security?s website at http://www.bis.doc.gov/. 
+   */
+
+#ifndef ARRAYBUFFER_H
+#define ARRAYBUFFER_H
+#include "common.h"
+#include "profileInfo.h"
+
+class ArrayBuffer{
+   public:
+      jobject javaArray;        // The java array that this arg is mapped to 
+      cl_uint length;           // the number of elements for arrays (used only when ARRAYLENGTH bit is set for this arg)
+      jint lengthInBytes;       // bytes in the array or directBuf
+      cl_mem mem;               // the opencl buffer 
+      void *addr;               // the last address where we saw this java array object
+      cl_uint memMask;          // the mask used for createBuffer
+      jboolean isCopy;
+      jboolean isPinned;
+      char memSpec[128];        // The string form of the mask we used for create buffer. for debugging
+      ProfileInfo read;
+      ProfileInfo write;
+
+      ArrayBuffer();
+      void unpinAbort(JNIEnv *jenv);
+      void unpinCommit(JNIEnv *jenv);
+      void pin(JNIEnv *jenv);
+};
+
+#endif // ARRAYBUFFER_H
diff --git a/com.amd.aparapi.jni/src/cpp/clHelper.cpp b/com.amd.aparapi.jni/src/cpp/clHelper.cpp
index bb3f0d209ec7f0646bb25e04a656beb94c8343a1..a276750937abf6dbb241a067792d17d04bb50aa1 100644
--- a/com.amd.aparapi.jni/src/cpp/clHelper.cpp
+++ b/com.amd.aparapi.jni/src/cpp/clHelper.cpp
@@ -152,4 +152,3 @@ jstring CLHelper::getExtensions(JNIEnv *jenv, cl_device_id deviceId, cl_int *sta
 }
 
 
-
diff --git a/com.amd.aparapi.jni/src/cpp/clHelper.h b/com.amd.aparapi.jni/src/cpp/clHelper.h
index d4ec57ab7923690a50c222b6cc1a9d934bce879e..7f5f18e4cf321dfbdd8f1203487305741553dd81 100644
--- a/com.amd.aparapi.jni/src/cpp/clHelper.h
+++ b/com.amd.aparapi.jni/src/cpp/clHelper.h
@@ -68,5 +68,90 @@ class CLHelper{
    static jstring getExtensions(JNIEnv *jenv, cl_device_id deviceId, cl_int *status);
 };
 
+template <typename  T> class List; // forward
+
+template <typename  T> class Ref{
+   private:
+      T value;
+      int line;
+      char *fileName;
+      Ref<T> *next;
+      friend class List<T>;
+   public:
+      Ref(T _value, int _line, char* _fileName);
+};
+
+template <typename  T> class List{
+   private:
+      char *name;
+      Ref<T> *head;
+      int count;
+   public:
+      List(char *_name);
+      void add(T _value, int _line, char *_fileName);
+      void remove(T _value, int _line, char *_fileName);
+      void report(FILE *stream);
+};
+
+template <typename  T> Ref<T>::Ref(T _value, int _line, char* _fileName):
+   value(_value),
+   line(_line),
+   fileName(_fileName),
+   next(NULL){
+   }
+
+template <typename  T> List<T>::List(char *_name): 
+   head(NULL),
+   count(0),
+   name(_name){
+   }
+
+template <typename  T> void List<T>::add(T _value, int _line, char *_fileName){
+   Ref<T> *handle = new Ref<T>(_value, _line, _fileName);
+   handle->next = head; 
+   head = handle;
+   count++;
+}
+
+template <typename  T> void List<T>::remove(T _value, int _line, char *_fileName){
+   for (Ref<T> *ptr = head, *last=NULL; ptr != NULL; last=ptr, ptr = ptr->next){
+      if (ptr->value == _value){
+         if (last == NULL){ // head 
+            head = ptr->next;
+         }else{ // !head
+            last->next = ptr->next;
+         }
+         delete ptr;
+         count--;
+         return;
+      }
+   }
+   fprintf(stderr, "FILE %s LINE %d failed to find %s to remove %0lx\n", _fileName, _line, name, _value);
+}
+
+template <typename  T> void List<T>::report(FILE *stream){
+   if (head != NULL){
+      fprintf(stream, "Resource report %d resources of type %s still in play ", count, name);
+      for (Ref<T> *ptr = head; ptr != NULL; ptr = ptr->next){
+         fprintf(stream, " %0lx(%d)", ptr->value, ptr->line);
+      }
+      fprintf(stream, "\n");
+   }
+}
+
+#ifdef CLHELPER_SOURCE
+List<cl_command_queue> commandQueueList("cl_command_queue");
+List<cl_mem> memList("cl_mem");
+List<cl_event> readEventList("cl_event (read)");
+List<cl_event> executeEventList("cl_event (exec)");
+List<cl_event> writeEventList("cl_event (write)");
+#else
+extern List<cl_command_queue> commandQueueList;
+extern List<cl_mem> memList;
+extern List<cl_event> readEventList;
+extern List<cl_event> executeEventList;
+extern List<cl_event> writeEventList;
+#endif
+
 #endif // CLHELPER_H
 
diff --git a/com.amd.aparapi.jni/src/cpp/config.cpp b/com.amd.aparapi.jni/src/cpp/config.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a56334c559aa77cb0b7c837ac90bbe7637d656ad
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/config.cpp
@@ -0,0 +1,77 @@
+/*
+   Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+   following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+   disclaimer. 
+
+   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided with the distribution. 
+
+   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+   laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 
+   through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000.  Further, pursuant to Section 740.6 of
+   the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+   Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export 
+   Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in 
+   Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) 
+   export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+   direct product is subject to national security controls as identified on the Commerce Control List (currently 
+   found in Supplement 1 to Part 774 of EAR).  For the most current Country Group listings, or for additional 
+   information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+   and Security?s website at http://www.bis.doc.gov/. 
+   */
+#define CONFIG_SOURCE
+#include "config.h"
+
+jboolean Config::getBoolean(JNIEnv *jenv, char *fieldName){
+   jfieldID fieldID = jenv->GetStaticFieldID(configClass, fieldName, "Z");
+   return(jenv->GetStaticBooleanField(configClass, fieldID));
+}
+
+Config::Config(JNIEnv *jenv){
+   enableVerboseJNI = false;
+   configClass = jenv->FindClass("com/amd/aparapi/Config");
+   if (configClass == NULL ||  jenv->ExceptionCheck()) {
+      jenv->ExceptionDescribe(); 
+      jenv->ExceptionClear();
+      fprintf(stderr, "bummer! getting Config from instance\n");
+   }else{
+      enableVerboseJNI = getBoolean(jenv, "enableVerboseJNI");
+      enableVerboseJNIOpenCLResourceTracking = getBoolean(jenv, "enableVerboseJNIOpenCLResourceTracking");
+      enableProfiling = getBoolean(jenv, "enableProfiling");
+      enableProfilingCSV = getBoolean(jenv, "enableProfilingCSV");
+   }
+   fprintf(stderr, "Config::enableVerboseJNI=%s\n",enableVerboseJNI?"true":"false");
+   fprintf(stderr, "Config::enableVerboseJNIOpenCLResourceTracking=%s\n",enableVerboseJNIOpenCLResourceTracking?"true":"false");
+   fprintf(stderr, "Config::enableProfiling=%s\n",enableProfiling?"true":"false");
+   fprintf(stderr, "Config::enableProfilingCSV=%s\n",enableProfilingCSV?"true":"false");
+}
+
+jboolean Config::isVerbose(){
+   return enableVerboseJNI;
+}
+
+jboolean Config::isProfilingCSVEnabled(){
+   return enableProfilingCSV;
+}
+jboolean Config::isTrackingOpenCLResources(){
+   return enableVerboseJNIOpenCLResourceTracking;
+}
+jboolean Config::isProfilingEnabled(){
+   return enableProfiling;
+}
diff --git a/com.amd.aparapi.jni/src/cpp/config.h b/com.amd.aparapi.jni/src/cpp/config.h
new file mode 100644
index 0000000000000000000000000000000000000000..ccebf6be5d54639132cc57846503ca1428006b66
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/config.h
@@ -0,0 +1,64 @@
+/*
+   Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+   following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+   disclaimer. 
+
+   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided with the distribution. 
+
+   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+   laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 
+   through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000.  Further, pursuant to Section 740.6 of
+   the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+   Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export 
+   Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in 
+   Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) 
+   export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+   direct product is subject to national security controls as identified on the Commerce Control List (currently 
+   found in Supplement 1 to Part 774 of EAR).  For the most current Country Group listings, or for additional 
+   information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+   and Security?s website at http://www.bis.doc.gov/. 
+   */
+#ifndef CONFIG_H
+#define CONFIG_H
+#include "common.h"
+
+class Config{
+   public: 
+      jboolean configured;
+      jclass configClass;
+      jboolean enableVerboseJNI;
+      jboolean enableVerboseJNIOpenCLResourceTracking;
+      jboolean enableProfiling;
+      jboolean enableProfilingCSV;
+
+      jboolean getBoolean(JNIEnv *jenv, char *fieldName);
+      Config(JNIEnv *jenv);
+      jboolean isVerbose();
+      jboolean isProfilingCSVEnabled();
+      jboolean isTrackingOpenCLResources();
+      jboolean isProfilingEnabled();
+};
+
+#ifdef CONFIG_SOURCE
+Config *config=NULL;
+#else
+extern Config *config;
+#endif
+#endif 
diff --git a/com.amd.aparapi.jni/src/cpp/profileInfo.cpp b/com.amd.aparapi.jni/src/cpp/profileInfo.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..284d33a52ba8cb44d59490f16128ec42bca810a0
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/profileInfo.cpp
@@ -0,0 +1,60 @@
+/*
+   Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+   following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+   disclaimer. 
+
+   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided with the distribution. 
+
+   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+   laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 
+   through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000.  Further, pursuant to Section 740.6 of
+   the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+   Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export 
+   Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in 
+   Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) 
+   export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+   direct product is subject to national security controls as identified on the Commerce Control List (currently 
+   found in Supplement 1 to Part 774 of EAR).  For the most current Country Group listings, or for additional 
+   information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+   and Security?s website at http://www.bis.doc.gov/. 
+   */
+#define PROFILEINFO_SOURCE
+#include "profileInfo.h"
+
+ProfileInfo::ProfileInfo():
+   valid(false),
+   type(-1), //-1 unknown, 0 write, 1 execute, 2 read
+   name(NULL),
+   queued((cl_ulong)0L),
+   submit((cl_ulong)0L),
+   start((cl_ulong)0L),
+   end((cl_ulong)0L) {
+}
+
+jobject ProfileInfo::createProfileInfoInstance(JNIEnv *jenv){
+   jobject profileInstance = JNIHelper::createInstance(jenv, ProfileInfoClass , ArgsVoidReturn(StringClassArg IntArg LongArg LongArg LongArg LongArg), 
+         ((jstring)(name==NULL?NULL:jenv->NewStringUTF(name))),
+         ((jint)type), 
+         ((jlong)start),
+         ((jlong)end),
+         ((jlong)queued),
+         ((jlong)submit));
+   return(profileInstance);
+}
diff --git a/com.amd.aparapi.jni/src/cpp/profileInfo.h b/com.amd.aparapi.jni/src/cpp/profileInfo.h
new file mode 100644
index 0000000000000000000000000000000000000000..a100922b51f1becc79549cdda0321060a4a19e96
--- /dev/null
+++ b/com.amd.aparapi.jni/src/cpp/profileInfo.h
@@ -0,0 +1,56 @@
+/*
+   Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+   following conditions are met:
+
+   Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+   disclaimer. 
+
+   Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+   disclaimer in the documentation and/or other materials provided with the distribution. 
+
+   Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission. 
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+   WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+   If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+   laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 
+   through 774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000.  Further, pursuant to Section 740.6 of
+   the EAR, you hereby certify that, except pursuant to a license granted by the United States Department of Commerce
+   Bureau of Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export 
+   Administration Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in 
+   Country Groups D:1, E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) 
+   export to Country Groups D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced
+   direct product is subject to national security controls as identified on the Commerce Control List (currently 
+   found in Supplement 1 to Part 774 of EAR).  For the most current Country Group listings, or for additional 
+   information about the EAR or your obligations under those regulations, please refer to the U.S. Bureau of Industry
+   and Security?s website at http://www.bis.doc.gov/. 
+   */
+#ifndef PROFILEINFO_H
+#define PROFILEINFO_H
+#include "common.h"
+#include "jniHelper.h"
+
+class ProfileInfo{
+   public:
+      jboolean valid;
+      jint type; //0 write, 1 execute, 2 read
+      char *name;
+      cl_ulong queued;
+      cl_ulong submit;
+      cl_ulong start;
+      cl_ulong end;
+      ProfileInfo();
+      jobject createProfileInfoInstance(JNIEnv *jenv);
+};
+
+#endif
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
index 9436bff005e0a10d1520fa5d47f27bb073168144..06d0399360659b0a902d59424ea1917f7bfc2da4 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
@@ -41,6 +41,8 @@ import java.util.logging.Handler;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
+import com.amd.aparapi.KernelRunner.UsedByJNICode;
+
 /**
  * A central location for holding all runtime configurable properties as well as logging configuration.
  * 
@@ -71,7 +73,7 @@ class Config{
     *  Usage -Dcom.amd.aparapi.enableProfiling={true|false}
     *  
     */
-   static final boolean enableProfiling = Boolean.getBoolean(propPkgName + ".enableProfiling");
+   @UsedByJNICode static final boolean enableProfiling = Boolean.getBoolean(propPkgName + ".enableProfiling");
 
    /**
     * Allows the user to turn on OpenCL profiling for the JNI/OpenCL layer, this information will be written to CSV file
@@ -79,7 +81,7 @@ class Config{
     *  Usage -Dcom.amd.aparapi.enableProfiling={true|false}
     *  
     */
-   static final boolean enableProfilingCSV = Boolean.getBoolean(propPkgName + ".enableProfilingCSV");
+   @UsedByJNICode static final boolean enableProfilingCSV = Boolean.getBoolean(propPkgName + ".enableProfilingCSV");
 
    /**
     * Allows the user to request that verbose JNI messages be dumped to stderr.
@@ -87,7 +89,7 @@ class Config{
     *  Usage -Dcom.amd.aparapi.enableVerboseJNI={true|false}
     *  
     */
-   static final boolean enableVerboseJNI = Boolean.getBoolean(propPkgName + ".enableVerboseJNI");
+   @UsedByJNICode static final boolean enableVerboseJNI = Boolean.getBoolean(propPkgName + ".enableVerboseJNI");
    
    /**
     * Allows the user to request tracking of opencl resources.  
@@ -97,7 +99,7 @@ class Config{
     *  Usage -Dcom.amd.aparapi.enableOpenCLResourceTracking={true|false}
     *  
     */
-   static final boolean enableVerboseJNIOpenCLResourceTracking = Boolean.getBoolean(propPkgName + ".enableVerboseJNIOpenCLResourceTracking");
+   @UsedByJNICode static final boolean enableVerboseJNIOpenCLResourceTracking = Boolean.getBoolean(propPkgName + ".enableVerboseJNIOpenCLResourceTracking");
    /**
     * Allows the user to request that the execution mode of each kernel invocation be reported to stdout.
     * 
@@ -179,10 +181,7 @@ class Config{
          e.printStackTrace();
       }
 
-      if (dumpFlags) {
-         logger.fine("executionMode = " + executionMode);
-
-      }
+    
    };
 
    public interface InstructionListener{
@@ -213,6 +212,21 @@ class Config{
             e.printStackTrace();
          }
       }
+      if (dumpFlags) {
+         
+         System.out.println(propPkgName+".executionMode{GPU|CPU|JTP|SEQ}="+executionMode);
+         System.out.println(propPkgName+".logLevel{OFF|FINEST|FINER|FINE|WARNING|SEVERE|ALL}="+logger.getLevel());
+         System.out.println(propPkgName+".enableProfiling{true|false}="+enableProfiling);
+         System.out.println(propPkgName+".enableProfilingCSV{true|false}="+enableProfilingCSV);
+         System.out.println(propPkgName+".enableVerboseJNI{true|false}="+enableVerboseJNI);
+         System.out.println(propPkgName+".enableVerboseJNIOpenCLResourceTracking{true|false}="+enableVerboseJNIOpenCLResourceTracking);
+         System.out.println(propPkgName+".enableShowGeneratedOpenCL{true|false}="+enableShowGeneratedOpenCL);
+         System.out.println(propPkgName+".enableExecutionModeReporting{true|false}="+enableExecutionModeReporting);
+         System.out.println(propPkgName+".enableInstructionDecodeViewer{true|false}="+enableInstructionDecodeViewer);
+         System.out.println(propPkgName+".instructionListenerClassName{<class name which extends com.amd.aparapi.Config.InstructionListener>}="+instructionListenerClassName);
+       
+
+      }
    }
 
 }
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java
index fc4bdc9fbc59f3af6390309903e1305c5c4ece1c..75e5a9731f7c2c9e0eafe0cb07eb393c8b5d936c 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/KernelRunner.java
@@ -321,6 +321,17 @@ class KernelRunner{
     * @author rlamothe
     */
    @UsedByJNICode public static final int ARG_CHAR = 1 << 21;
+   
+   /**
+    * This 'bit' indicates that a particular <code>KernelArg</code> represents a <code>static</code> field (array or primitive).
+    * 
+    * 
+    * @see com.amd.aparapi.annotations.UsedByJNICode
+    * @see com.amd.aparapi.KernelRunner.KernelArg
+    * 
+    * @author gfrost
+    */
+   @UsedByJNICode public static final int ARG_STATIC = 1 << 22;
 
    static final String CL_KHR_FP64 = "cl_khr_fp64";
 
@@ -354,7 +365,7 @@ class KernelRunner{
     * 
     * @author gfrost
     */
-   @UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING = 1 << 0;
+   //@UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING = 1 << 0;
 
    /**
     * This 'bit' indicates that we wish to store profiling information in a CSV file from JNI code.
@@ -364,7 +375,7 @@ class KernelRunner{
     * 
     * @author gfrost
     */
-   @UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING_CSV = 1 << 1;
+  // @UsedByJNICode public static final int JNI_FLAG_ENABLE_PROFILING_CSV = 1 << 1;
 
    /**
     * This 'bit' indicates that we want to execute on the GPU.
@@ -388,7 +399,7 @@ class KernelRunner{
     * @author gfrost
     */
 
-   @UsedByJNICode public static final int JNI_FLAG_ENABLE_VERBOSE_JNI = 1 << 3;
+  // @UsedByJNICode public static final int JNI_FLAG_ENABLE_VERBOSE_JNI = 1 << 3;
    
    /**
     * This 'bit' indicates that we wish to enable OpenCL resource tracking by JNI layer to be written to stderr.<br/>
@@ -399,7 +410,7 @@ class KernelRunner{
     * @author gfrost
     */
 
-   @UsedByJNICode @Annotations.Experimental public static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4;
+ //  @UsedByJNICode @Annotations.Experimental public static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4;
 
 
    /**
@@ -441,11 +452,7 @@ class KernelRunner{
        */
       @UsedByJNICode public int type;
 
-      /**
-       * True if the field is static
-       */
-      @UsedByJNICode public boolean isStatic;
-
+     
       /**
        * Name of the field
        */
@@ -1391,10 +1398,10 @@ class KernelRunner{
                   }
                }
 
-               jniFlags |= (Config.enableProfiling ? JNI_FLAG_ENABLE_PROFILING : 0);
-               jniFlags |= (Config.enableProfilingCSV ? JNI_FLAG_ENABLE_PROFILING_CSV | JNI_FLAG_ENABLE_PROFILING : 0);
-               jniFlags |= (Config.enableVerboseJNI ? JNI_FLAG_ENABLE_VERBOSE_JNI : 0);
-               jniFlags |= (Config.enableVerboseJNIOpenCLResourceTracking ? JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING :0);
+             //  jniFlags |= (Config.enableProfiling ? JNI_FLAG_ENABLE_PROFILING : 0);
+             //  jniFlags |= (Config.enableProfilingCSV ? JNI_FLAG_ENABLE_PROFILING_CSV | JNI_FLAG_ENABLE_PROFILING : 0);
+             //  jniFlags |= (Config.enableVerboseJNI ? JNI_FLAG_ENABLE_VERBOSE_JNI : 0);
+             // jniFlags |= (Config.enableVerboseJNIOpenCLResourceTracking ? JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING :0);
                // jniFlags |= (kernel.getExecutionMode().equals(EXECUTION_MODE.GPU) ? JNI_FLAG_USE_GPU : 0);
                // Init the device to check capabilities before emitting the
                // code that requires the capabilities.
@@ -1461,7 +1468,10 @@ class KernelRunner{
                      args[i] = new KernelArg();
                      args[i].name = field.getName();
                      args[i].field = field;
-                     args[i].isStatic = (field.getModifiers() & Modifier.STATIC) == Modifier.STATIC;
+                     if ((field.getModifiers() & Modifier.STATIC)== Modifier.STATIC){
+                        args[i].type |= ARG_STATIC;
+                     }
+                  
                      Class<?> type = field.getType();
                      if (type.isArray()) {