diff --git a/CHANGELOG.md b/CHANGELOG.md
index 599aa2ec9a9fabaf0a91bb9ecb761978dbb30037..026e6081ebbe0bda18250544a150815ef2333f6d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@ jni# Aparapi jni Changelog
 ## 1.3.2
 
 * Fixed local arrays handling 1D and ND, to cope with arrays resizing across kernel executions
+* Aparapi now supports efficient execution on discrete GPUs and other devices with dedicated memory
 
 ## 1.3.1
 
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index c3c9e4cd4ad59f1c273ed1a20d690f543d45a4d4..5e51ce3d782b9391de612ef8581af0d37d2df919 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -28,4 +28,5 @@ Below are some of the specific details of various contributions.
 * Paul Miner issue #61 and #115 (JTP Speed up and fixes to explicit puts) June 13th 2013
 & lgalluci for his fix for issue #121 (incorrect toString for 3D ranges) July 6th 2013
 * Luis Mendes Issue #51 JVM crash when using multi-dimensional local arrays (refs #51)
-* Luis Mendes submitted local arrays handling 1D and ND, to cope with arrays resizing across kernel executions
\ No newline at end of file
+* Luis Mendes submitted local arrays handling 1D and ND, to cope with arrays resizing across kernel executions
+* Luis Mendes submitted #107: Aparapi now supports efficient execution on discrete GPUs and other devices
\ No newline at end of file
diff --git a/src/cpp/JNIHelper.h b/src/cpp/JNIHelper.h
index d8253c193b9d44908630f376288a700341def1b7..7166fb9f9e73c8e1bb850646db2b1c35eb5977ac 100644
--- a/src/cpp/JNIHelper.h
+++ b/src/cpp/JNIHelper.h
@@ -232,6 +232,11 @@ class JNIHelper {
           return getInstanceField<jT>(jenv, instance, fieldName, getSignature((jT)0));
       }
 
+      template<typename jT> // convenience overload: derives the JNI signature from jT via getSignature
+      static jT getInstanceFieldWithException(JNIEnv *jenv, jobject instance, const char *fieldName) {
+          return getInstanceFieldWithException<jT>(jenv, instance, fieldName, getSignature((jT)0)); // forwards to the 4-argument overload
+      }
+
       template<typename jT>
       static jT getInstanceField(JNIEnv *jenv, jobject instance, const char *fieldName, const char *signature) {
          jT value = (jT)0;
@@ -254,6 +259,25 @@ class JNIHelper {
          return(value);
       }
 
+      template<typename jT> // throwing variant: any failure surfaces as a std::string exception, with the pending Java exception cleared
+      static jT getInstanceFieldWithException(JNIEnv *jenv, jobject instance, const char *fieldName, const char *signature) {
+         jT value = (jT)0;
+         try {
+            jclass theClass = jenv->GetObjectClass(instance);
+            if (theClass == NULL ||  jenv->ExceptionCheck()) // must throw std::string: a bare literal is a const char* and would skip the handler below
+               throw std::string("bummer! getting class from instance\n");
+            jfieldID fieldId = jenv->GetFieldID(theClass,fieldName, signature);
+            if (fieldId == NULL || jenv->ExceptionCheck())
+               throw std::string("bummer getting ") + getType(value) + "field '" + fieldName + "' \n";
+            getField(jenv, instance, fieldId, &value);
+            if (jenv->ExceptionCheck())
+               throw std::string("bummer getting ") + getType(value) + "field '" + fieldName + "' \n";
+         } catch(std::string&) {
+            jenv->ExceptionClear(); // drop the pending Java exception; the C++ exception now carries the failure
+            throw; // rethrow the in-flight exception rather than a copy
+         }
+         return(value);
+      }
 
       static jfieldID GetFieldID(JNIEnv* jenv, jclass c, const char* name, const char* type) {
          jfieldID field = jenv->GetFieldID(c, name, type);
diff --git a/src/cpp/invoke/OpenCLJNI.cpp b/src/cpp/invoke/OpenCLJNI.cpp
index d72794ac0a37a7ee08421f260b8cf4703816425b..00127b215c0238fb59b9ec0dced3d8fbd1484551 100644
--- a/src/cpp/invoke/OpenCLJNI.cpp
+++ b/src/cpp/invoke/OpenCLJNI.cpp
@@ -68,6 +68,21 @@
 jobject OpenCLDevice::getPlatformInstance(JNIEnv *jenv, jobject deviceInstance){
    return(JNIHelper::getInstanceField<jobject>(jenv, deviceInstance, "platform", OpenCLPlatformClassArg ));
 }
+
+bool OpenCLDevice::isSharedMemory(JNIEnv *jenv, jobject deviceInstance) {
+    // Reads the boolean "sharedMemory" field of the Java device object. When the lookup throws,
+    // the field is treated as absent (older Aparapi versions) and the device is reported as shared.
+    jboolean sharedFlag = 0;
+    try {
+        sharedFlag = JNIHelper::getInstanceFieldWithException<jboolean>(jenv, deviceInstance, "sharedMemory");
+    } catch (std::string &) {
+        //For backwards compatibility with older Aparapi versions.
+        fprintf(stderr, "Property sharedMemory not found for class OpenCLDevice, using default: true\n");
+        return true;
+    }
+    return sharedFlag != 0;
+}
+
 cl_device_id OpenCLDevice::getDeviceId(JNIEnv *jenv, jobject deviceInstance){
    return((cl_device_id)JNIHelper::getInstanceField<jlong>(jenv, deviceInstance, "deviceId"));
 }
diff --git a/src/cpp/invoke/OpenCLJNI.h b/src/cpp/invoke/OpenCLJNI.h
index 2d24a6e0e53ad13ae522f95a64c89210895e1b42..eb24ad9f130c2b8602139124369d4ce16fde6a03 100644
--- a/src/cpp/invoke/OpenCLJNI.h
+++ b/src/cpp/invoke/OpenCLJNI.h
@@ -63,6 +63,7 @@
 
 class OpenCLDevice{
    public:
+      static bool isSharedMemory(JNIEnv *jenv, jobject deviceInstance); // reads the Java-side "sharedMemory" field of the device object
       static jobject getPlatformInstance(JNIEnv *jenv, jobject deviceInstance);
       static cl_device_id getDeviceId(JNIEnv *jenv, jobject deviceInstance);
 };
diff --git a/src/cpp/runKernel/Aparapi.cpp b/src/cpp/runKernel/Aparapi.cpp
index c346271d2912e88d11a844fb29cd94d91791a82f..02d967620ae5982dc488becd453553f145b97908 100644
--- a/src/cpp/runKernel/Aparapi.cpp
+++ b/src/cpp/runKernel/Aparapi.cpp
@@ -365,7 +365,12 @@ void updateArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argP
    cl_int status = CL_SUCCESS;
    // if either this is the first run or user changed input array
    // or gc moved something, then we create buffers/args
-   cl_uint mask = CL_MEM_USE_HOST_PTR;
+   cl_uint mask = 0;
+   if (jniContext->isSharedMemory()) {
+       mask |= CL_MEM_USE_HOST_PTR;
+   } else {
+       mask |= CL_MEM_COPY_HOST_PTR;
+   }
    if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE;
    else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY;
    else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY;
@@ -373,7 +378,7 @@ void updateArray(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& argP
 
    arg->arrayBuffer->syncMinimalParams(jenv, arg);
    if (config->isVerbose()) {
-      strcpy(arg->arrayBuffer->memSpec,"CL_MEM_USE_HOST_PTR");
+      strcpy(arg->arrayBuffer->memSpec, (mask & CL_MEM_COPY_HOST_PTR) != 0 ? "CL_MEM_COPY_HOST_PTR" : "CL_MEM_USE_HOST_PTR");
       if (mask & CL_MEM_READ_WRITE) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_WRITE");
       if (mask & CL_MEM_READ_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_READ_ONLY");
       if (mask & CL_MEM_WRITE_ONLY) strcat(arg->arrayBuffer->memSpec,"|CL_MEM_WRITE_ONLY");
@@ -411,12 +416,33 @@ void updateBuffer(JNIEnv* jenv, JNIContext* jniContext, KernelArg* arg, int& arg
 
    AparapiBuffer* buffer = arg->aparapiBuffer;
    cl_int status = CL_SUCCESS;
-   cl_uint mask = CL_MEM_USE_HOST_PTR;
+   cl_uint mask = 0;
+   if (jniContext->isSharedMemory()) {
+       mask |= CL_MEM_USE_HOST_PTR;
+   } else {
+       mask |= CL_MEM_COPY_HOST_PTR;
+   }
    if (arg->isReadByKernel() && arg->isMutableByKernel()) mask |= CL_MEM_READ_WRITE;
    else if (arg->isReadByKernel() && !arg->isMutableByKernel()) mask |= CL_MEM_READ_ONLY;
    else if (arg->isMutableByKernel()) mask |= CL_MEM_WRITE_ONLY;
    buffer->memMask = mask;
 
+   if (config->isVerbose()) {
+        std::string str = (mask & CL_MEM_COPY_HOST_PTR) != 0 ? "CL_MEM_COPY_HOST_PTR" : "CL_MEM_USE_HOST_PTR";
+        if (mask & CL_MEM_READ_WRITE) {
+            str += "|CL_MEM_READ_WRITE";
+        }
+        if (mask & CL_MEM_READ_ONLY) {
+            str += "|CL_MEM_READ_ONLY";
+        }
+        if (mask & CL_MEM_WRITE_ONLY) {
+            str += "|CL_MEM_WRITE_ONLY";
+        }
+
+        fprintf(stderr, "%s %d clCreateBuffer(context, %s, size=%08lx bytes, address=%p, &status)\n", arg->name,
+              argIdx, str.c_str(), (unsigned long)buffer->lengthInBytes, buffer->data);
+   }
+
    buffer->mem = clCreateBuffer(jniContext->context, buffer->memMask,
          buffer->lengthInBytes, buffer->data, &status);
 
diff --git a/src/cpp/runKernel/JNIContext.cpp b/src/cpp/runKernel/JNIContext.cpp
index ff9f875cfa2a8784c36f467feb173d47defd2dd4..363f388aa5f7afb43d9c0b0375defcd06e8d3689 100644
--- a/src/cpp/runKernel/JNIContext.cpp
+++ b/src/cpp/runKernel/JNIContext.cpp
@@ -34,6 +34,7 @@ JNIContext::JNIContext(JNIEnv *jenv, jobject _kernelObject, jobject _openCLDevic
    jobject platformInstance = OpenCLDevice::getPlatformInstance(jenv, openCLDeviceObject);
    cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance);
    deviceId = OpenCLDevice::getDeviceId(jenv, openCLDeviceObject);
+   sharedMemory = OpenCLDevice::isSharedMemory(jenv, openCLDeviceObject);
    cl_device_type returnedDeviceType;
    clGetDeviceInfo(deviceId, CL_DEVICE_TYPE,  sizeof(returnedDeviceType), &returnedDeviceType, NULL);
    //fprintf(stderr, "device[%p] CL_DEVICE_TYPE = %x\n", deviceId, returnedDeviceType);
diff --git a/src/cpp/runKernel/JNIContext.h b/src/cpp/runKernel/JNIContext.h
index c76e864ef99cab54b0b8b30dee13690bdd39239b..cbf8f53140cb4c0175c2c820927d6a69900e1972 100644
--- a/src/cpp/runKernel/JNIContext.h
+++ b/src/cpp/runKernel/JNIContext.h
@@ -25,6 +25,7 @@
 
 class JNIContext {
 private:
+   bool sharedMemory;
    jint flags;
    jboolean valid;
 public:
@@ -75,6 +76,10 @@ public:
       return((flags&com_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC)==com_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC?JNI_TRUE:JNI_FALSE);
    }
 
+   bool isSharedMemory() { // accessor for the flag cached from OpenCLDevice::isSharedMemory in the constructor
+       return sharedMemory;
+   }
+
    ~JNIContext(){
    }