diff --git a/com.amd.aparapi.jni/build.xml b/com.amd.aparapi.jni/build.xml
index 0ea10815afe0cd76afee85172bc42436ad1b4966..035bc928bbeb0a01880f7613fa7cc47b23c6534c 100644
--- a/com.amd.aparapi.jni/build.xml
+++ b/com.amd.aparapi.jni/build.xml
@@ -73,6 +73,55 @@ First consider editing the properties in build.properties
 
       <echo message="amd.app.sdk.dir ${amd.app.sdk.dir}"/>
 
+      <available property="linux.intel.app.sdk.exists" file="/opt/intel/opencl" type="dir"/>
+      <condition property="intel.app.sdk.dir" value="/opt/intel/opencl">
+         <and>
+            <os family="unix" />
+            <not>
+               <os family="mac" />
+            </not>
+            <isset property="linux.intel.app.sdk.exists" />
+            <not>
+               <isset property="win32.amd.app.sdk.exists" />
+            </not>
+            <not>
+               <isset property="win64.amd.app.sdk.exists" />
+            </not>
+         </and>
+      </condition>
+
+      <echo message=" intel.app.sdk.dir ${intel.app.sdk.dir}"/>
+
+      <condition property="vendor.name" value="amd">
+         <isset property="amd.app.sdk.dir" /> 
+      </condition>
+
+      <condition property="vendor.name" value="intel">
+         <and>
+            <isset property="intel.app.sdk.dir" /> 
+            <not>
+                <isset property="amd.app.sdk.dir" /> 
+            </not>
+         </and>
+      </condition>
+
+      <echo message=" vendor.name ${vendor.name}"/>
+  
+      <condition property="app.sdk.dir" value="${amd.app.sdk.dir}">
+         <isset property="amd.app.sdk.dir" /> 
+      </condition>
+
+      <condition property="app.sdk.dir" value="${intel.app.sdk.dir}">
+         <and>
+            <isset property="intel.app.sdk.dir" /> 
+            <not>
+                <isset property="app.sdk.dir" /> 
+            </not>
+         </and>
+      </condition>
+
+      <echo message="app.sdk.dir ${app.sdk.dir}"/>
+
 	  <!-- Check for Visual Studio Compiler -->
 	  <!-- This needs to be in descending order to properly handle multiple installations -->
 	  <available property="msvc.32.12.0.exists" file="C:/Program Files/Microsoft Visual Studio 12.0\VC\bin" type="dir"/>
@@ -321,6 +370,12 @@ First consider editing the properties in build.properties
       <condition property="optional.x64.subdir" value="" else="x64\">
         <equals arg1="${x86_or_x86_64}" arg2="x86"/>
       </condition>
+
+
+      <!-- Bare directory name (no trailing separator): it is appended after both '/' (gcc -L) and '\' (msvc /libpath:) -->
+      <condition property="optional.app.sdk.lib.subdir" value="lib" else="lib64">
+        <equals arg1="${x86_or_x86_64}" arg2="x86"/>
+      </condition>
       
       <condition property="gcc.m.value" value="32" else="64">
         <equals arg1="${x86_or_x86_64}" arg2="x86"/>
@@ -352,18 +407,18 @@ First consider editing the properties in build.properties
                   <os family="mac" />
                </not>
                <not>
-                  <isset property="amd.app.sdk.dir" />
+                  <isset property="app.sdk.dir" />
                </not>
             </and>
          </condition>
          <![CDATA[
          You will need to edit com.amd.aparapi.jni/build.xml to compile aparapi JNI code
 
-         You need to set amd.app.sdk.dir to point to the location where AMD APP SDK is installed
+         You need to set app.sdk.dir to point to the location where OpenCL SDK is installed
          ]]>
       </fail>
       
-      <available file="${amd.app.sdk.dir}" type="dir" property="amd.app.sdk.dir.exists" />
+      <available file="${app.sdk.dir}" type="dir" property="app.sdk.dir.exists" />
 
       <fail message="Error:">
          <condition>
@@ -372,14 +427,14 @@ First consider editing the properties in build.properties
                   <os family="mac" />
                </not>
                <not>
-                  <isset property="amd.app.sdk.dir.exists" />
+                  <isset property="app.sdk.dir.exists" />
                </not>
             </and>
          </condition>
          <![CDATA[
          You will need to edit com.amd.aparapi.jni/build.xml to compile aparapi JNI code
 
-         At present amd.app.sdk.dir is set (to ${amd.app.sdk.dir}) but that dir does not exist
+         At present app.sdk.dir is set (to ${app.sdk.dir}) but that dir does not exist
          ]]>
       </fail>
    </target>
@@ -458,7 +513,7 @@ First consider editing the properties in build.properties
          <arg value="-I${java.home}/../include" />
          <arg value="-I${java.home}/../include/linux" />
          <arg value="-Iinclude" />
-         <arg value="-I${amd.app.sdk.dir}/include" />
+         <arg value="-I${app.sdk.dir}/include" />
          <arg value="-Isrc/cpp" />
          <arg value="-Isrc/cpp/runKernel" />
          <arg value="-Isrc/cpp/invoke" />
@@ -480,7 +535,8 @@ First consider editing the properties in build.properties
          <arg value="src/cpp/classtools.cpp" />
          <arg value="src/cpp/JNIHelper.cpp" />
          <arg value="src/cpp/agent.cpp" />
-         <arg value="-L${amd.app.sdk.dir}/lib/${x86_or_x86_64}" />
+         <arg value="-L${app.sdk.dir}/lib/${x86_or_x86_64}" />
+         <arg value="-L${app.sdk.dir}/${optional.app.sdk.lib.subdir}" />
          <arg value="-lOpenCL" />
       </exec>
    </target>
@@ -543,7 +599,7 @@ First consider editing the properties in build.properties
          <arg value="/I${java.home}\..\include" />
          <arg value="/I${java.home}\..\include\win32" />
          <arg value="/Iinclude" />
-	     <arg value="/I${amd.app.sdk.dir}\include" />
+	     <arg value="/I${app.sdk.dir}\include" />
          <arg value="/IC:\Program Files (x86)\Windows Kits\8.0\Include\shared" />
          <arg value="/IC:\Program Files (x86)\Windows Kits\8.0\Include\um" />
          <arg value="/Isrc/cpp" />
@@ -568,7 +624,8 @@ First consider editing the properties in build.properties
          <arg value="/link" />
          <arg value="/libpath:${msvc.dir}\vc\lib\${optional.amd64.subdir}" />
          <arg value="/libpath:${msvc.sdk.dir}\lib\${optional.x64.subdir}" />
-	     <arg value="/libpath:${amd.app.sdk.dir}\lib\${x86_or_x86_64}" />
+	 <arg value="/libpath:${app.sdk.dir}\lib\${x86_or_x86_64}" />
+	 <arg value="/libpath:${app.sdk.dir}\${optional.app.sdk.lib.subdir}" />
          <arg value="/libpath:C:\Program Files (x86)\Windows Kits\8.0\Lib\win8\um\x64" />
          <arg value="OpenCL.lib" />
          <arg value="/out:${basedir}\dist\aparapi_${x86_or_x86_64}.dll" />
@@ -588,7 +645,7 @@ First consider editing the properties in build.properties
          <arg value="-DCL_USE_DEPRECATED_OPENCL_1_1_APIS"/>
          <arg value="/I${msvc.dir}\vc\include" />
          <arg value="/I${msvc.sdk.dir}\include" />
-         <arg value="/I${amd.app.sdk.dir}\include" />
+         <arg value="/I${app.sdk.dir}\include" />
          <arg value="/Isrc/cpp" />
          <arg value="/Isrc/cpp/runKernel" />
          <arg value="/Isrc/cpp/invoke" />
@@ -596,7 +653,8 @@ First consider editing the properties in build.properties
          <arg value="/link" />
          <arg value="/libpath:${msvc.dir}\vc\lib\${optional.amd64.subdir}" />
          <arg value="/libpath:${msvc.sdk.dir}\lib\${optional.x64.subdir}" />
-         <arg value="/libpath:${amd.app.sdk.dir}\lib\${x86_or_x86_64}" />
+         <arg value="/libpath:${app.sdk.dir}\lib\${x86_or_x86_64}" />
+         <arg value="/libpath:${app.sdk.dir}\${optional.app.sdk.lib.subdir}" />
          <arg value="OpenCL.lib" />
          <arg value="/out:${basedir}/dist/cltest_${x86_or_x86_64}.exe" />
       </exec>
@@ -637,9 +695,10 @@ First consider editing the properties in build.properties
          <arg value="-DCL_USE_DEPRECATED_OPENCL_1_1_APIS"/>
          <arg value="-I${java.home}/../include" />
          <arg value="-I${java.home}/../include/linux" />
-         <arg value="-I${amd.app.sdk.dir}/include" />
+         <arg value="-I${app.sdk.dir}/include" />
          <arg value="src/cpp/cltest.cpp" />
-         <arg value="-L${amd.app.sdk.dir}/lib/${x86_or_x86_64}" />
+         <arg value="-L${app.sdk.dir}/lib/${x86_or_x86_64}" />
+         <arg value="-L${app.sdk.dir}/${optional.app.sdk.lib.subdir}" />
          <arg value="-lOpenCL" />
          <arg value="-o" />
          <arg value="${basedir}/cltest_${x86_or_x86_64}" />
diff --git a/com.amd.aparapi.jni/src/cpp/cltest.cpp b/com.amd.aparapi.jni/src/cpp/cltest.cpp
index 9604dbeb9e44ea0677b44887185810f4193b04a4..9e7c90b8ef7cc9346ca16049a683c7bcd8f6a04c 100644
--- a/com.amd.aparapi.jni/src/cpp/cltest.cpp
+++ b/com.amd.aparapi.jni/src/cpp/cltest.cpp
@@ -126,7 +126,7 @@ int main(int argc, char **argv){
       fprintf(stderr, "   CL_PLATFORM_VERSION.\"%s\"\n", platformVersionName); 
       fprintf(stderr, "   CL_PLATFORM_NAME....\"%s\"\n", platformName); 
       cl_uint deviceIdc;
-      cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ;
+      cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR;
       status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
       fprintf(stderr, "   Platform %d has %d device%s{\n", platformIdx, deviceIdc, ((deviceIdc==1)?"":"s"));
       if (status == CL_SUCCESS && deviceIdc >0 ){
diff --git a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
index f35bcd12bbaf4a23f4f5cc015e36c945a715c342..ccfa62bfbae9254f8821dac1fa436380efeb6695 100644
--- a/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
+++ b/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
@@ -461,7 +461,7 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
                JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance);
 
                cl_uint deviceIdc;
-               cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ;
+               cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR;
                status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
                if (status == CL_SUCCESS && deviceIdc > 0 ){
                   cl_device_id* deviceIds = new cl_device_id[deviceIdc];
@@ -489,7 +489,8 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
                         }
                         if (deviceType & CL_DEVICE_TYPE_ACCELERATOR) {
                            deviceType &= ~CL_DEVICE_TYPE_ACCELERATOR;
-                           fprintf(stderr, "Accelerator ");
+                           //fprintf(stderr, "Accelerator ");
+                           deviceTypeEnumInstance = JNIHelper::getStaticFieldObject(jenv, DeviceTypeClass, "ACC", DeviceTypeClassArg);
                         }
                         //fprintf(stderr, "(0x%llx) ", deviceType);
                         //fprintf(stderr, "\n");
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp
index db9a5cfa498bf8a51b54915f52bf8ad7a310cc35..1b13151963aa66872aaefb2e630b808a43837129 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.cpp
@@ -13,6 +13,8 @@ JNIContext::JNIContext(JNIEnv *jenv, jobject _kernelObject, jobject _openCLDevic
       deviceType(((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)?CL_DEVICE_TYPE_GPU:CL_DEVICE_TYPE_CPU),
       profileFile(NULL), 
       valid(JNI_FALSE){
+   if (flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC)
+      deviceType = CL_DEVICE_TYPE_ACCELERATOR;
    cl_int status = CL_SUCCESS;
    jobject platformInstance = OpenCLDevice::getPlatformInstance(jenv, openCLDeviceObject);
    cl_platform_id platformId = OpenCLPlatform::getPlatformId(jenv, platformInstance);
diff --git a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h
index e22c5ff418b446c83ab6730b089240803e2d1370..aebad48a54ef7767be8694fca9165a4c03b47cdd 100644
--- a/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h
+++ b/com.amd.aparapi.jni/src/cpp/runKernel/JNIContext.h
@@ -51,6 +51,12 @@ public:
       return((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU?JNI_TRUE:JNI_FALSE);
    }
 
+   jboolean isUsingACC(){
+      // I'm pretty sure that this is equivalent to:
+      //return flags & com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC;
+      return((flags&com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC)==com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_ACC?JNI_TRUE:JNI_FALSE);
+   }
+
    ~JNIContext(){
    }
 
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
index 8f9df4d32b367539422d6c4e586c7f4edb256046..339ee89e9e482130aa26c93cd1904f72d3026460 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
@@ -78,7 +78,7 @@ public class Config extends ConfigJNI{
    /**
     * Allows the user to request a specific Kernel.EXECUTION_MODE enum value for all Kernels.
     *
-    *  Usage -Dcom.amd.aparapi.executionMode={SEQ|JTP|CPU|GPU}
+    *  Usage -Dcom.amd.aparapi.executionMode={SEQ|JTP|CPU|GPU|ACC}
     *  
     *  @see com.amd.aparapi.Kernel.EXECUTION_MODE
     */
@@ -188,7 +188,7 @@ public class Config extends ConfigJNI{
       }
 
       if (dumpFlags) {
-         System.out.println(propPkgName + ".executionMode{GPU|CPU|JTP|SEQ}=" + executionMode);
+         System.out.println(propPkgName + ".executionMode{GPU|ACC|CPU|JTP|SEQ}=" + executionMode);
          System.out.println(propPkgName + ".logLevel{OFF|FINEST|FINER|FINE|WARNING|SEVERE|ALL}=" + logger.getLevel());
          System.out.println(propPkgName + ".enableProfiling{true|false}=" + enableProfiling);
          System.out.println(propPkgName + ".enableProfilingCSV{true|false}=" + enableProfilingCSV);
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
index 93c4cc4324b30e39a22c7c8e1e6b9bf857a4326b..f09dfb892d6a50ef594b53235dbbc1e21d493f5a 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
@@ -306,11 +306,12 @@ public abstract class Kernel implements Cloneable {
     * determine how it executed.  
     *    
     * <p>
-    * Aparapi supports 4 execution modes. 
+    * Aparapi supports 5 execution modes. The default is GPU when OpenCL is available, otherwise JTP.
     * <ul>
     * <table>
     * <tr><th align="left">Enum value</th><th align="left">Execution</th></tr>
     * <tr><td><code><b>GPU</b></code></td><td>Execute using OpenCL on first available GPU device</td></tr>
+    * <tr><td><code><b>ACC</b></code></td><td>Execute using OpenCL on first available Accelerator device</td></tr>
     * <tr><td><code><b>CPU</b></code></td><td>Execute using OpenCL on first available CPU device</td></tr>
     * <tr><td><code><b>JTP</b></code></td><td>Execute using a Java Thread Pool (one thread spawned per available core)</td></tr>
     * <tr><td><code><b>SEQ</b></code></td><td>Execute using a single loop. This is useful for debugging but will be less 
@@ -329,7 +330,7 @@ public abstract class Kernel implements Cloneable {
     *     kernel.execute(values.length);
     * </pre></blockquote>
     * <p>
-    * Alternatively, the property <code>com.amd.aparapi.executionMode</code> can be set to one of <code>JTP,GPU,CPU,SEQ</code>
+    * Alternatively, the property <code>com.amd.aparapi.executionMode</code> can be set to one of <code>JTP,GPU,ACC,CPU,SEQ</code>
     * when an application is launched. 
     * <p><blockquote><pre>
     *    java -classpath ....;aparapi.jar -Dcom.amd.aparapi.executionMode=GPU MyApplication  
@@ -369,7 +370,11 @@ public abstract class Kernel implements Cloneable {
        * <p>
        * This is meant to be used for debugging a kernel.
        */
-      SEQ;
+      SEQ,
+      /**
+       * The value representing execution on an accelerator device (e.g. Xeon Phi) via OpenCL.
+       */
+      ACC;
 
       static EXECUTION_MODE getDefaultExecutionMode() {
          EXECUTION_MODE defaultExecutionMode = OpenCLLoader.isOpenCLAvailable() ? GPU : JTP;
@@ -446,7 +451,7 @@ public abstract class Kernel implements Cloneable {
 
       static boolean anyOpenCL(LinkedHashSet<EXECUTION_MODE> _executionModes) {
          for (final EXECUTION_MODE mode : _executionModes) {
-            if ((mode == GPU) || (mode == CPU)) {
+            if ((mode == GPU) || (mode == ACC) || (mode == CPU)) {
                return true;
             }
          }
@@ -454,7 +459,7 @@ public abstract class Kernel implements Cloneable {
       }
 
       public boolean isOpenCL() {
-         return (this == GPU) || (this == CPU);
+         return (this == GPU) || (this == ACC) || (this == CPU);
       }
    };
 
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java b/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
index 48b335159971e0a8471dc3a1789e57adc47449bc..a4bfcdeb9d6411ce52e9593e41d2fd9f3294a9eb 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
@@ -11,27 +11,47 @@ public abstract class Device{
       GPU,
       CPU,
       JTP,
-      SEQ
+      SEQ,
+      ACC
    };
 
+   /**
+    * @return the device, of any type, that has the most compute units
+    */
    public static Device best() {
       return (OpenCLDevice.select(new DeviceComparitor(){
          @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) {
-            if (_deviceLhs.getType() != _deviceRhs.getType()) {
-               if (_deviceLhs.getType() == TYPE.GPU) {
-                  return (_deviceLhs);
-               } else {
-                  return (_deviceRhs);
-               }
+            if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) {
+               return (_deviceLhs);
+            } else {
+               return (_deviceRhs);
             }
+         }
+      }));
+   }
 
+   public static Device bestGPU() {
+      return (OpenCLDevice.select(new DeviceComparitor(){
+         @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) {
             if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) {
                return (_deviceLhs);
             } else {
                return (_deviceRhs);
             }
          }
-      }));
+      }, Device.TYPE.GPU));
+   }
+
+   public static Device bestACC() {
+      return (OpenCLDevice.select(new DeviceComparitor(){
+         @Override public OpenCLDevice select(OpenCLDevice _deviceLhs, OpenCLDevice _deviceRhs) {
+            if (_deviceLhs.getMaxComputeUnits() > _deviceRhs.getMaxComputeUnits()) {
+               return (_deviceLhs);
+            } else {
+               return (_deviceRhs);
+            }
+         }
+      }, Device.TYPE.ACC));
    }
 
    public static Device first(final Device.TYPE _type) {
@@ -51,6 +71,11 @@ public abstract class Device{
 
    }
 
+   public static Device firstACC() {
+      return (first(Device.TYPE.ACC));
+
+   }
+
    protected TYPE type = TYPE.UNKNOWN;
 
    protected int maxWorkGroupSize;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java b/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java
index 588960586aec5e585d3bcfa9e6f54f8b5e10ce99..61bfe548a2b292191f91de30bc77f74a70a3b615 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/device/OpenCLDevice.java
@@ -448,6 +448,25 @@ public class OpenCLDevice extends Device{
       return (device);
    }
 
+   public static OpenCLDevice select(DeviceComparitor _deviceComparitor, Device.TYPE _type) {
+      OpenCLDevice device = null;
+      final OpenCLPlatform platform = new OpenCLPlatform(0, null, null, null);
+
+      for (final OpenCLPlatform p : platform.getOpenCLPlatforms()) {
+         for (final OpenCLDevice d : p.getOpenCLDevices()) {
+            if (d.getType() != _type) continue;
+            if (device == null) {
+               device = d;
+            } else {
+               device = _deviceComparitor.select(device, d);
+            }
+         }
+      }
+
+      return (device);
+   }
+
+
    @Override public String toString() {
       final StringBuilder s = new StringBuilder("{");
       boolean first = true;
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
index 4520393007f56e94418ed144cbca8da2cf5a2ca5..d34926d2bb73a4bcf6afa629329bebf9e513336b 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
@@ -275,6 +275,18 @@ public abstract class KernelRunnerJNI{
     * @author gfrost
     */
    //  @UsedByJNICode @Annotations.Experimental protected static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4;
+   
+   /**
+    * This 'bit' indicates that we want to execute on the Accelerator.
+    * 
+    * Be careful changing final constants starting with JNI.<br/>
+    * 
+    * @see com.amd.aparapi.internal.annotation.UsedByJNICode
+    * 
+    * @author ekasit
+    */
+   @UsedByJNICode protected static final int JNI_FLAG_USE_ACC = 1 << 5;
+
 
    /*
     * Native methods
diff --git a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
index cdb6c9d094007713ad2da09f1219b06e5f491682..ad66f9b39f69d8cc77473941971d7efefcb321b5 100644
--- a/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
+++ b/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
@@ -103,6 +103,8 @@ public class KernelRunner extends KernelRunnerJNI{
 
    private int argc;
 
+   private boolean isFallBack = false; // If isFallBack, rebuild the kernel (necessary?)
+
    private static final ForkJoinWorkerThreadFactory lowPriorityThreadFactory = new ForkJoinWorkerThreadFactory(){
       @Override public ForkJoinWorkerThread newThread(ForkJoinPool pool) {
          ForkJoinWorkerThread newThread = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool);
@@ -984,6 +986,7 @@ public class KernelRunner extends KernelRunnerJNI{
    }
 
    synchronized private Kernel fallBackAndExecute(String _entrypointName, final Range _range, final int _passes) {
+      isFallBack = true;
       if (kernel.hasNextExecutionMode()) {
          kernel.tryNextExecutionMode();
       } else {
@@ -996,14 +999,14 @@ public class KernelRunner extends KernelRunnerJNI{
    synchronized private Kernel warnFallBackAndExecute(String _entrypointName, final Range _range, final int _passes,
          Exception _exception) {
       if (logger.isLoggable(Level.WARNING)) {
-         logger.warning("Reverting to Java Thread Pool (JTP) for " + kernel.getClass() + ": " + _exception.getMessage());
+         logger.warning("Reverting to the next execution mode for " + kernel.getClass() + ": " + _exception.getMessage());
          _exception.printStackTrace();
       }
       return fallBackAndExecute(_entrypointName, _range, _passes);
    }
 
    synchronized private Kernel warnFallBackAndExecute(String _entrypointName, final Range _range, final int _passes, String _excuse) {
-      logger.warning("Reverting to Java Thread Pool (JTP) for " + kernel.getClass() + ": " + _excuse);
+      logger.warning("Reverting to the next execution mode for " + kernel.getClass() + ": " + _excuse);
       return fallBackAndExecute(_entrypointName, _range, _passes);
    }
 
@@ -1023,12 +1026,14 @@ public class KernelRunner extends KernelRunnerJNI{
          Device device = _range.getDevice();
 
          if ((device == null) || (device instanceof OpenCLDevice)) {
-            if (entryPoint == null) {
-               try {
-                  final ClassModel classModel = ClassModel.createClassModel(kernel.getClass());
-                  entryPoint = classModel.getEntrypoint(_entrypointName, kernel);
-               } catch (final Exception exception) {
-                  return warnFallBackAndExecute(_entrypointName, _range, _passes, exception);
+            if ((entryPoint == null) || (isFallBack)) {
+               if (entryPoint == null) {
+                  try {
+                     final ClassModel classModel = ClassModel.createClassModel(kernel.getClass());
+                     entryPoint = classModel.getEntrypoint(_entrypointName, kernel);
+                  } catch (final Exception exception) {
+                     return warnFallBackAndExecute(_entrypointName, _range, _passes, exception);
+                  }
                }
 
                if ((entryPoint != null) && !entryPoint.shouldFallback()) {
@@ -1042,10 +1047,19 @@ public class KernelRunner extends KernelRunnerJNI{
                      int jniFlags = 0;
                      if (openCLDevice == null) {
                         if (kernel.getExecutionMode().equals(EXECUTION_MODE.GPU)) {
-                           // We used to treat as before by getting first GPU device
-                           // now we get the best GPU
-                           openCLDevice = (OpenCLDevice) OpenCLDevice.best();
+                           // Get the best GPU
+                           openCLDevice = (OpenCLDevice) OpenCLDevice.bestGPU();
                            jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now. 
+                           if (openCLDevice == null) {
+                              return warnFallBackAndExecute(_entrypointName, _range, _passes, "GPU request can't be honored");
+                           }
+                        } else if (kernel.getExecutionMode().equals(EXECUTION_MODE.ACC)) {
+                           // Get the best ACC
+                           openCLDevice = (OpenCLDevice) OpenCLDevice.bestACC();
+                           jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now. 
+                           if (openCLDevice == null) {
+                              return warnFallBackAndExecute(_entrypointName, _range, _passes, "ACC request can't be honored");
+                           }
                         } else {
                            // We fetch the first CPU device 
                            openCLDevice = (OpenCLDevice) OpenCLDevice.firstCPU();
@@ -1054,9 +1068,11 @@ public class KernelRunner extends KernelRunnerJNI{
                                     "CPU request can't be honored not CPU device");
                            }
                         }
-                     } else {
+                     } else { // openCLDevice == null
                         if (openCLDevice.getType() == Device.TYPE.GPU) {
                            jniFlags |= JNI_FLAG_USE_GPU; // this flag might be redundant now. 
+                        } else if (openCLDevice.getType() == Device.TYPE.ACC) {
+                           jniFlags |= JNI_FLAG_USE_ACC; // this flag might be redundant now. 
                         }
                      }
 
@@ -1255,24 +1271,26 @@ public class KernelRunner extends KernelRunnerJNI{
 
                   try {
                      executeOpenCL(_entrypointName, _range, _passes);
+                     isFallBack = false;
                   } catch (final AparapiException e) {
                      warnFallBackAndExecute(_entrypointName, _range, _passes, e);
                   }
-               } else {
+               } else { // (entryPoint != null) && !entryPoint.shouldFallback()
                   warnFallBackAndExecute(_entrypointName, _range, _passes, "failed to locate entrypoint");
                }
-            } else {
+            } else { // (entryPoint == null) || (isFallBack)
                try {
                   executeOpenCL(_entrypointName, _range, _passes);
+                  isFallBack = false;
                } catch (final AparapiException e) {
                   warnFallBackAndExecute(_entrypointName, _range, _passes, e);
                }
             }
-         } else {
+         } else { // (device == null) || (device instanceof OpenCLDevice)
             warnFallBackAndExecute(_entrypointName, _range, _passes,
                   "OpenCL was requested but Device supplied was not an OpenCLDevice");
          }
-      } else {
+      } else { // kernel.getExecutionMode().isOpenCL()
          executeJava(_range, _passes);
       }
 
@@ -1387,14 +1405,16 @@ public class KernelRunner extends KernelRunnerJNI{
     */
    public void get(Object array) {
       if (explicit
-            && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
+            && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU)
+                  || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
          // Only makes sense when we are using OpenCL
          getJNI(jniContextHandle, array);
       }
    }
 
    public List<ProfileInfo> getProfileInfo() {
-      if (((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
+      if (((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel
+            .getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
          // Only makes sense when we are using OpenCL
          return (getProfileInfoJNI(jniContextHandle));
       } else {
@@ -1419,7 +1439,8 @@ public class KernelRunner extends KernelRunnerJNI{
 
    public void put(Object array) {
       if (explicit
-            && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
+            && ((kernel.getExecutionMode() == Kernel.EXECUTION_MODE.GPU)
+                  || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.ACC) || (kernel.getExecutionMode() == Kernel.EXECUTION_MODE.CPU))) {
          // Only makes sense when we are using OpenCL
          puts.add(array);
       }
diff --git a/samples/info/src/com/amd/aparapi/sample/info/Main.java b/samples/info/src/com/amd/aparapi/sample/info/Main.java
index 9ed78b3c654d9db7231cb0ae264ffd9c1225c8be..fcff248937d1be7a55fed94e9bf5a047ca6ece9e 100644
--- a/samples/info/src/com/amd/aparapi/sample/info/Main.java
+++ b/samples/info/src/com/amd/aparapi/sample/info/Main.java
@@ -115,6 +115,48 @@ public class Main{
          System.out.println("}");
       }
 
+      Device bestGPU = OpenCLDevice.bestGPU();
+      if (bestGPU == null) {
+         System.out.println("OpenCLDevice.bestGPU() returned null!");
+      } else {
+         System.out.println("OpenCLDevice.bestGPU() returned { ");
+         System.out.println("   Type                  : " + bestGPU.getType());
+         System.out.println("   GlobalMemSize         : " + ((OpenCLDevice) bestGPU).getGlobalMemSize());
+         System.out.println("   LocalMemSize          : " + ((OpenCLDevice) bestGPU).getLocalMemSize());
+         System.out.println("   MaxComputeUnits       : " + ((OpenCLDevice) bestGPU).getMaxComputeUnits());
+         System.out.println("   MaxWorkGroupSizes     : " + ((OpenCLDevice) bestGPU).getMaxWorkGroupSize());
+         System.out.println("   MaxWorkItemDimensions : " + ((OpenCLDevice) bestGPU).getMaxWorkItemDimensions());
+         System.out.println("}");
+      }
+
+      Device firstACC = OpenCLDevice.firstACC();
+      if (firstACC == null) {
+         System.out.println("OpenCLDevice.firstACC() returned null!");
+      } else {
+         System.out.println("OpenCLDevice.firstACC() returned { ");
+         System.out.println("   Type                  : " + firstACC.getType());
+         System.out.println("   GlobalMemSize         : " + ((OpenCLDevice) firstACC).getGlobalMemSize());
+         System.out.println("   LocalMemSize          : " + ((OpenCLDevice) firstACC).getLocalMemSize());
+         System.out.println("   MaxComputeUnits       : " + ((OpenCLDevice) firstACC).getMaxComputeUnits());
+         System.out.println("   MaxWorkGroupSizes     : " + ((OpenCLDevice) firstACC).getMaxWorkGroupSize());
+         System.out.println("   MaxWorkItemDimensions : " + ((OpenCLDevice) firstACC).getMaxWorkItemDimensions());
+         System.out.println("}");
+      }
+
+      Device bestACC = OpenCLDevice.bestACC();
+      if (bestACC == null) {
+         System.out.println("OpenCLDevice.bestACC() returned null!");
+      } else {
+         System.out.println("OpenCLDevice.bestACC() returned { ");
+         System.out.println("   Type                  : " + bestACC.getType());
+         System.out.println("   GlobalMemSize         : " + ((OpenCLDevice) bestACC).getGlobalMemSize());
+         System.out.println("   LocalMemSize          : " + ((OpenCLDevice) bestACC).getLocalMemSize());
+         System.out.println("   MaxComputeUnits       : " + ((OpenCLDevice) bestACC).getMaxComputeUnits());
+         System.out.println("   MaxWorkGroupSizes     : " + ((OpenCLDevice) bestACC).getMaxWorkGroupSize());
+         System.out.println("   MaxWorkItemDimensions : " + ((OpenCLDevice) bestACC).getMaxWorkItemDimensions());
+         System.out.println("}");
+      }
+
    }
 
 }
diff --git a/samples/squares/squares.sh b/samples/squares/squares.sh
index 3fe88051b4ec2091322246c7380dc752575f0b76..bc469f262992630b971b8880ca6c716913bfef43 100644
--- a/samples/squares/squares.sh
+++ b/samples/squares/squares.sh
@@ -1,5 +1,5 @@
 java \
  -Djava.library.path=../../com.amd.aparapi.jni/dist \
- -Dcom.amd.aparapi.executionMode=%1 \
+ -Dcom.amd.aparapi.executionMode=$1 \
  -classpath ../../com.amd.aparapi/dist/aparapi.jar:squares.jar \
  com.amd.aparapi.sample.squares.Main