diff --git a/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md b/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5da4963cee6582730bcffb02e2c5a172d0aaa116
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/dist.fpga/README.md
@@ -0,0 +1,10 @@
+---------
+README
+---------
+some scripts use the following notation to find libaparapi_x86_64.so => -Djava.library.path=../../com.amd.aparapi.jni/dist.[X]
+Where X stands for platform type name: std, fpga etc.
+build and copy the X version of libaparapi_x86_64.so to this folder so scripts can pick it up:
+goto: com.amd.aparapi.jni
+run: ant -f build_X_.xml
+copy: dist/libaparapi_x86_64.so dist.X
+
diff --git a/src/aparapi/com.amd.aparapi.jni/dist.std/README.md b/src/aparapi/com.amd.aparapi.jni/dist.std/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..5da4963cee6582730bcffb02e2c5a172d0aaa116
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/dist.std/README.md
@@ -0,0 +1,10 @@
+---------
+README
+---------
+some scripts use the following notation to find libaparapi_x86_64.so => -Djava.library.path=../../com.amd.aparapi.jni/dist.[X]
+Where X stands for platform type name: std, fpga etc.
+build and copy the X version of libaparapi_x86_64.so to this folder so scripts can pick it up:
+goto: com.amd.aparapi.jni
+run: ant -f build_X_.xml
+copy: dist/libaparapi_x86_64.so dist.X
+
diff --git a/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so b/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so
index b54beb2c36c3d06d6747102c2a59023f077791f6..95d77cecc323704ccb9f71fdb6fdbb71b16fdbfe 100755
Binary files a/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so and b/src/aparapi/com.amd.aparapi.jni/dist/libaparapi_x86_64.so differ
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h
new file mode 100644
index 0000000000000000000000000000000000000000..30b2ec599b5bfd797767784db5146dc0ede8033e
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Kernel_FlowType.h
@@ -0,0 +1,13 @@
+/* DO NOT EDIT THIS FILE - it is machine generated */
+#include <jni.h>
+/* Header for class com_amd_aparapi_Kernel_FlowType */
+
+#ifndef _Included_com_amd_aparapi_Kernel_FlowType
+#define _Included_com_amd_aparapi_Kernel_FlowType
+#ifdef __cplusplus
+extern "C" {
+#endif
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h
index 8de80a9844444d079086e7674c634aa936688d44..5a4f770f9a1bdac9398c4c2a4c40bb0131a2db64 100644
--- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_Range.h
@@ -10,7 +10,7 @@ extern "C" {
 #undef com_amd_aparapi_Range_THREADS_PER_CORE
 #define com_amd_aparapi_Range_THREADS_PER_CORE 16L
 #undef com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE
-#define com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE 256L
+#define com_amd_aparapi_Range_MAX_OPENCL_GROUP_SIZE 1024L
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h
index 555f91426e90e4f115ba50a36bdb2132fe2c7532..6791475b00edd9bff51e1b85aab0114f4146b019 100644
--- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_jni_KernelRunnerJNI.h
@@ -51,6 +51,12 @@ extern "C" {
 #define com_amd_aparapi_internal_jni_KernelRunnerJNI_ARG_STATIC 4194304L
 #undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU
 #define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_USE_GPU 4L
+#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW
+#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW 1L
+#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW
+#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW 2L
+#undef com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW
+#define com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW 4L
 /*
  * Class:     com_amd_aparapi_internal_jni_KernelRunnerJNI
  * Method:    initJNI
@@ -70,10 +76,10 @@ JNIEXPORT jint JNICALL Java_com_amd_aparapi_internal_jni_KernelRunnerJNI_getJNI
 /*
  * Class:     com_amd_aparapi_internal_jni_KernelRunnerJNI
  * Method:    buildProgramJNI
- * Signature: (JLjava/lang/String;)J
+ * Signature: (JLjava/lang/String;I)J
  */
 JNIEXPORT jlong JNICALL Java_com_amd_aparapi_internal_jni_KernelRunnerJNI_buildProgramJNI
-  (JNIEnv *, jobject, jlong, jstring);
+  (JNIEnv *, jobject, jlong, jstring, jint);
 
 /*
  * Class:     com_amd_aparapi_internal_jni_KernelRunnerJNI
diff --git a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h
index b9f11d7f81c45c1c24027e6723d40a0310abae83..2819cc7d35fcfd374c54f71d439e6d1627547b11 100644
--- a/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h
+++ b/src/aparapi/com.amd.aparapi.jni/include/com_amd_aparapi_internal_kernel_KernelRunner.h
@@ -51,6 +51,12 @@ extern "C" {
 #define com_amd_aparapi_internal_kernel_KernelRunner_ARG_STATIC 4194304L
 #undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_USE_GPU
 #define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_USE_GPU 4L
+#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_SOURCE_FLOW
+#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_SOURCE_FLOW 1L
+#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_BINARY_FLOW
+#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_BINARY_FLOW 2L
+#undef com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_DEFAULT_FLOW
+#define com_amd_aparapi_internal_kernel_KernelRunner_JNI_FLAG_DEFAULT_FLOW 4L
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp b/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
index a4a49cf8bad22f0cb71ac0b46932e3934b8f45ac..71f36615cb6ef859811a77248c48cce45fc03b27 100644
--- a/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
+++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/invoke/OpenCLJNI.cpp
@@ -336,7 +336,8 @@ JNI_JAVA(void, OpenCLJNI, invoke)
    }
 
 JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
-   (JNIEnv *jenv, jobject jobj) {
+   (JNIEnv *jenv, jobject jobj)
+   {
       jobject platformListInstance = JNIHelper::createInstance(jenv, ArrayListClass, VoidReturn);
       cl_int status = CL_SUCCESS;
       cl_uint platformc;
@@ -352,43 +353,23 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
             status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VERSION, sizeof(platformVersionName), platformVersionName, NULL);
             fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName); 
 
-            // fix this so OpenCL 1.3 or higher will not break!
-            if (
-#ifdef ALTERA_OPENCL
-           		1 // !!! oren fix for bad platform version check
-                ||
-#endif
-                   !strncmp(platformVersionName, "OpenCL 1.2", 10)
-                || !strncmp(platformVersionName, "OpenCL 1.1", 10)
-#ifdef __APPLE__
-                || !strncmp(platformVersionName, "OpenCL 1.0", 10)
-#endif
-               ) 
-               { 
-               char platformVendorName[512];  
-               char platformName[512];  
-               status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VENDOR, sizeof(platformVendorName), platformVendorName, NULL);
-               status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL);
-               fprintf(stderr, "platform vendor    %d %s\n", platformIdx, platformVendorName); 
-               fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName); 
-               jobject platformInstance = JNIHelper::createInstance(jenv, OpenCLPlatformClass , ArgsVoidReturn(LongArg StringClassArg StringClassArg StringClassArg ), 
+            char platformVendorName[512];
+            char platformName[512];
+            status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_VENDOR, sizeof(platformVendorName), platformVendorName, NULL);
+            status = clGetPlatformInfo(platformIds[platformIdx], CL_PLATFORM_NAME, sizeof(platformName), platformName, NULL);
+            fprintf(stderr, "platform vendor    %d %s\n", platformIdx, platformVendorName);
+            fprintf(stderr, "platform version %d %s\n", platformIdx, platformVersionName);
+            jobject platformInstance = JNIHelper::createInstance(jenv, OpenCLPlatformClass , ArgsVoidReturn(LongArg StringClassArg StringClassArg StringClassArg ),
                      (jlong)platformIds[platformIdx],
                      jenv->NewStringUTF(platformVersionName), 
                      jenv->NewStringUTF(platformVendorName),
                      jenv->NewStringUTF(platformName)
                      );
-               JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance);
-
-               cl_uint deviceIdc;
-               // !!! oren fix - detect accelerators as well, they forgot to add the CL_DEVICE_TYPE_ACCELERATOR
-#ifndef ALTERA_OPENCL
-               cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU ;
-#else
-               //cl_device_type requestedDeviceType =CL_DEVICE_TYPE_CPU |CL_DEVICE_TYPE_GPU | CL_DEVICE_TYPE_ACCELERATOR;
-               // Altera OpenCL fails if this is different then = CL_DEVICE_TYPE_ACCELERATOR
-               cl_device_type requestedDeviceType = CL_DEVICE_TYPE_ACCELERATOR;
-#endif
-               status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
+            JNIHelper::callVoid(jenv, platformListInstance, "add", ArgsBooleanReturn(ObjectClassArg), platformInstance);
+
+            cl_uint deviceIdc;
+            cl_device_type requestedDeviceType = CL_DEVICE_TYPE_ALL;
+            status = clGetDeviceIDs(platformIds[platformIdx], requestedDeviceType, 0, NULL, &deviceIdc);
                if (status == CL_SUCCESS && deviceIdc > 0 ){
                   fprintf(stderr, "found %d devices\n", deviceIdc);
                   cl_device_id* deviceIds = new cl_device_id[deviceIdc];
@@ -476,7 +457,7 @@ JNI_JAVA(jobject, OpenCLJNI, getPlatforms)
                }
             }
          }
-      }
+      //}
 
       return (platformListInstance);
    }
diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
index 205c425853f584fbe0b29da677a87bd0775b0821..e9e5101e51cf08e3ea5fa73062ee6d2649220d33 100644
--- a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
+++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/Aparapi.cpp
@@ -52,6 +52,8 @@
 #include <algorithm>
 //#include <string>
 
+// !!! oren change ->
+#include "ConfigSettings.h"
 
 //compiler dependant code
 /**
@@ -1152,9 +1154,9 @@ inline char* getClassName(JNIEnv* jenv, JNIContext* jniContext, const char *optE
 
    // !!! Java adds '$' chars to inner class names so replace them with '.'
    char *charPtr = classNameStr;
-   while(charPtr=strchr(charPtr,'$'))
+   while(charPtr = strchr(charPtr,'$'))
    {
-	   *charPtr='.';
+	   *charPtr = BINARY_FILE_SEP;
 	   charPtr++;
    }
 
@@ -1285,46 +1287,57 @@ inline void outputOCLFile(JNIEnv* jenv, JNIContext* jniContext, const char *sour
 
 }
 
+// Resolves DEFAULT_FLOW to the platform default, then throws CLException if the flow is unsupported.
+inline void verifyFlow(jint &buildFlags)
+{
+    // resolve the default first: DEFAULT_FLOW itself is never part of PLATFORM_FLOW_SUPPORT,
+    // so checking support before this mapping would reject every default request
+    if(buildFlags==DEFAULT_FLOW)
+        buildFlags = PLATFORM_DEFAULT_FLOW;
+    if(!(PLATFORM_FLOW_SUPPORT & buildFlags))
+    {
+        fprintf(stderr, "!!! Error requested flow(0x%x) not available !!!\n",(unsigned int)buildFlags);
+        throw CLException(CL_INVALID_VALUE,"buildProgramJNI() -> bad request flow");
+    }
+}
 
 JNI_JAVA(jlong, KernelRunnerJNI, buildProgramJNI)
-   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source) {
+   (JNIEnv *jenv, jobject jobj, jlong jniContextHandle, jstring source, jint buildFlags) {
       JNIContext* jniContext = JNIContext::getJNIContext(jniContextHandle);
       if (jniContext == NULL){
          return 0;
       }
 
       try {
-         cl_int status = CL_SUCCESS;
-
-#ifdef ALTERA_OPENCL
-#define OUTPUT_OCL_FILE
-#define USE_BINARY_FILE
-#define BINARY_FILE_EXT ".aocx"
-#endif
-
-// allows defining an alternative folder where bin files should be loaded from
-// Usefull when running in aparapi embeded mode
-#define BINARY_FOLDER_ENV_VAR "APARAPI_CL_BIN_FOLDER"
+        cl_int status = CL_SUCCESS;
 
         const char *sourceChars = jenv->GetStringUTFChars(source, NULL);
 
-//#ifdef OUTPUT_OCL_FILE
-         outputOCLFile(jenv,jniContext,sourceChars);
-//#endif
-
-#ifdef USE_BINARY_FILE
-        char *binFileFolder = getenv(BINARY_FOLDER_ENV_VAR);
-        fprintf(stderr, "Bin Folder is %s\n",binFileFolder);
-        char *binFileName = getClassName(jenv,jniContext,BINARY_FILE_EXT);
-        char *fullBinFilePath = buildFilePath(binFileFolder,binFileName);
-        fprintf(stderr, "FullBinFilePath is %s\n",fullBinFilePath);
-     	jniContext->program = CLHelper::createProgramWithBinary(jenv, jniContext->context,  1, &jniContext->deviceId, fullBinFilePath, NULL, &status);
-     	delete []binFileName;
-     	delete []fullBinFilePath;
-#else
-        jniContext->program = CLHelper::createProgramWithSource(jenv, jniContext->context,  1, &jniContext->deviceId, sourceChars, NULL, &status);
+#ifdef OUTPUT_OCL_FILE
+        outputOCLFile(jenv,jniContext,sourceChars);
 #endif
 
+        // !!! oren change ->
+        // verify the flow and modify if need be
+        verifyFlow(buildFlags);
+
+//#ifdef USE_BINARY_FILE
+        if(buildFlags & com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW)
+        {
+          char *binFileFolder = getenv(BINARY_FOLDER_ENV_VAR);
+          fprintf(stderr, "Bin Folder is %s\n",binFileFolder);
+          char *binFileName = getClassName(jenv,jniContext,BINARY_FILE_EXT);
+          char *fullBinFilePath = buildFilePath(binFileFolder,binFileName);
+          fprintf(stderr, "FullBinFilePath is %s\n",fullBinFilePath);
+     	  jniContext->program = CLHelper::createProgramWithBinary(jenv, jniContext->context,  1, &jniContext->deviceId, fullBinFilePath, NULL, &status);
+     	  delete []binFileName;
+     	  delete []fullBinFilePath;
+        }
+//#else
+        else
+          jniContext->program = CLHelper::createProgramWithSource(jenv, jniContext->context,  1, &jniContext->deviceId, sourceChars, NULL, &status);
+//#endif
+
         jenv->ReleaseStringUTFChars(source, sourceChars);
 
          if(status == CL_BUILD_PROGRAM_FAILURE) throw CLException(status, "");
diff --git a/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h
new file mode 100644
index 0000000000000000000000000000000000000000..eb7063b19a26264aeb36dbd44502477a95fbcc8c
--- /dev/null
+++ b/src/aparapi/com.amd.aparapi.jni/src/cpp/runKernel/ConfigSettings.h
@@ -0,0 +1,58 @@
+#ifndef CONFIG_SETTINGS_H
+#define CONFIG_SETTINGS_H
+
+// !!! oren changes ->
+// configuration settings for building platform specific code
+// TODO: consider moving parts of this to a configuration file later on and load settings dynamically
+
+// use values from JNI config
+#include "com_amd_aparapi_internal_jni_KernelRunnerJNI.h"
+
+
+// auto output kernel.cl file
+#define OUTPUT_OCL_FILE
+// allows defining an alternative folder where bin files should be loaded from
+// Useful when running in Aparapi embedded mode
+#define BINARY_FOLDER_ENV_VAR "APARAPI_CL_BIN_FOLDER"
+
+///////////////////////////
+// help determine if platform supports source/binary flows
+///////////////////////////
+#define SOURCE_FLOW   com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_SOURCE_FLOW
+#define BINARY_FLOW   com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_BINARY_FLOW
+#define DEFAULT_FLOW  com_amd_aparapi_internal_jni_KernelRunnerJNI_JNI_FLAG_DEFAULT_FLOW
+///////////////////////////
+
+
+///////////////////////////
+// define platform settings
+//////////////////////////
+// BINARY_FILE_EXT => define binary file extension
+// BINARY_FILE_SEP => define binary file separator, replaces java's $ signs in file names -> examples: .,_ etc.
+///////////////////////////
+// Altera platform specific
+///////////////////////////
+#ifdef ALTERA_OPENCL
+  #define PLATFORM_FLOW_SUPPORT BINARY_FLOW
+  #define PLATFORM_DEFAULT_FLOW BINARY_FLOW
+  #define BINARY_FILE_EXT ".aocx"
+  #define BINARY_FILE_SEP '.'
+#elif AMD_OPENCL
+// AMD specific
+#elif INTEL_OPENCL
+// Intel specific
+#elif NVIDIA_OPENCL
+// NVidia specific
+#else // default settings
+///////////////////////////
+// All other platforms - set the default for other platforms
+///////////////////////////
+  #define PLATFORM_FLOW_SUPPORT (BINARY_FLOW | SOURCE_FLOW)
+  #define PLATFORM_DEFAULT_FLOW SOURCE_FLOW
+  #define BINARY_FILE_EXT ".bcl"
+  #define BINARY_FILE_SEP '.'
+#endif // ALTERA_OPENCL
+
+#endif // CONFIG_SETTINGS_H
+
+
diff --git a/src/aparapi/com.amd.aparapi/dist/aparapi.jar b/src/aparapi/com.amd.aparapi/dist/aparapi.jar
index 7e1bc57f0d7e26156e6950c36dbb6b8f273faf72..0be74ef231d313a2b694f0ca651bdb8c5caf6e2b 100644
Binary files a/src/aparapi/com.amd.aparapi/dist/aparapi.jar and b/src/aparapi/com.amd.aparapi/dist/aparapi.jar differ
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
index da578847e0cda8341403d573bd67ab2401e5dd18..004ee360bc5b44717009635a969994fdfd054722 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Config.java
@@ -93,7 +93,16 @@ public class Config extends ConfigJNI{
     *  
     */
    public static final String platformHint = System.getProperty(propPkgName + ".platformHint");
-   
+
+   //!!! oren change 7.15.15 -> allow choosing a flow type
+   /**
+    * Allows the user to select a flow type
+    *
+    *  Usage -Dcom.amd.aparapi.flowType={binary|source|default}
+    *  
+    */
+   public static final String flowType = System.getProperty(propPkgName + ".flowType");
+
    /**
     * Allows the user to request that the execution mode of each kernel invocation be reported to stdout.
     *
@@ -204,6 +213,8 @@ public class Config extends ConfigJNI{
          System.out.println(propPkgName + ".enableProfilingCSV{true|false}=" + enableProfilingCSV);
          // !!! oren change
          System.out.println(propPkgName + ".profilingFileNameFormatStr{format str}=" + profilingFileNameFormatStr);
+         System.out.println(propPkgName + ".flowType{source|binary|default}=" + flowType);
+         //////////////////
          System.out.println(propPkgName + ".enableVerboseJNI{true|false}=" + enableVerboseJNI);
          System.out.println(propPkgName + ".enableVerboseJNIOpenCLResourceTracking{true|false}="
                + enableVerboseJNIOpenCLResourceTracking);
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
index 900b344042d8e9ccf6209f0aa2da91e9142031fc..2d11bcce22f037ca2f3653ff72d0588de4303590 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Kernel.java
@@ -405,6 +405,80 @@ public abstract class Kernel implements Cloneable {
       }
    };
 
+   ////////////////////
+   // !!! oren change -> add source/binary flow support to kernel 
+   ////////////////////
+   public static enum FlowType
+   {
+	   // flow type list
+	   SOURCE(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_SOURCE_FLOW),
+	   BINARY(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_BINARY_FLOW),
+	   DEFAULT(com.amd.aparapi.internal.jni.KernelRunnerJNI.JNI_FLAG_DEFAULT_FLOW);
+
+	   // data store
+	   int flowType;
+
+	   FlowType(int flowType)
+	   {
+		   setValue(flowType);
+	   }
+
+	   FlowType(String flowTypeStr)
+	   {
+		   this.flowType = strToFlowType(flowTypeStr).getValue();
+	   }
+
+	   public int getValue()
+	   {
+		   return this.flowType;
+	   }
+
+	   private void setValue(int flowType)
+	   {
+		   this.flowType = flowType;
+	   }
+
+	   public static FlowType getDefaultFlowType() 
+	   {
+		   // if set by user try get value else set to default 
+		   FlowType flowType = (Config.flowType==null) ? DEFAULT : strToFlowType(Config.flowType);
+		   return flowType;
+	   }
+
+	   public static FlowType strToFlowType(final String flowTypeStr)
+	   {
+		   try 
+		   {
+			   FlowType flowType = valueOf(flowTypeStr.toUpperCase());
+			   return flowType;
+		   }
+		   catch (Exception e)
+		   {
+			   logger.info("!!! bad flow type => (" + flowTypeStr + ") => reverting to default platform flow!");
+			   throw e;
+		   }
+	   }
+
+   }
+   
+   public FlowType getFlowType() {
+		return kernelFlowType;
+	}
+
+
+	public void setFlowType(FlowType kernelFlowType) {
+		this.kernelFlowType = kernelFlowType;
+	}
+
+	public void setFlowType(String flowTypeStr) {
+		this.kernelFlowType = FlowType.strToFlowType(flowTypeStr);
+	}
+   
+   private FlowType kernelFlowType = FlowType.getDefaultFlowType();
+   
+     
+   ////////////////////
+   
    private KernelRunner kernelRunner = null;
 
    private KernelState kernelState = new KernelState();
@@ -616,7 +690,8 @@ public abstract class Kernel implements Cloneable {
       return getGlobalId(0);
    }
 
-   @OpenCLDelegate
+
+@OpenCLDelegate
    protected final int getGlobalId(int _dim) {
       return kernelState.getGlobalIds()[_dim];
    }
@@ -2816,4 +2891,5 @@ public abstract class Kernel implements Cloneable {
          executionMode = currentMode.next();
       }
    }
+
 }
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
index 06fd2c6377c2cc959587e573cdc32bc5048d31be..34e3a4b5ac1e6a33db58640df381c7ee2daa501f 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/Range.java
@@ -51,7 +51,11 @@ public class Range extends RangeJNI{
 
    public static final int THREADS_PER_CORE = 16;
 
-   public static final int MAX_OPENCL_GROUP_SIZE = 256;
+   // !!! oren change -> this value looks outdated and the mechanism probably needs revisiting !!! 
+   // we already see evidence of improved performance for size==1024 on certain devices (ref: FPGA doc classification paper) 
+   // for now we set it to 4X original value, but we should think about it more...
+   //public static final int MAX_OPENCL_GROUP_SIZE = 256;
+   public static final int MAX_OPENCL_GROUP_SIZE = 1024;
 
    public static final int MAX_GROUP_SIZE = Math.max(Runtime.getRuntime().availableProcessors() * THREADS_PER_CORE,
          MAX_OPENCL_GROUP_SIZE);
@@ -113,11 +117,18 @@ public class Range extends RangeJNI{
     */
 
    private static int[] getFactors(int _value, int _max) {
-      final int factors[] = new int[MAX_GROUP_SIZE];
+      //final int factors[] = new int[MAX_GROUP_SIZE];
       int factorIdx = 0;
 
-      for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++) {
-         if ((_value % possibleFactor) == 0) {
+      // !!! oren bug fix -> based on poz findings
+      // max can not be bigger then value and if factorIdx >= MAX_GROUP_SIZE we will have an access violation
+      final int GroupSizeLimit = Math.min(Math.min(_max,_value),MAX_GROUP_SIZE);
+      final int factors[] = new int[GroupSizeLimit];
+      //for (int possibleFactor = 1; possibleFactor <= _max; possibleFactor++) 
+      for (int possibleFactor = 1; possibleFactor <= GroupSizeLimit; possibleFactor++) 
+      {
+         if ((_value % possibleFactor) == 0) 
+         {
             factors[factorIdx++] = possibleFactor;
          }
       }
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
index 7fa7ec4c59e3ce4d44070e2aca4cfdb40ccf0bd3..f1c2f68e09c0d578956ed0b43c74b6360a368ea8 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/device/Device.java
@@ -1,8 +1,11 @@
 package com.amd.aparapi.device;
 
+import java.util.List;
+
 import com.amd.aparapi.Range;
 import com.amd.aparapi.device.OpenCLDevice.DeviceComparitor;
 import com.amd.aparapi.device.OpenCLDevice.DeviceSelector;
+import com.amd.aparapi.internal.opencl.OpenCLPlatform;
 
 public abstract class Device{
 
@@ -14,6 +17,98 @@ public abstract class Device{
       JTP,
       SEQ
    };
+   
+   // !!! oren change -> get device using the tuple (platform, deviceType, id)
+   
+   public static Device getDevice(String platformName, Device.TYPE deviceType, int deviceId)
+   {
+	   return getDevice(platformName,deviceType.name(),deviceId);
+   }
+
+   // get first available device
+
+   public static Device getDevice(String platformName, Device.TYPE deviceType)
+   {
+	   return getDevice(platformName,deviceType.name(),0);
+   }
+
+   public static Device getDevice(String platformName, String deviceTypeName)
+   {
+	   return getDevice(platformName,deviceTypeName,0);
+   }
+
+   /**
+    * Finds an OpenCL device by platform name, device type and position, printing what it inspects.
+    *
+    * @param platformName   text looked for inside each platform's name
+    * @param deviceTypeName device type name, compared ignoring case
+    * @param deviceId       position in the platform's device list; 0 or less selects the first
+    *                       device of the requested type
+    * @return the selected device, or null when no platform/device combination matches
+    */
+   public static Device getDevice(String platformName, String deviceTypeName, int deviceId)
+   {
+      List<OpenCLPlatform> platforms = (new OpenCLPlatform()).getOpenCLPlatforms();
+
+      int platformc = 0;
+      for (OpenCLPlatform platform : platforms) 
+      {
+         if(platform.getName().contains(platformName))
+         {
+           System.out.println("Platform " + platformc + "{");
+           System.out.println("   Name    : \"" + platform.getName() + "\"");
+           System.out.println("   Vendor  : \"" + platform.getVendor() + "\"");
+           System.out.println("   Version : \"" + platform.getVersion() + "\"");
+
+           List<OpenCLDevice> devices = platform.getOpenCLDevices();
+
+           System.out.println("   Platform contains " + devices.size() + " OpenCL devices");
+
+           // devicec is the position in the platform's device list, so it must advance for every
+           // device, including the ones skipped below
+           for (int devicec = 0; devicec < devices.size(); devicec++)
+           {
+             OpenCLDevice device = devices.get(devicec);
+
+             if(!device.getType().name().equalsIgnoreCase(deviceTypeName))
+             {
+               continue;
+             }
+
+             System.out.println("   Device " + devicec + "{");
+             System.out.println("       Type                  : " + device.getType());
+             System.out.println("       GlobalMemSize         : " + device.getGlobalMemSize());
+             System.out.println("       LocalMemSize          : " + device.getLocalMemSize());
+             System.out.println("       MaxComputeUnits       : " + device.getMaxComputeUnits());
+             System.out.println("       MaxWorkGroupSizes     : " + device.getMaxWorkGroupSize());
+             System.out.println("       MaxWorkItemDimensions : " + device.getMaxWorkItemDimensions());
+             System.out.println("   }");
+
+             // a positive deviceId must equal this device's position; otherwise keep looking
+             if(deviceId>0 && (devicec!=deviceId))
+             {
+               System.out.println("!!! devicec!=deviceId(" + deviceId + ") => continue search !!!");
+               continue;
+             }
+
+             // close platform bracket
+             System.out.println("}");
+
+             return device; 
+           }
+
+           System.out.println("Device type/id combination not found");
+
+           System.out.println("}");
+
+           platformc++;
+         }
+
+      }
+      // return not found !!!
+      return null;
+   }
+
 
    public static Device best() {
       return (OpenCLDevice.select(new DeviceComparitor(){
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
index a672093fe97114a2e1fe4d5ed1fc63c73bb7e5bf..da60ff634120a890eeeebf3a6315e69de3c7e395 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/jni/KernelRunnerJNI.java
@@ -276,6 +276,19 @@ public abstract class KernelRunnerJNI{
     */
    //  @UsedByJNICode @Annotations.Experimental protected static final int JNI_FLAG_ENABLE_VERBOSE_JNI_OPENCL_RESOURCE_TRACKING = 1 << 4;
 
+   /** !!! oren change ->
+    * These flags indicate that we want to build source/binary i.e. use source/binary flow.
+    * 
+    * Be careful changing final constants starting with JNI.<br/>
+    * 
+    * @see com.amd.aparapi.internal.annotation.UsedByJNICode
+    * 
+    * @author oren
+    */
+   @UsedByJNICode public static final int JNI_FLAG_SOURCE_FLOW  = 1 << 0;
+   @UsedByJNICode public static final int JNI_FLAG_BINARY_FLOW  = 1 << 1;
+   @UsedByJNICode public static final int JNI_FLAG_DEFAULT_FLOW = 1 << 2;
+   
    /*
     * Native methods
     */
@@ -295,7 +308,9 @@ public abstract class KernelRunnerJNI{
 
    protected native int getJNI(long _jniContextHandle, Object _array);
 
-   protected native long buildProgramJNI(long _jniContextHandle, String _source);
+   //protected native long buildProgramJNI(long _jniContextHandle, String _source);
+   // !!! oren change -> add binary option to build
+   protected native long buildProgramJNI(long _jniContextHandle, String _source, int _buildFlags);
 
    protected native int setArgsJNI(long _jniContextHandle, KernelArgJNI[] _args, int argc);
 
diff --git a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
index af7b5afa9332c896407951c1259a02deeefc4b59..c8578a9bc6ccaac8631a2e36cb2fbec19826b29c 100644
--- a/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
+++ b/src/aparapi/com.amd.aparapi/src/java/com/amd/aparapi/internal/kernel/KernelRunner.java
@@ -1047,8 +1047,11 @@ public class KernelRunner extends KernelRunnerJNI{
 					e1.printStackTrace();
 				  }
                   */
+                  // !!! oren change -> support flow types
+                  // set flow type
+                  int buildFlags = kernel.getFlowType().getValue();
                   // Send the string to OpenCL to compile it
-                  if (buildProgramJNI(jniContextHandle, openCL) == 0) {
+                  if (buildProgramJNI(jniContextHandle, openCL,buildFlags) == 0) {
                      return warnFallBackAndExecute(_entrypointName, _range, _passes, "OpenCL compile failed");
                   }
 
diff --git a/src/aparapi/samples/add/add.jar b/src/aparapi/samples/add/add.jar
index bfdfc338d748f2d47dce6d35f6641722bb0f35c4..d3b27be0a3a87fa33d23810463f0b115fb033af0 100644
Binary files a/src/aparapi/samples/add/add.jar and b/src/aparapi/samples/add/add.jar differ
diff --git a/src/aparapi/samples/add/selectPlatform.sh b/src/aparapi/samples/add/selectPlatform.sh
new file mode 100644
index 0000000000000000000000000000000000000000..386ea6e808ac110d0d514f39957587451f59291d
--- /dev/null
+++ b/src/aparapi/samples/add/selectPlatform.sh
@@ -0,0 +1,42 @@
+
+#
+# Select-platform and flow-type test script.
+# Runs the add kernel on different platforms/flows.
+#
+# $1 = platformHint [AMD,Altera,Intel,NVidia]
+# $2 = deviceType  [CPU,CPU,ACC]   NOTE(review): CPU is listed twice; the second entry is presumably GPU - confirm
+# $3 = deviceId [0=first available device..n=available device]
+# $4 = flowTypeStr [source,binary,default]
+#      can use -Dcom.amd.aparapi.flowType instead
+# $5 = dist.[std,fpga] to load -> !!! note -> remember to build and copy libaparapi_x86_64.so to the correct place
+#      example: cp ../../com.amd.aparapi.jni/dist/libaparapi_x86_64.so ../../com.amd.aparapi.jni/dist.std/
+#      Alternatively use the script version below without $5 ->
+#      -Djava.library.path=../../com.amd.aparapi.jni/dist \
+
+# usage examples:
+# sh selectPlatform.sh AMD CPU 0 source std
+# sh selectPlatform.sh Altera ACC 0 binary fpga
+# Active variant: native library is loaded from dist.$5 and $1..$4 are passed as program arguments.
+java \
+ -Djava.library.path=../../com.amd.aparapi.jni/dist.$5 \
+ -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \
+ com.amd.aparapi.sample.add.MainSelectPlatform $1 $2 $3 $4
+
+#
+# other script variants, uncomment to use ->
+#
+
+# script version with libaparapi_x86_64.so in dist (no $5; flow type argument is not passed)
+#java \
+# -Djava.library.path=../../com.amd.aparapi.jni/dist \
+# -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \
+# com.amd.aparapi.sample.add.MainSelectPlatform $1 $2 $3
+
+# script version with the flow type set through the com.amd.aparapi.flowType property instead of an argument
+#java \
+# -Djava.library.path=../../com.amd.aparapi.jni/dist.$5 \
+# -Dcom.amd.aparapi.flowType=$4 \
+# -classpath ../../com.amd.aparapi/dist/aparapi.jar:add.jar \
+# com.amd.aparapi.sample.add.MainSelectPlatform $1 $2 $3
+
+
diff --git a/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java b/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java
new file mode 100644
index 0000000000000000000000000000000000000000..053483ef860404f82a66a9874f93dd513f6123d7
--- /dev/null
+++ b/src/aparapi/samples/add/src/com/amd/aparapi/sample/add/MainSelectPlatform.java
@@ -0,0 +1,277 @@
+/*
+
+Copyright (c) 2010-2011, Advanced Micro Devices, Inc.
+
+All rights reserved.
+
+
+
+Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
+
+following conditions are met:
+
+
+
+Redistributions of source code must retain the above copyright notice, this list of conditions and the following
+
+disclaimer. 
+
+
+
+Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
+
+disclaimer in the documentation and/or other materials provided with the distribution. 
+
+
+
+Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products
+
+derived from this software without specific prior written permission. 
+
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 
+
+WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 
+
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+
+If you use the software (in whole or in part), you shall adhere to all applicable U.S., European, and other export
+
+laws, including but not limited to the U.S. Export Administration Regulations ("EAR"), (15 C.F.R. Sections 730 through
+
+774), and E.U. Council Regulation (EC) No 1334/2000 of 22 June 2000.  Further, pursuant to Section 740.6 of the EAR,
+
+you hereby certify that, except pursuant to a license granted by the United States Department of Commerce Bureau of 
+
+Industry and Security or as otherwise permitted pursuant to a License Exception under the U.S. Export Administration 
+
+Regulations ("EAR"), you will not (1) export, re-export or release to a national of a country in Country Groups D:1,
+
+E:1 or E:2 any restricted technology, software, or source code you receive hereunder, or (2) export to Country Groups
+
+D:1, E:1 or E:2 the direct product of such technology or software, if such foreign produced direct product is subject
+
+to national security controls as identified on the Commerce Control List (currently found in Supplement 1 to Part 774
+
+of EAR).  For the most current Country Group listings, or for additional information about the EAR or your obligations
+
+under those regulations, please refer to the U.S. Bureau of Industry and Security's website at http://www.bis.doc.gov/. 
+
+
+
+*/
+
+
+
+package com.amd.aparapi.sample.add;
+
+
+
+import java.io.IOException;
+
+import java.util.concurrent.TimeUnit;
+
+
+
+import com.amd.aparapi.Kernel;
+
+import com.amd.aparapi.Range;
+
+
+import com.amd.aparapi.device.Device;
+
+import com.amd.aparapi.device.OpenCLDevice;
+
+import com.amd.aparapi.internal.opencl.OpenCLPlatform;
+
+
+import java.util.List;
+
+
+
+/**
+ * Sample that runs a vector-add kernel on an OpenCL device chosen from the command line.
+ *
+ * <p>It first prints all OpenCL platforms and devices, then selects a device through
+ * {@code Device.getDevice(platformHint, deviceType, deviceId)}, optionally sets the
+ * kernel flow type, executes the kernel, and prints the elapsed time together with
+ * the first few results.
+ *
+ * <p>Usage: {@code MainSelectPlatform platformHint deviceType [deviceId] [flowType]}
+ */
+public class MainSelectPlatform {
+
+   /**
+    * Prints every OpenCL platform returned by {@code OpenCLPlatform.getOpenCLPlatforms()}
+    * to standard output, followed by the devices that each platform reports.
+    *
+    * <p>Platforms and devices are labelled with a running index that starts at 0 and
+    * follows the iteration order of the returned lists.
+    */
+   public static void listPlatformsAndDevices()
+   {
+      List<OpenCLPlatform> platforms = (new OpenCLPlatform()).getOpenCLPlatforms();
+
+      int platformc = 0;
+      for (OpenCLPlatform platform : platforms)
+      {
+
+           // platform header
+           System.out.println("Platform " + platformc + "{");
+           System.out.println("   Name    : \"" + platform.getName() + "\"");
+           System.out.println("   Vendor  : \"" + platform.getVendor() + "\"");
+           System.out.println("   Version : \"" + platform.getVersion() + "\"");
+
+           List<OpenCLDevice> devices = platform.getOpenCLDevices();
+           System.out.println("   Platform contains " + devices.size() + " OpenCL devices");
+
+           // one nested block per device of this platform
+           int devicec = 0;
+           for (OpenCLDevice device : devices)
+           {
+               System.out.println("   Device " + devicec + "{");
+               System.out.println("       Type                  : " + device.getType());
+               System.out.println("       GlobalMemSize         : " + device.getGlobalMemSize());
+               System.out.println("       LocalMemSize          : " + device.getLocalMemSize());
+               System.out.println("       MaxComputeUnits       : " + device.getMaxComputeUnits());
+               System.out.println("       MaxWorkGroupSizes     : " + device.getMaxWorkGroupSize());
+               System.out.println("       MaxWorkItemDimensions : " + device.getMaxWorkItemDimensions());
+               System.out.println("   }");
+
+               devicec++;
+           }
+
+           // close platform bracket
+           System.out.println("}");
+
+           platformc++;
+      }
+   }
+
+   /**
+    * Runs the vector-add sample kernel on the device selected by the command line.
+    *
+    * <p>The available platforms and devices are always listed first, so the listing
+    * is shown even when too few arguments are given. The arguments are validated
+    * before any input data is generated or the kernel is created.
+    *
+    * <p>Only the call to {@code kernel.execute(...)} is timed; platform listing,
+    * argument parsing and input generation are excluded from the reported time.
+    * The kernel is disposed on every exit path once it has been created.
+    *
+    * <p>The execution range is fixed at {@code Range.create(device, 512, 16)}, while the
+    * input and output arrays hold 1000 * 1000 elements each.
+    *
+    * @param args {@code platformHint deviceType [deviceId] [flowType]}; {@code deviceId}
+    *             defaults to 0, and the kernel's flow type is not set when
+    *             {@code flowType} is omitted
+    * @throws NumberFormatException if {@code deviceId} is given but is not an integer
+    */
+   public static void main(String[] args) {
+
+      // !!! experiment with platform/device selection
+      System.out.printf("**** listPlatformsAndDevices ****\n");
+      listPlatformsAndDevices();
+      System.out.printf("****************\n");
+
+      // Reject an incomplete command line before doing any expensive setup.
+      if (args.length < 2)
+      {
+         System.out.printf("****************\n");
+         System.out.printf("Usage is: select platformHint deviceType [deviceId] [flowType]\n");
+         System.out.printf("****************\n");
+         return;
+      }
+
+      // Positional arguments; the last two are optional.
+      final String platformHint = args[0];
+      final String deviceType = args[1];
+      final int deviceId = (args.length > 2) ? Integer.parseInt(args[2]) : 0;
+      final String flowTypeStr = (args.length > 3) ? args[3] : null;
+
+      // Input vectors, filled with random values scaled by 100.
+      final int size = 1000 * 1000;
+
+      final float[] a = new float[size];
+      final float[] b = new float[size];
+
+      for (int i = 0; i < size; i++) {
+         a[i] = (float) (Math.random() * 100);
+         b[i] = (float) (Math.random() * 100);
+      }
+
+      // Output vector written by the kernel.
+      final float[] sum = new float[size];
+
+      Kernel kernel = new Kernel(){
+
+         @Override public void run() {
+            // each invocation adds the element pair at its global id
+            int gid = getGlobalId();
+            sum[gid] = a[gid] + b[gid];
+         }
+
+      };
+
+      // From here on the kernel exists, so dispose() has to run even when flow-type
+      // selection, device lookup or execution throws.
+      try {
+
+         if (flowTypeStr != null) {
+            kernel.setFlowType(flowTypeStr);
+         }
+
+         // Resolve the requested device from the three selection arguments.
+         System.out.printf("**** getDevice ****\n");
+         Device device = Device.getDevice(platformHint, deviceType, deviceId);
+
+         // !!! oren -> add time measurement
+         // The timer brackets execute() only, so the figures below describe the kernel
+         // run rather than platform enumeration or console output.
+         System.out.printf("Running kernel..");
+
+         long startTime = System.nanoTime();
+         kernel.execute(Range.create(device, 512, 16));
+         long elapsedTimeNano = System.nanoTime() - startTime;
+
+         System.out.printf("****************\n");
+
+         // Whole seconds and whole milliseconds of the same measurement.
+         long elapsedTimeSec = TimeUnit.SECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS);
+         long elapsedTimeMilli = TimeUnit.MILLISECONDS.convert(elapsedTimeNano, TimeUnit.NANOSECONDS);
+
+         System.out.printf("****************\n");
+         System.out.printf("Elapsed time in milli: %d\n", elapsedTimeMilli);
+         System.out.printf("Elapsed time in sec  : %d\n", elapsedTimeSec);
+         System.out.printf("****************\n");
+
+         // !!! oren change -> show only the first results
+         // (at most 20 entries instead of all size entries)
+         int displayRange = Math.min(size, 20);
+
+         System.out.printf("**************** Showing first %d results ****************\n", displayRange);
+
+         for (int i = 0; i < displayRange; i++) {
+            System.out.printf("%6.2f + %6.2f = %8.2f\n", a[i], b[i], sum[i]);
+         }
+
+      } finally {
+         kernel.dispose();
+      }
+
+   }
+
+
+
+}