From 4a3514b1f4446f806566d98c7a3a4a18dca21771 Mon Sep 17 00:00:00 2001
From: CoreRasurae <luis.p.mendes@gmail.com>
Date: Thu, 26 Apr 2018 20:51:14 +0100
Subject: [PATCH] Update: Support for OpenCLDevice configurator/configure API

---
 CHANGELOG.md                                  |  7 +-
 CONTRIBUTORS.md                               |  3 +-
 pom.xml                                       |  2 +-
 .../device/IOpenCLDeviceConfigurator.java     | 34 +++++++
 .../java/com/aparapi/device/OpenCLDevice.java | 59 +++++++++++-
 .../runtime/OpenCLDeviceConfiguratorTest.java | 96 +++++++++++++++++++
 .../OpenCLDeviceNoConfiguratorTest.java       | 90 +++++++++++++++++
 7 files changed, 285 insertions(+), 6 deletions(-)
 create mode 100644 src/main/java/com/aparapi/device/IOpenCLDeviceConfigurator.java
 create mode 100644 src/test/java/com/aparapi/runtime/OpenCLDeviceConfiguratorTest.java
 create mode 100644 src/test/java/com/aparapi/runtime/OpenCLDeviceNoConfiguratorTest.java

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37e7d6a4..2477f610 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,11 @@
 # Aparapi Changelog
 
-## 1.8.1
+## 1.9.0
+
+* (JNI) Fixed local arrays handling 1D and ND, to cope with arrays resizing across kernel executions
+* Significant speed-up on discrete GPUs with dedicated memory - OpenCLDevice.setSharedMemory(false)
+* (JNI) Aparapi now supports efficient execution on discrete GPU and other devices with dedicated memory
+* Support for OpenCLDevice configurator/configure API
 
 ## 1.8.0
 
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index e0ef9adb..4e136b2e 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -50,4 +50,5 @@ Below are some of the specific details of various contributions.
 * Luis Mendes submited PR for issue #101 - Possible deadlock in JTP mode
 * Luis Mendes submited PR to facilitate KernelManager class extension with non-static parameters in constructors
 * Luis Mendes submited PR to Enable kernel profiling and execution simultaneously on multiple devices
-* Luis Mendes submited PR to fix issue #78 - Signed integer constants were interpreted as unsigned values for instruction SIPUSH
\ No newline at end of file
+* Luis Mendes submited PR to fix issue #78 - Signed integer constants were interpreted as unsigned values for instruction SIPUSH
+* Luis Mendes submitted PR adding support for the OpenCLDevice configurator/configure API
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index 33c1e1c2..0ab36096 100644
--- a/pom.xml
+++ b/pom.xml
@@ -10,7 +10,7 @@
 
     <groupId>com.aparapi</groupId>
     <artifactId>aparapi</artifactId>
-    <version>1.8.1-SNAPSHOT</version>
+    <version>1.9.0-SNAPSHOT</version>
     <packaging>jar</packaging>
 
     <prerequisites>
diff --git a/src/main/java/com/aparapi/device/IOpenCLDeviceConfigurator.java b/src/main/java/com/aparapi/device/IOpenCLDeviceConfigurator.java
new file mode 100644
index 00000000..38a8a8b4
--- /dev/null
+++ b/src/main/java/com/aparapi/device/IOpenCLDeviceConfigurator.java
@@ -0,0 +1,34 @@
+/**
+ * Copyright (c) 2016 - 2018 Syncleus, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.aparapi.device;
+
+/**
+ * This interface provides a way for the client application to refine the default device
+ * configuration obtained from the underlying OpenCL platforms in a centralized, automated manner.<br/>
+ * 
+ * @author CoreRasurae
+ */
+public interface IOpenCLDeviceConfigurator {
+	
+	/**
+	 * Called for each OpenCL device detected by Aparapi.<br/>
+	 * Gives the client application an opportunity to refine the device configuration parameters.
+	 * 
+	 * @param device the OpenCL device to be configured
+	 */
+	public void configure(OpenCLDevice device);
+	
+}
diff --git a/src/main/java/com/aparapi/device/OpenCLDevice.java b/src/main/java/com/aparapi/device/OpenCLDevice.java
index a105540e..823f82e3 100644
--- a/src/main/java/com/aparapi/device/OpenCLDevice.java
+++ b/src/main/java/com/aparapi/device/OpenCLDevice.java
@@ -15,8 +15,6 @@
  */
 package com.aparapi.device;
 
-import com.aparapi.opencl.OpenCL.Kernel;
-
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
@@ -29,6 +27,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
 
 import com.aparapi.Range;
 import com.aparapi.internal.opencl.OpenCLArgDescriptor;
@@ -41,11 +40,14 @@ import com.aparapi.opencl.OpenCL.Constant;
 import com.aparapi.opencl.OpenCL.GlobalReadOnly;
 import com.aparapi.opencl.OpenCL.GlobalReadWrite;
 import com.aparapi.opencl.OpenCL.GlobalWriteOnly;
+import com.aparapi.opencl.OpenCL.Kernel;
 import com.aparapi.opencl.OpenCL.Local;
 import com.aparapi.opencl.OpenCL.Resource;
 import com.aparapi.opencl.OpenCL.Source;
 
 public class OpenCLDevice extends Device implements Comparable<Device> {
+	
+   private static IOpenCLDeviceConfigurator configurator = null;
 
    private final OpenCLPlatform platform;
 
@@ -58,11 +60,22 @@ public class OpenCLDevice extends Device implements Comparable<Device> {
    private long globalMemSize;
 
    private long maxMemAllocSize;
+   
+   private boolean sharedMemory = true;
 
    private String shortDescription = null;
 
    private String name = null;
 
+   private AtomicBoolean underConfiguration = new AtomicBoolean(false);
+   /**
+    * Statically assigns a new configurator for all OpenCL devices detected after the assignment. 
+    * @param _configurator the configurator instance
+    */
+   public static void setConfigurator(IOpenCLDeviceConfigurator _configurator) {
+	   configurator = _configurator;
+   }
+   
    /**
     * Minimal constructor
     *
@@ -112,7 +125,7 @@ public class OpenCLDevice extends Device implements Comparable<Device> {
       globalMemSize = _globalMemSize;
    }
 
-   void setMaxWorkItemSize(int _dim, int _value) {
+   public void setMaxWorkItemSize(int _dim, int _value) {
       maxWorkItemSize[_dim] = _value;
    }
 
@@ -124,10 +137,50 @@ public class OpenCLDevice extends Device implements Comparable<Device> {
     this.name = name;
   }
 
+  /**
+   * Called by the underlying Aparapi OpenCL platform, upon device detection.
+   * Calls made while this device is already being configured are ignored.
+   */
+  public void configure() {
+	  if (configurator == null || !underConfiguration.compareAndSet(false, true)) { return; }
+	  try {
+		 configurator.configure(this);
+	  } finally { // always clear the guard, even if the configurator throws
+		 underConfiguration.set(false);
+	  }
+  }
+  
   @Override
   public long getDeviceId() {
       return (deviceId);
    }
+  
+  /**
+   * Configures whether the device shares its memory with the host.
+   * <br/>
+   * <b>Note1: </b>For discrete GPU devices having dedicated memory, 
+   * thus not shared with host, this should be set to false. This can result
+   * in significant kernel execution speed-ups for such HW configurations.
+   * Aparapi is unable to detect this property automatically for all devices, unless
+   * the client application provides a configurator ({@link IOpenCLDeviceConfigurator}).
+   * <br/> 
+   * <b>Note2: </b>By default devices are initialized as having shared memory - to maintain
+   * backwards compatibility - unless Aparapi can unequivocally identify the device.
+   * @param _sharedMemory <ul><li>true, if OpenCL device has the memory shared with the host memory</li>
+   *             <li>false, if OpenCL device is a discrete unit, having dedicated memory, thus not shared with host</li></ul>
+   */
+  public void setSharedMemory(boolean _sharedMemory) {
+	  sharedMemory = _sharedMemory;
+  }
+  
+  /**
+   * Retrieves the shared memory flag 
+   * @return <ul><li>true, if OpenCL device has the memory shared with the host memory</li>
+   *             <li>false, if OpenCL device is a discrete unit, having dedicated memory, thus not shared with host</li></ul>   
+   */
+  public boolean isSharedMemory() {
+	  return sharedMemory;
+  }
 
    @Override
    public String getShortDescription() {
diff --git a/src/test/java/com/aparapi/runtime/OpenCLDeviceConfiguratorTest.java b/src/test/java/com/aparapi/runtime/OpenCLDeviceConfiguratorTest.java
new file mode 100644
index 00000000..942dd2ef
--- /dev/null
+++ b/src/test/java/com/aparapi/runtime/OpenCLDeviceConfiguratorTest.java
@@ -0,0 +1,96 @@
+/**
+ * Copyright (c) 2016 - 2018 Syncleus, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.aparapi.runtime;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeTrue;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.junit.Test;
+
+import com.aparapi.device.Device;
+import com.aparapi.device.IOpenCLDeviceConfigurator;
+import com.aparapi.device.OpenCLDevice;
+import com.aparapi.internal.kernel.KernelManager;
+import com.aparapi.internal.opencl.OpenCLPlatform;
+
+/**
+ * Tests for feature OpenCLDeviceConfigurator set 1/2 
+ * 
+ * @author CoreRasurae
+ */
+public class OpenCLDeviceConfiguratorTest {
+    private static OpenCLDevice openCLDevice = null;
+
+    private class CLKernelManager extends KernelManager {
+    	@Override
+    	protected List<Device.TYPE> getPreferredDeviceTypes() {
+    		return Arrays.asList(Device.TYPE.ACC, Device.TYPE.GPU, Device.TYPE.CPU);
+    	}
+    }
+        
+    public void setUp() throws Exception {
+    	KernelManager.setKernelManager(new CLKernelManager());
+        Device device = KernelManager.instance().bestDevice();
+        if (device == null || !(device instanceof OpenCLDevice)) {
+        	System.out.println("!!!No OpenCLDevice available for running the integration test");
+        }
+        assumeTrue (device != null && device instanceof OpenCLDevice);
+        openCLDevice = (OpenCLDevice) device;
+    }
+
+    public void setUpWithConfigurator(IOpenCLDeviceConfigurator configurator) throws Exception {
+    	OpenCLDevice.setConfigurator(configurator);
+    	setUp();
+    }
+    
+    @Test
+    public void configuratorCallbackTest() throws Exception {
+    	final AtomicInteger callCounter = new AtomicInteger(0);
+    	IOpenCLDeviceConfigurator configurator = new IOpenCLDeviceConfigurator() {
+			@Override
+			public void configure(OpenCLDevice device) {
+				callCounter.incrementAndGet();
+				device.setName("Configured");
+				device.setSharedMemory(false);
+			}
+    	};
+    	setUpWithConfigurator(configurator);
+    	assertTrue("Number of configured devices should be > 0", callCounter.get() > 0);
+    	int numberOfConfiguredDevices = callCounter.get();
+    	
+		assertFalse("Device isShareMempory() should return false", openCLDevice.isSharedMemory());
+		assertEquals("Device name should be \"Configured\"", "Configured", openCLDevice.getName());   
+    	
+    	int numberOfDevices = 0;
+    	List<OpenCLPlatform> platforms = OpenCLPlatform.getUncachedOpenCLPlatforms();
+    	for (OpenCLPlatform platform : platforms) {
+    		for (OpenCLDevice device : platform.getOpenCLDevices()) {
+    			assertFalse("Device isShareMempory() should return false", device.isSharedMemory());
+    			assertEquals("Device name should be \"Configured\"", "Configured", device.getName());
+    			numberOfDevices++;
+    		}
+    	}
+
+    	assertEquals("Number of configured devices should match numnber of devices", numberOfDevices, numberOfConfiguredDevices);
+    	assertEquals("Number of calls doesn't match the expected", numberOfDevices*2, callCounter.get());
+    }
+ }
diff --git a/src/test/java/com/aparapi/runtime/OpenCLDeviceNoConfiguratorTest.java b/src/test/java/com/aparapi/runtime/OpenCLDeviceNoConfiguratorTest.java
new file mode 100644
index 00000000..b640d5de
--- /dev/null
+++ b/src/test/java/com/aparapi/runtime/OpenCLDeviceNoConfiguratorTest.java
@@ -0,0 +1,90 @@
+/**
+ * Copyright (c) 2016 - 2018 Syncleus, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.aparapi.runtime;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assume.assumeTrue;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.junit.Test;
+
+import com.aparapi.device.Device;
+import com.aparapi.device.IOpenCLDeviceConfigurator;
+import com.aparapi.device.OpenCLDevice;
+import com.aparapi.internal.kernel.KernelManager;
+import com.aparapi.internal.opencl.OpenCLPlatform;
+
+/**
+ * Tests for feature OpenCLDeviceConfigurator set 2/2 
+ * 
+ * @author CoreRasurae
+ */
+public class OpenCLDeviceNoConfiguratorTest {
+    private static OpenCLDevice openCLDevice = null;
+
+    private class CLKernelManager extends KernelManager {
+    	@Override
+    	protected List<Device.TYPE> getPreferredDeviceTypes() {
+    		return Arrays.asList(Device.TYPE.ACC, Device.TYPE.GPU, Device.TYPE.CPU);
+    	}
+    }
+        
+    public void setUp() throws Exception {
+    	KernelManager.setKernelManager(new CLKernelManager());
+        Device device = KernelManager.instance().bestDevice();
+        if (device == null || !(device instanceof OpenCLDevice)) {
+        	System.out.println("!!!No OpenCLDevice available for running the integration test");
+        }
+        assumeTrue (device != null && device instanceof OpenCLDevice);
+        openCLDevice = (OpenCLDevice) device;
+    }
+
+    @Test
+    public void noConfiguratorTest() throws Exception {
+    	setUp();
+    	assertTrue("Device isShareMempory() should return true", openCLDevice.isSharedMemory());
+		assertNotEquals("Device name should not be \"Configured\"", "Configured", openCLDevice.getName());
+    	List<OpenCLPlatform> platforms = OpenCLPlatform.getUncachedOpenCLPlatforms();
+    	for (OpenCLPlatform platform : platforms) {
+    		for (OpenCLDevice device : platform.getOpenCLDevices()) {
+    			assertTrue("Device isSharedMempory() should return true", device.isSharedMemory());
+    			assertNotEquals("Device name should not be \"Configured\"", "Configured", device.getName());
+    		}
+    	}
+    }
+    
+    @Test
+    public void protectionAgainstRecursiveConfiguresTest() {
+    	OpenCLDevice dev = new OpenCLDevice(null, 101L, Device.TYPE.CPU);
+    	final AtomicInteger callCounter = new AtomicInteger(0);
+    	IOpenCLDeviceConfigurator configurator = new IOpenCLDeviceConfigurator() {
+			@Override
+			public void configure(OpenCLDevice device) {
+				callCounter.incrementAndGet();
+				device.configure();
+			}
+    	};
+    	OpenCLDevice.setConfigurator(configurator);
+    	dev.configure();
+    	
+    	assertEquals("Number of confgure() calls should be one", 1, callCounter.get());
+    }
+}
-- 
GitLab