From 0f36f9d1fcd55e46204fbdb245c9f479c9093702 Mon Sep 17 00:00:00 2001
From: brenzosa <brenzosa@gmail.com>
Date: Sun, 16 Jul 2017 10:49:53 +0800
Subject: [PATCH] New: Added Kernel.fma(float, float, float)

Also added Kernel.fma(double, double, double)
Added FusedMultiplyAddTest
---
 src/main/java/com/aparapi/Kernel.java         | 34 +++++++
 .../codegen/test/FusedMultiplyAdd.java        | 29 ++++++
 .../codegen/test/FusedMultiplyAddTest.java    | 88 +++++++++++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 src/test/java/com/aparapi/codegen/test/FusedMultiplyAdd.java
 create mode 100644 src/test/java/com/aparapi/codegen/test/FusedMultiplyAddTest.java

diff --git a/src/main/java/com/aparapi/Kernel.java b/src/main/java/com/aparapi/Kernel.java
index a4a49e43..f697ff4a 100644
--- a/src/main/java/com/aparapi/Kernel.java
+++ b/src/main/java/com/aparapi/Kernel.java
@@ -1999,6 +1999,40 @@ public abstract class Kernel implements Cloneable {
         return a * b + c;
     }
 
+    /**
+     * Delegates to either {@code a * b + c} (Java) or <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(float, float, float)</a></code> (OpenCL).
+     *
+     * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable: the Java fallback rounds the product before the addition, whereas OpenCL's <code>fma</code> is specified to round only once.
+     *
+     * @param a value to delegate to first argument of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(float, float, float)</a></code>
+     * @param b value to delegate to second argument of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(float, float, float)</a></code>
+     * @param c value to delegate to third argument of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(float, float, float)</a></code>
+     * @return {@code a * b + c} (Java) or the result of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(float, float, float)</a></code> (OpenCL)
+     *
+     * @see <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(float, float, float)</a></code>
+     */
+    @OpenCLMapping(mapTo = "fma")
+    protected float fma(final float a, final float b, final float c) {
+       return a * b + c;
+    }
+
+    /**
+     * Delegates to either {@code a * b + c} (Java) or <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(double, double, double)</a></code> (OpenCL).
+     *
+     * User should note the differences in precision between Java and OpenCL's implementation of arithmetic functions to determine whether the difference in precision is acceptable: the Java fallback rounds the product before the addition, whereas OpenCL's <code>fma</code> is specified to round only once.
+     *
+     * @param a value to delegate to first argument of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(double, double, double)</a></code>
+     * @param b value to delegate to second argument of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(double, double, double)</a></code>
+     * @param c value to delegate to third argument of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(double, double, double)</a></code>
+     * @return {@code a * b + c} (Java) or the result of <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(double, double, double)</a></code> (OpenCL)
+     *
+     * @see <code><a href="http://www.khronos.org/registry/OpenCL/sdk/1.1/docs/man/xhtml/fma.html">fma(double, double, double)</a></code>
+     */
+    @OpenCLMapping(mapTo = "fma")
+    protected double fma(final double a, final double b, final double c) {
+       return a * b + c;
+    }
+
     @OpenCLMapping(mapTo = "nextafter")
     protected final double nextAfter(final double start, final double direction) {
         return Math.nextAfter(start, direction);
diff --git a/src/test/java/com/aparapi/codegen/test/FusedMultiplyAdd.java b/src/test/java/com/aparapi/codegen/test/FusedMultiplyAdd.java
new file mode 100644
index 00000000..ca1da6ac
--- /dev/null
+++ b/src/test/java/com/aparapi/codegen/test/FusedMultiplyAdd.java
@@ -0,0 +1,29 @@
+/**
+ * Copyright (c) 2016 - 2017 Syncleus, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.aparapi.codegen.test;
+
+import com.aparapi.Kernel;
+/** Kernel fixture whose {@code run()} calls both the double and the float {@code fma} overloads; referenced by {@code FusedMultiplyAddTest}. */
+public class FusedMultiplyAdd extends Kernel {
+    public void run() {
+        double d1 = 123.0, d2 = 0.456, d3 = 789.0;
+        float f1 = 123.0f, f2 = 0.456f, f3 = 789.0f;
+        // 'pass' is written but never read here. NOTE(review): the exact != on floating-point results presumably only matters as translated code - confirm.
+        @SuppressWarnings("unused") boolean pass = true;
+        if ((fma(d1, d2, d3) != 845.088) || (fma(f1, f2, f3) != 845.088f))
+            pass = false;
+    }
+}
\ No newline at end of file
diff --git a/src/test/java/com/aparapi/codegen/test/FusedMultiplyAddTest.java b/src/test/java/com/aparapi/codegen/test/FusedMultiplyAddTest.java
new file mode 100644
index 00000000..d4c79f08
--- /dev/null
+++ b/src/test/java/com/aparapi/codegen/test/FusedMultiplyAddTest.java
@@ -0,0 +1,88 @@
+/**
+ * Copyright (c) 2016 - 2017 Syncleus, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.aparapi.codegen.test;
+
+import org.junit.Test;
+/** Codegen test that hands the {@code FusedMultiplyAdd} kernel and the OpenCL text expected for it (both calls kept as {@code fma}) to {@code test(...)}. */
+public class FusedMultiplyAddTest extends com.aparapi.codegen.CodeGenJUnitBase {
+    private static final String[] expectedOpenCL = {"#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" +
+        "typedef struct This_s{\n" +
+        " int passid;\n" +
+        "}This;\n" +
+        "int get_pass_id(This *this){\n" +
+        "   return this->passid;\n" +
+        "}\n" +
+        "__kernel void run(\n" +
+        "   int passid\n" +
+        "){\n" +
+        "   This thisStruct;\n" +
+        "   This* this=&thisStruct;\n" +
+        "   this->passid = passid;\n" +
+        "   {\n" +
+        "      double d1 = 123.0;\n" +
+        "      double d2 = 0.456;\n" +
+        "      double d3 = 789.0;\n" +
+        "      float f1 = 123.0f;\n" +
+        "      float f2 = 0.456f;\n" +
+        "      float f3 = 789.0f;\n" +
+        "      char pass = 1;\n" +
+        "      if (fma(d1, d2, d3)!=845.088 || fma(f1, f2, f3)!=845.088f){\n" +
+        "         pass = 0;\n" +
+        "      }\n" +
+        "      return;\n" +
+        "   }\n" +
+        "}"};
+    private static final Class<? extends com.aparapi.internal.exception.AparapiException> expectedException = null;
+    // NOTE(review): presumably test(...) compares the generated OpenCL with expectedOpenCL and a null expectedException means none is expected - confirm in CodeGenJUnitBase.
+    @Test
+    public void FusedMultiplyAddTest() {
+        test(com.aparapi.codegen.test.FusedMultiplyAdd.class, expectedException, expectedOpenCL);
+    }
+    // Same call as the test above. NOTE(review): the name suggests it exercises a cached second translation - confirm.
+    @Test
+    public void FusedMultiplyAddTestWorksWithCaching() {
+        test(com.aparapi.codegen.test.FusedMultiplyAdd.class, expectedException, expectedOpenCL);
+    }
+}
+/**{OpenCL{
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+typedef struct This_s{
+   int passid;
+}This;
+int get_pass_id(This *this){
+   return this->passid;
+}
+__kernel void run(
+   int passid
+){
+   This thisStruct;
+   This* this=&thisStruct;
+   this->passid = passid;
+   {
+      double d1 = 123.0;
+      double d2 = 0.456;
+      double d3 = 789.0;
+      float f1 = 123.0f;
+      float f2 = 0.456f;
+      float f3 = 789.0f;
+      char pass = 1;
+      if (fma(d1, d2, d3)!=845.088 || fma(f1, f2, f3)!=845.088f){
+         pass = 0;
+      }
+      return;
+   }
+}OpenCL}**/
-- 
GitLab