diff --git a/src/main/java/com/aparapi/examples/mdarray/MDArray.java b/src/main/java/com/aparapi/examples/mdarray/MDArray.java index ff08012e71e9cc64f8c91fcd2640937f7cd7464f..7414b7ea00ece84edecf08160f6a81788bb64bb0 100644 --- a/src/main/java/com/aparapi/examples/mdarray/MDArray.java +++ b/src/main/java/com/aparapi/examples/mdarray/MDArray.java @@ -1,1291 +1,1291 @@ -/** - * This product currently only contains code developed by authors - * of specific components, as identified by the source code files. - * - * Since product implements StAX API, it has dependencies to StAX API - * classes. - * - * For additional credits (generally to people who reported problems) - * see CREDITS file. - */ -package com.aparapi.examples.mdarray; - -import com.aparapi.Kernel; - -public class MDArray { - - static int N = 1 << 10; - - static int M = 1 << 5; - - public static void main(String[] args) { - System.out.println("boolean 1D"); - Zrun1D(); - System.out.println("byte 1D"); - Brun1D(); - System.out.println("short 1D"); - Srun1D(); - System.out.println("int 1D"); - Irun1D(); - System.out.println("long 1D"); - Lrun1D(); - System.out.println("float 1D"); - Frun1D(); - System.out.println("double 1D"); - Drun1D(); - System.out.println("boolean 2D"); - Zrun2D(); - System.out.println("byte 2D"); - Brun2D(); - System.out.println("short 2D"); - Srun2D(); - System.out.println("int 2D"); - Irun2D(); - System.out.println("long 2D"); - Lrun2D(); - System.out.println("float 2D"); - Frun2D(); - System.out.println("double 2D"); - Drun2D(); - System.out.println("boolean 3D"); - Zrun3D(); - System.out.println("byte 3D"); - Brun3D(); - System.out.println("short 3D"); - Srun3D(); - System.out.println("int 3D"); - Irun3D(); - System.out.println("long 3D"); - Lrun3D(); - System.out.println("float 3D"); - Frun3D(); - System.out.println("double 3D"); - Drun3D(); - } - - private static boolean[] matMull(boolean[] A, boolean[] B, int N) { - final boolean[] C = new boolean[N * N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[(i * N) + j] ^= A[(i * N) + k] & B[(k * N) + j]; - } - } - } - return C; - } - - private static byte[] matMull(byte[] A, byte[] B, int N) { - final byte[] C = new byte[N * N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[(i * N) + j] += (byte) (A[(i * N) + k] * B[(k * N) + j]); - } - } - } - return C; - } - - private static short[] matMull(short[] A, short[] B, int N) { - final short[] C = new short[N * N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[(i * N) + j] += (short) (A[(i * N) + k] * B[(k * N) + j]); - } - } - } - return C; - } - - private static int[] matMull(int[] A, int[] B, int N) { - final int[] C = new int[N * N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; - } - } - } - return C; - } - - private static long[] matMull(long[] A, long[] B, int N) { - final long[] C = new long[N * N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; - } - } - } - return C; - } - - private static float[] matMull(float[] A, float[] B, int N) { - final float[] C = new float[N * N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; - } - } - } - return C; - } - - private static double[] matMull(double[] A, double[] B, int N) { - final double[] C = new double[N * N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; - } - } - } - return C; - } - - private static boolean[][] matMull(boolean[][] A, boolean[][] B, int N) { - final boolean[][] C = new boolean[N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[i][j] ^= A[i][k] & B[k][j]; - } - } - } - return C; - } - - private static byte[][] matMull(byte[][] A, byte[][] B, int N) { - final byte[][] C = new byte[N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[i][j] += (byte) (A[i][k] * B[k][j]); - } - } - } - return C; - } - - private static short[][] matMull(short[][] A, short[][] B, int N) { - final short[][] C = new short[N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[i][j] += (short) (A[i][k] * B[k][j]); - } - } - } - return C; - } - - private static int[][] matMull(int[][] A, int[][] B, int N) { - final int[][] C = new int[N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[i][j] += A[i][k] * B[k][j]; - } - } - } - return C; - } - - private static long[][] matMull(long[][] A, long[][] B, int N) { - final long[][] C = new long[N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[i][j] += A[i][k] * B[k][j]; - } - } - } - return C; - } - - private static float[][] matMull(float[][] A, float[][] B, int N) { - final float[][] C = new float[N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[i][j] += A[i][k] * B[k][j]; - } - } - } - return C; - } - - private static double[][] matMull(double[][] A, double[][] B, int N) { - final double[][] C = new double[N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - C[i][j] += A[i][k] * B[k][j]; - } - } - } - return C; - } - - private static boolean[][][] matMull(boolean[][][] A, boolean[][][] B, int N) { - final boolean[][][] C = new boolean[N][N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - for (int l = 0; l < N; l++) { - C[i][j][k] ^= A[i][j][l] & B[l][j][k]; - } - } - } - } - return C; - } - - private static byte[][][] matMull(byte[][][] A, byte[][][] B, int N) { - final byte[][][] C = new byte[N][N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - for (int l = 0; l < N; l++) { - C[i][j][k] += (byte) (A[i][j][l] * B[l][j][k]); - } - } - } - } - return C; - } - - private static short[][][] matMull(short[][][] A, short[][][] B, int N) { - final short[][][] C = new short[N][N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - for (int l = 0; l < N; l++) { - C[i][j][k] += (short) (A[i][j][l] * B[l][j][k]); - } - } - } - } - return C; - } - - private static int[][][] matMull(int[][][] A, int[][][] B, int N) { - final int[][][] C = new int[N][N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - for (int l = 0; l < N; l++) { - C[i][j][k] += A[i][j][l] * B[l][j][k]; - } - } - } - } - return C; - } - - private static long[][][] matMull(long[][][] A, long[][][] B, int N) { - final long[][][] C = new long[N][N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - for (int l = 0; l < N; l++) { - C[i][j][k] += A[i][j][l] * B[l][j][k]; - } - } - } - } - return C; - } - - private static float[][][] matMull(float[][][] A, float[][][] B, int N) { - final float[][][] C = new float[N][N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - for (int l = 0; l < N; l++) { - C[i][j][k] += A[i][j][l] * B[l][j][k]; - } - } - } - } - return C; - } - - private static double[][][] matMull(double[][][] A, double[][][] B, int N) { - final double[][][] C = new double[N][N][N]; - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - for (int k = 0; k < N; k++) { - for (int l = 0; l < N; l++) { - C[i][j][k] += A[i][j][l] * B[l][j][k]; - } - } - } - } - return C; - } - - private static boolean checkResults(boolean[] cpu, boolean[] gpu) { - for (int i = 0; i < cpu.length; i++) { - if (cpu[i] != gpu[i]) { - return false; - } - } - return true; - } - - private static boolean checkResults(byte[] cpu, byte[] gpu) { - for (int i = 0; i < cpu.length; i++) { - if (cpu[i] != gpu[i]) { - return false; - } - } - return true; - } - - private static boolean checkResults(short[] cpu, short[] gpu) { - for (int i = 0; i < cpu.length; i++) { - if (cpu[i] != gpu[i]) { - return false; - } - } - return true; - } - - private static boolean checkResults(int[] cpu, int[] gpu) { - for (int i = 0; i < cpu.length; i++) { - if (cpu[i] != gpu[i]) { - return false; - } - } - return true; - } - - private static boolean checkResults(long[] cpu, long[] gpu) { - for (int i = 0; i < cpu.length; i++) { - if (cpu[i] != gpu[i]) { - return false; - } - } - return true; - } - - private static boolean checkResults(float[] cpu, float[] gpu) { - for (int i = 0; i < cpu.length; i++) { - if (cpu[i] != gpu[i]) { - return false; - } - } - return true; - } - - private static boolean checkResults(double[] cpu, double[] gpu) { - for (int i = 0; i < cpu.length; i++) { - if (cpu[i] != gpu[i]) { - return false; - } - } - return true; - } - - private static boolean checkResults(boolean[][] cpu, boolean[][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - if (cpu[i][j] != gpu[i][j]) { - return false; - } - } - } - return true; - } - - private static boolean checkResults(byte[][] cpu, byte[][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - if (cpu[i][j] != gpu[i][j]) { - return false; - } - } - } - return true; - } - - private static boolean checkResults(short[][] cpu, short[][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - if (cpu[i][j] != gpu[i][j]) { - return false; - } - } - } - return true; - } - - private static boolean checkResults(int[][] cpu, int[][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - if (cpu[i][j] != gpu[i][j]) { - return false; - } - } - } - return true; - } - - private static boolean checkResults(long[][] cpu, long[][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - if (cpu[i][j] != gpu[i][j]) { - return false; - } - } - } - return true; - } - - private static boolean checkResults(float[][] cpu, float[][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - if (cpu[i][j] != gpu[i][j]) { - return false; - } - } - } - return true; - } - - private static boolean checkResults(double[][] cpu, double[][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - if (cpu[i][j] != gpu[i][j]) { - return false; - } - } - } - return true; - } - - private static boolean checkResults(boolean[][][] cpu, boolean[][][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - for (int k = 0; k < cpu[i][j].length; k++) { - if (cpu[i][j][k] != gpu[i][j][k]) { - return false; - } - } - } - } - return true; - } - - private static boolean checkResults(byte[][][] cpu, byte[][][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - for (int k = 0; k < cpu[i][j].length; k++) { - if (cpu[i][j][k] != gpu[i][j][k]) { - return false; - } - } - } - } - return true; - } - - private static boolean checkResults(short[][][] cpu, short[][][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - for (int k = 0; k < cpu[i][j].length; k++) { - if (cpu[i][j][k] != gpu[i][j][k]) { - return false; - } - } - } - } - return true; - } - - private static boolean checkResults(int[][][] cpu, int[][][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - for (int k = 0; k < cpu[i][j].length; k++) { - if (cpu[i][j][k] != gpu[i][j][k]) { - return false; - } - } - } - } - return true; - } - - private static boolean checkResults(long[][][] cpu, long[][][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - for (int k = 0; k < cpu[i][j].length; k++) { - if (cpu[i][j][k] != gpu[i][j][k]) { - return false; - } - } - } - } - return true; - } - - private static boolean checkResults(float[][][] cpu, float[][][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - for (int k = 0; k < cpu[i][j].length; k++) { - if (cpu[i][j][k] != gpu[i][j][k]) { - return false; - } - } - } - } - return true; - } - - private static boolean checkResults(double[][][] cpu, double[][][] gpu) { - for (int i = 0; i < cpu.length; i++) { - for (int j = 0; j < cpu[i].length; j++) { - for (int k = 0; k < cpu[i][j].length; k++) { - if (cpu[i][j][k] != gpu[i][j][k]) { - return false; - } - } - } - } - return true; - } - - public static void Zrun1D() { - final boolean[] A = new boolean[N * N]; - final boolean[] B = new boolean[N * N]; - final boolean[] gpu = new boolean[N * N]; - boolean[] cpu = new boolean[N * N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[(i * N) + j] = ((i % 2) == 0) ^ ((j % 2) == 0); - B[(i * N) + j] = ((i % 2) == 0) & ((j % 2) == 0); - cpu[(i * N) + j] = false; - gpu[(i * N) + j] = false; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new ZMatMul1D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Brun1D() { - final byte[] A = new byte[N * N]; - final byte[] B = new byte[N * N]; - final byte[] gpu = new byte[N * N]; - byte[] cpu = new byte[N * N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[(i * N) + j] = (byte) (i + j); - B[(i * N) + j] = (byte) (i - j); - cpu[(i * N) + j] = (byte) 0; - gpu[(i * N) + j] = (byte) 0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new BMatMul1D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Srun1D() { - final short[] A = new short[N * N]; - final short[] B = new short[N * N]; - final short[] gpu = new short[N * N]; - short[] cpu = new short[N * N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[(i * N) + j] = (short) (i + j); - B[(i * N) + j] = (short) (i - j); - cpu[(i * N) + j] = (short) 0; - gpu[(i * N) + j] = (short) 0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new SMatMul1D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Irun1D() { - final int[] A = new int[N * N]; - final int[] B = new int[N * N]; - final int[] gpu = new int[N * N]; - int[] cpu = new int[N * N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[(i * N) + j] = i + j; - B[(i * N) + j] = i - j; - cpu[(i * N) + j] = 0; - gpu[(i * N) + j] = 0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new IMatMul1D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Lrun1D() { - final long[] A = new long[N * N]; - final long[] B = new long[N * N]; - final long[] gpu = new long[N * N]; - long[] cpu = new long[N * N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[(i * N) + j] = i + j; - B[(i * N) + j] = i - j; - cpu[(i * N) + j] = 0l; - gpu[(i * N) + j] = 0l; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new LMatMul1D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Frun1D() { - final float[] A = new float[N * N]; - final float[] B = new float[N * N]; - final float[] gpu = new float[N * N]; - float[] cpu = new float[N * N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[(i * N) + j] = i + j; - B[(i * N) + j] = i - j; - cpu[(i * N) + j] = 0.0f; - gpu[(i * N) + j] = 0.0f; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new FMatMul1D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Drun1D() { - final double[] A = new double[N * N]; - final double[] B = new double[N * N]; - final double[] gpu = new double[N * N]; - double[] cpu = new double[N * N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[(i * N) + j] = i + j; - B[(i * N) + j] = i - j; - cpu[(i * N) + j] = 0.0; - gpu[(i * N) + j] = 0.0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new DMatMul1D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Zrun2D() { - final boolean[][] A = new boolean[N][N]; - final boolean[][] B = new boolean[N][N]; - final boolean[][] gpu = new boolean[N][N]; - boolean[][] cpu = new boolean[N][N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[i][j] = ((i % 2) == 0) ^ ((j % 2) == 0); - B[i][j] = ((i % 2) == 0) & ((j % 2) == 0); - cpu[i][j] = false; - gpu[i][j] = false; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new ZMatMul2D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Brun2D() { - final byte[][] A = new byte[N][N]; - final byte[][] B = new byte[N][N]; - final byte[][] gpu = new byte[N][N]; - byte[][] cpu = new byte[N][N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[i][j] = (byte) (i + j); - B[i][j] = (byte) (i - j); - cpu[i][j] = (byte) 0; - gpu[i][j] = (byte) 0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new BMatMul2D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Srun2D() { - final short[][] A = new short[N][N]; - final short[][] B = new short[N][N]; - final short[][] gpu = new short[N][N]; - short[][] cpu = new short[N][N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[i][j] = (short) (i + j); - B[i][j] = (short) (i - j); - cpu[i][j] = (short) 0; - gpu[i][j] = (short) 0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new SMatMul2D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Irun2D() { - final int[][] A = new int[N][N]; - final int[][] B = new int[N][N]; - final int[][] gpu = new int[N][N]; - int[][] cpu = new int[N][N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[i][j] = i + j; - B[i][j] = i - j; - cpu[i][j] = 0; - gpu[i][j] = 0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new IMatMul2D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Lrun2D() { - final long[][] A = new long[N][N]; - final long[][] B = new long[N][N]; - final long[][] gpu = new long[N][N]; - long[][] cpu = new long[N][N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[i][j] = i + j; - B[i][j] = i - j; - cpu[i][j] = 0l; - gpu[i][j] = 0l; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new LMatMul2D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Frun2D() { - final float[][] A = new float[N][N]; - final float[][] B = new float[N][N]; - final float[][] gpu = new float[N][N]; - float[][] cpu = new float[N][N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[i][j] = i + j; - B[i][j] = i - j; - cpu[i][j] = 0.0f; - gpu[i][j] = 0.0f; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new FMatMul2D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Drun2D() { - final double[][] A = new double[N][N]; - final double[][] B = new double[N][N]; - final double[][] gpu = new double[N][N]; - double[][] cpu = new double[N][N]; - - for (int i = 0; i < N; i++) { - for (int j = 0; j < N; j++) { - A[i][j] = i + j; - B[i][j] = i - j; - cpu[i][j] = 0.0; - gpu[i][j] = 0.0; - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new DMatMul2D(A, B, gpu, N); - kernel.execute(N * N); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, N); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Zrun3D() { - final boolean[][][] A = new boolean[M][M][M]; - final boolean[][][] B = new boolean[M][M][M]; - final boolean[][][] gpu = new boolean[M][M][M]; - boolean[][][] cpu = new boolean[M][M][M]; - - for (int i = 0; i < M; i++) { - for (int j = 0; j < M; j++) { - for (int k = 0; k < M; k++) { - A[i][j][k] = ((i % 2) == 0) ^ (((j % 2) == 0) & ((k % 2) == 0)); - B[i][j][k] = (((i % 2) == 0) & ((j % 2) == 0)) ^ ((k % 2) == 0); - ; - cpu[i][j][k] = false; - gpu[i][j][k] = false; - } - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new ZMatMul3D(A, B, gpu, M); - kernel.execute(M * M * M); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, M); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Brun3D() { - final byte[][][] A = new byte[M][M][M]; - final byte[][][] B = new byte[M][M][M]; - final byte[][][] gpu = new byte[M][M][M]; - byte[][][] cpu = new byte[M][M][M]; - - for (int i = 0; i < M; i++) { - for (int j = 0; j < M; j++) { - for (int k = 0; k < M; k++) { - A[i][j][k] = (byte) (i + j + k); - B[i][j][k] = (byte) ((i - j) + k); - cpu[i][j][k] = (byte) 0; - gpu[i][j][k] = (byte) 0; - } - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new BMatMul3D(A, B, gpu, M); - kernel.execute(M * M * M); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, M); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Srun3D() { - final short[][][] A = new short[M][M][M]; - final short[][][] B = new short[M][M][M]; - final short[][][] gpu = new short[M][M][M]; - short[][][] cpu = new short[M][M][M]; - - for (int i = 0; i < M; i++) { - for (int j = 0; j < M; j++) { - for (int k = 0; k < M; k++) { - A[i][j][k] = (short) (i + j + k); - B[i][j][k] = (short) ((i - j) + k); - cpu[i][j][k] = (short) 0; - gpu[i][j][k] = (short) 0; - } - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new SMatMul3D(A, B, gpu, M); - kernel.execute(M * M * M); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, M); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Irun3D() { - final int[][][] A = new int[M][M][M]; - final int[][][] B = new int[M][M][M]; - final int[][][] gpu = new int[M][M][M]; - int[][][] cpu = new int[M][M][M]; - - for (int i = 0; i < M; i++) { - for (int j = 0; j < M; j++) { - for (int k = 0; k < M; k++) { - A[i][j][k] = i + j + k; - B[i][j][k] = (i - j) + k; - cpu[i][j][k] = 0; - gpu[i][j][k] = 0; - } - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new IMatMul3D(A, B, gpu, M); - kernel.execute(M * M * M); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, M); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Lrun3D() { - final long[][][] A = new long[M][M][M]; - final long[][][] B = new long[M][M][M]; - final long[][][] gpu = new long[M][M][M]; - long[][][] cpu = new long[M][M][M]; - - for (int i = 0; i < M; i++) { - for (int j = 0; j < M; j++) { - for (int k = 0; k < M; k++) { - A[i][j][k] = i + j + k; - B[i][j][k] = (i - j) + k; - cpu[i][j][k] = 0l; - gpu[i][j][k] = 0l; - } - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new LMatMul3D(A, B, gpu, M); - kernel.execute(M * M * M); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, M); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Frun3D() { - final float[][][] A = new float[M][M][M]; - final float[][][] B = new float[M][M][M]; - final float[][][] gpu = new float[M][M][M]; - float[][][] cpu = new float[M][M][M]; - - for (int i = 0; i < M; i++) { - for (int j = 0; j < M; j++) { - for (int k = 0; k < M; k++) { - A[i][j][k] = i + j + k; - B[i][j][k] = (i - j) + k; - cpu[i][j][k] = 0.0f; - gpu[i][j][k] = 0.0f; - } - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new FMatMul3D(A, B, gpu, M); - kernel.execute(M * M * M); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, M); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } - - public static void Drun3D() { - final double[][][] A = new double[M][M][M]; - final double[][][] B = new double[M][M][M]; - final double[][][] gpu = new double[M][M][M]; - double[][][] cpu = new double[M][M][M]; - - for (int i = 0; i < M; i++) { - for (int j = 0; j < M; j++) { - for (int k = 0; k < M; k++) { - A[i][j][k] = i + j + k; - B[i][j][k] = (i - j) + k; - cpu[i][j][k] = 0.0; - gpu[i][j][k] = 0.0; - } - } - } - - long gs = System.currentTimeMillis(); - final Kernel kernel = new DMatMul3D(A, B, gpu, M); - kernel.execute(M * M * M); - gs = System.currentTimeMillis() - gs; - - long cs = System.currentTimeMillis(); - cpu = matMull(A, B, M); - cs = System.currentTimeMillis() - cs; - - System.out.println("gpu time: " + gs + "\ncpu time: " + cs); - System.out.print("valid? "); - - if (checkResults(cpu, gpu)) { - System.out.println("yes"); - } else { - System.out.println("no"); - } - } -} +/** + * This product currently only contains code developed by authors + * of specific components, as identified by the source code files. + * + * Since product implements StAX API, it has dependencies to StAX API + * classes. + * + * For additional credits (generally to people who reported problems) + * see CREDITS file. + */ +package com.aparapi.examples.mdarray; + +import com.aparapi.Kernel; + +public class MDArray { + + static int N = 1 << 10; + + static int M = 1 << 5; + + public static void main(String[] args) { + System.out.println("boolean 1D"); + Zrun1D(); + System.out.println("byte 1D"); + Brun1D(); + System.out.println("short 1D"); + Srun1D(); + System.out.println("int 1D"); + Irun1D(); + System.out.println("long 1D"); + Lrun1D(); + System.out.println("float 1D"); + Frun1D(); + System.out.println("double 1D"); + Drun1D(); + System.out.println("boolean 2D"); + Zrun2D(); + System.out.println("byte 2D"); + Brun2D(); + System.out.println("short 2D"); + Srun2D(); + System.out.println("int 2D"); + Irun2D(); + System.out.println("long 2D"); + Lrun2D(); + System.out.println("float 2D"); + Frun2D(); + System.out.println("double 2D"); + Drun2D(); + System.out.println("boolean 3D"); + Zrun3D(); + System.out.println("byte 3D"); + Brun3D(); + System.out.println("short 3D"); + Srun3D(); + System.out.println("int 3D"); + Irun3D(); + System.out.println("long 3D"); + Lrun3D(); + System.out.println("float 3D"); + Frun3D(); + System.out.println("double 3D"); + Drun3D(); + } + + private static boolean[] matMull(boolean[] A, boolean[] B, int N) { + final boolean[] C = new boolean[N * N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[(i * N) + j] ^= A[(i * N) + k] & B[(k * N) + j]; + } + } + } + return C; + } + + private static byte[] matMull(byte[] A, byte[] B, int N) { + final byte[] C = new byte[N * N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[(i * N) + j] += (byte) (A[(i * N) + k] * B[(k * N) + j]); + } + } + } + return C; + } + + private static short[] matMull(short[] A, short[] B, int N) { + final short[] C = new short[N * N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[(i * N) + j] += (short) (A[(i * N) + k] * B[(k * N) + j]); + } + } + } + return C; + } + + private static int[] matMull(int[] A, int[] B, int N) { + final int[] C = new int[N * N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; + } + } + } + return C; + } + + private static long[] matMull(long[] A, long[] B, int N) { + final long[] C = new long[N * N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; + } + } + } + return C; + } + + private static float[] matMull(float[] A, float[] B, int N) { + final float[] C = new float[N * N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; + } + } + } + return C; + } + + private static double[] matMull(double[] A, double[] B, int N) { + final double[] C = new double[N * N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[(i * N) + j] += A[(i * N) + k] * B[(k * N) + j]; + } + } + } + return C; + } + + private static boolean[][] matMull(boolean[][] A, boolean[][] B, int N) { + final boolean[][] C = new boolean[N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[i][j] ^= A[i][k] & B[k][j]; + } + } + } + return C; + } + + private static byte[][] matMull(byte[][] A, byte[][] B, int N) { + final byte[][] C = new byte[N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[i][j] += (byte) (A[i][k] * B[k][j]); + } + } + } + return C; + } + + private static short[][] matMull(short[][] A, short[][] B, int N) { + final short[][] C = new short[N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[i][j] += (short) (A[i][k] * B[k][j]); + } + } + } + return C; + } + + private static int[][] matMull(int[][] A, int[][] B, int N) { + final int[][] C = new int[N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[i][j] += A[i][k] * B[k][j]; + } + } + } + return C; + } + + private static long[][] matMull(long[][] A, long[][] B, int N) { + final long[][] C = new long[N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[i][j] += A[i][k] * B[k][j]; + } + } + } + return C; + } + + private static float[][] matMull(float[][] A, float[][] B, int N) { + final float[][] C = new float[N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[i][j] += A[i][k] * B[k][j]; + } + } + } + return C; + } + + private static double[][] matMull(double[][] A, double[][] B, int N) { + final double[][] C = new double[N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + C[i][j] += A[i][k] * B[k][j]; + } + } + } + return C; + } + + private static boolean[][][] matMull(boolean[][][] A, boolean[][][] B, int N) { + final boolean[][][] C = new boolean[N][N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + for (int l = 0; l < N; l++) { + C[i][j][k] ^= A[i][j][l] & B[l][j][k]; + } + } + } + } + return C; + } + + private static byte[][][] matMull(byte[][][] A, byte[][][] B, int N) { + final byte[][][] C = new byte[N][N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + for (int l = 0; l < N; l++) { + C[i][j][k] += (byte) (A[i][j][l] * B[l][j][k]); + } + } + } + } + return C; + } + + private static short[][][] matMull(short[][][] A, short[][][] B, int N) { + final short[][][] C = new short[N][N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + for (int l = 0; l < N; l++) { + C[i][j][k] += (short) (A[i][j][l] * B[l][j][k]); + } + } + } + } + return C; + } + + private static int[][][] matMull(int[][][] A, int[][][] B, int N) { + final int[][][] C = new int[N][N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + for (int l = 0; l < N; l++) { + C[i][j][k] += A[i][j][l] * B[l][j][k]; + } + } + } + } + return C; + } + + private static long[][][] matMull(long[][][] A, long[][][] B, int N) { + final long[][][] C = new long[N][N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + for (int l = 0; l < N; l++) { + C[i][j][k] += A[i][j][l] * B[l][j][k]; + } + } + } + } + return C; + } + + private static float[][][] matMull(float[][][] A, float[][][] B, int N) { + final float[][][] C = new float[N][N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + for (int l = 0; l < N; l++) { + C[i][j][k] += A[i][j][l] * B[l][j][k]; + } + } + } + } + return C; + } + + private static double[][][] matMull(double[][][] A, double[][][] B, int N) { + final double[][][] C = new double[N][N][N]; + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + for (int k = 0; k < N; k++) { + for (int l = 0; l < N; l++) { + C[i][j][k] += A[i][j][l] * B[l][j][k]; + } + } + } + } + return C; + } + + private static boolean checkResults(boolean[] cpu, boolean[] gpu) { + for (int i = 0; i < cpu.length; i++) { + if (cpu[i] != gpu[i]) { + return false; + } + } + return true; + } + + private static boolean checkResults(byte[] cpu, byte[] gpu) { + for (int i = 0; i < cpu.length; i++) { + if (cpu[i] != gpu[i]) { + return false; + } + } + return true; + } + + private static boolean checkResults(short[] cpu, short[] gpu) { + for (int i = 0; i < cpu.length; i++) { + if (cpu[i] != gpu[i]) { + return false; + } + } + return true; + } + + private static boolean checkResults(int[] cpu, int[] gpu) { + for (int i = 0; i < cpu.length; i++) { + if (cpu[i] != gpu[i]) { + return false; + } + } + return true; + } + + private static boolean checkResults(long[] cpu, long[] gpu) { + for (int i = 0; i < cpu.length; i++) { + if (cpu[i] != gpu[i]) { + return false; + } + } + return true; + } + + private static boolean checkResults(float[] cpu, float[] gpu) { + for (int i = 0; i < cpu.length; i++) { + if (cpu[i] != gpu[i]) { + return false; + } + } + return true; + } + + private static boolean checkResults(double[] cpu, double[] gpu) { + for (int i = 0; i < cpu.length; i++) { + if (cpu[i] != gpu[i]) { + return false; + } + } + return true; + } + + private static boolean checkResults(boolean[][] cpu, boolean[][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + if (cpu[i][j] != gpu[i][j]) { + return false; + } + } + } + return true; + } + + private static boolean checkResults(byte[][] cpu, byte[][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + if (cpu[i][j] != gpu[i][j]) { + return false; + } + } + } + return true; + } + + private static boolean checkResults(short[][] cpu, short[][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + if (cpu[i][j] != gpu[i][j]) { + return false; + } + } + } + return true; + } + + private static boolean checkResults(int[][] cpu, int[][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + if (cpu[i][j] != gpu[i][j]) { + return false; + } + } + } + return true; + } + + private static boolean checkResults(long[][] cpu, long[][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + if (cpu[i][j] != gpu[i][j]) { + return false; + } + } + } + return true; + } + + private static boolean checkResults(float[][] cpu, float[][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + if (cpu[i][j] != gpu[i][j]) { + return false; + } + } + } + return true; + } + + private static boolean checkResults(double[][] cpu, double[][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + if (cpu[i][j] != gpu[i][j]) { + return false; + } + } + } + return true; + } + + private static boolean checkResults(boolean[][][] cpu, boolean[][][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + for (int k = 0; k < cpu[i][j].length; k++) { + if (cpu[i][j][k] != gpu[i][j][k]) { + return false; + } + } + } + } + return true; + } + + private static boolean checkResults(byte[][][] cpu, byte[][][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + for (int k = 0; k < cpu[i][j].length; k++) { + if (cpu[i][j][k] != gpu[i][j][k]) { + return false; + } + } + } + } + return true; + } + + private static boolean checkResults(short[][][] cpu, short[][][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + for (int k = 0; k < cpu[i][j].length; k++) { + if (cpu[i][j][k] != gpu[i][j][k]) { + return false; + } + } + } + } + return true; + } + + private static boolean checkResults(int[][][] cpu, int[][][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + for (int k = 0; k < cpu[i][j].length; k++) { + if (cpu[i][j][k] != gpu[i][j][k]) { + return false; + } + } + } + } + return true; + } + + private static boolean checkResults(long[][][] cpu, long[][][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + for (int k = 0; k < cpu[i][j].length; k++) { + if (cpu[i][j][k] != gpu[i][j][k]) { + return false; + } + } + } + } + return true; + } + + private static boolean checkResults(float[][][] cpu, float[][][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + for (int k = 0; k < cpu[i][j].length; k++) { + if (cpu[i][j][k] != gpu[i][j][k]) { + return false; + } + } + } + } + return true; + } + + private static boolean checkResults(double[][][] cpu, double[][][] gpu) { + for (int i = 0; i < cpu.length; i++) { + for (int j = 0; j < cpu[i].length; j++) { + for (int k = 0; k < cpu[i][j].length; k++) { + if (cpu[i][j][k] != gpu[i][j][k]) { + return false; + } + } + } + } + return true; + } + + public static void Zrun1D() { + final boolean[] A = new boolean[N * N]; + final boolean[] B = new boolean[N * N]; + final boolean[] gpu = new boolean[N * N]; + boolean[] cpu = new boolean[N * N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[(i * N) + j] = ((i % 2) == 0) ^ ((j % 2) == 0); + B[(i * N) + j] = ((i % 2) == 0) & ((j % 2) == 0); + cpu[(i * N) + j] = false; + gpu[(i * N) + j] = false; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new ZMatMul1D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Brun1D() { + final byte[] A = new byte[N * N]; + final byte[] B = new byte[N * N]; + final byte[] gpu = new byte[N * N]; + byte[] cpu = new byte[N * N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[(i * N) + j] = (byte) (i + j); + B[(i * N) + j] = (byte) (i - j); + cpu[(i * N) + j] = (byte) 0; + gpu[(i * N) + j] = (byte) 0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new BMatMul1D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Srun1D() { + final short[] A = new short[N * N]; + final short[] B = new short[N * N]; + final short[] gpu = new short[N * N]; + short[] cpu = new short[N * N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[(i * N) + j] = (short) (i + j); + B[(i * N) + j] = (short) (i - j); + cpu[(i * N) + j] = (short) 0; + gpu[(i * N) + j] = (short) 0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new SMatMul1D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Irun1D() { + final int[] A = new int[N * N]; + final int[] B = new int[N * N]; + final int[] gpu = new int[N * N]; + int[] cpu = new int[N * N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[(i * N) + j] = i + j; + B[(i * N) + j] = i - j; + cpu[(i * N) + j] = 0; + gpu[(i * N) + j] = 0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new IMatMul1D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Lrun1D() { + final long[] A = new long[N * N]; + final long[] B = new long[N * N]; + final long[] gpu = new long[N * N]; + long[] cpu = new long[N * N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[(i * N) + j] = i + j; + B[(i * N) + j] = i - j; + cpu[(i * N) + j] = 0l; + gpu[(i * N) + j] = 0l; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new LMatMul1D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Frun1D() { + final float[] A = new float[N * N]; + final float[] B = new float[N * N]; + final float[] gpu = new float[N * N]; + float[] cpu = new float[N * N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[(i * N) + j] = i + j; + B[(i * N) + j] = i - j; + cpu[(i * N) + j] = 0.0f; + gpu[(i * N) + j] = 0.0f; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new FMatMul1D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Drun1D() { + final double[] A = new double[N * N]; + final double[] B = new double[N * N]; + final double[] gpu = new double[N * N]; + double[] cpu = new double[N * N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[(i * N) + j] = i + j; + B[(i * N) + j] = i - j; + cpu[(i * N) + j] = 0.0; + gpu[(i * N) + j] = 0.0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new DMatMul1D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Zrun2D() { + final boolean[][] A = new boolean[N][N]; + final boolean[][] B = new boolean[N][N]; + final boolean[][] gpu = new boolean[N][N]; + boolean[][] cpu = new boolean[N][N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[i][j] = ((i % 2) == 0) ^ ((j % 2) == 0); + B[i][j] = ((i % 2) == 0) & ((j % 2) == 0); + cpu[i][j] = false; + gpu[i][j] = false; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new ZMatMul2D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Brun2D() { + final byte[][] A = new byte[N][N]; + final byte[][] B = new byte[N][N]; + final byte[][] gpu = new byte[N][N]; + byte[][] cpu = new byte[N][N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[i][j] = (byte) (i + j); + B[i][j] = (byte) (i - j); + cpu[i][j] = (byte) 0; + gpu[i][j] = (byte) 0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new BMatMul2D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Srun2D() { + final short[][] A = new short[N][N]; + final short[][] B = new short[N][N]; + final short[][] gpu = new short[N][N]; + short[][] cpu = new short[N][N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[i][j] = (short) (i + j); + B[i][j] = (short) (i - j); + cpu[i][j] = (short) 0; + gpu[i][j] = (short) 0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new SMatMul2D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Irun2D() { + final int[][] A = new int[N][N]; + final int[][] B = new int[N][N]; + final int[][] gpu = new int[N][N]; + int[][] cpu = new int[N][N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[i][j] = i + j; + B[i][j] = i - j; + cpu[i][j] = 0; + gpu[i][j] = 0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new IMatMul2D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Lrun2D() { + final long[][] A = new long[N][N]; + final long[][] B = new long[N][N]; + final long[][] gpu = new long[N][N]; + long[][] cpu = new long[N][N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[i][j] = i + j; + B[i][j] = i - j; + cpu[i][j] = 0l; + gpu[i][j] = 0l; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new LMatMul2D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Frun2D() { + final float[][] A = new float[N][N]; + final float[][] B = new float[N][N]; + final float[][] gpu = new float[N][N]; + float[][] cpu = new float[N][N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[i][j] = i + j; + B[i][j] = i - j; + cpu[i][j] = 0.0f; + gpu[i][j] = 0.0f; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new FMatMul2D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Drun2D() { + final double[][] A = new double[N][N]; + final double[][] B = new double[N][N]; + final double[][] gpu = new double[N][N]; + double[][] cpu = new double[N][N]; + + for (int i = 0; i < N; i++) { + for (int j = 0; j < N; j++) { + A[i][j] = i + j; + B[i][j] = i - j; + cpu[i][j] = 0.0; + gpu[i][j] = 0.0; + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new DMatMul2D(A, B, gpu, N); + kernel.execute(N * N); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, N); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Zrun3D() { + final boolean[][][] A = new boolean[M][M][M]; + final boolean[][][] B = new boolean[M][M][M]; + final boolean[][][] gpu = new boolean[M][M][M]; + boolean[][][] cpu = new boolean[M][M][M]; + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + for (int k = 0; k < M; k++) { + A[i][j][k] = ((i % 2) == 0) ^ (((j % 2) == 0) & ((k % 2) == 0)); + B[i][j][k] = (((i % 2) == 0) & ((j % 2) == 0)) ^ ((k % 2) == 0); + ; + cpu[i][j][k] = false; + gpu[i][j][k] = false; + } + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new ZMatMul3D(A, B, gpu, M); + kernel.execute(M * M * M); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, M); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Brun3D() { + final byte[][][] A = new byte[M][M][M]; + final byte[][][] B = new byte[M][M][M]; + final byte[][][] gpu = new byte[M][M][M]; + byte[][][] cpu = new byte[M][M][M]; + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + for (int k = 0; k < M; k++) { + A[i][j][k] = (byte) (i + j + k); + B[i][j][k] = (byte) ((i - j) + k); + cpu[i][j][k] = (byte) 0; + gpu[i][j][k] = (byte) 0; + } + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new BMatMul3D(A, B, gpu, M); + kernel.execute(M * M * M); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, M); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Srun3D() { + final short[][][] A = new short[M][M][M]; + final short[][][] B = new short[M][M][M]; + final short[][][] gpu = new short[M][M][M]; + short[][][] cpu = new short[M][M][M]; + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + for (int k = 0; k < M; k++) { + A[i][j][k] = (short) (i + j + k); + B[i][j][k] = (short) ((i - j) + k); + cpu[i][j][k] = (short) 0; + gpu[i][j][k] = (short) 0; + } + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new SMatMul3D(A, B, gpu, M); + kernel.execute(M * M * M); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, M); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Irun3D() { + final int[][][] A = new int[M][M][M]; + final int[][][] B = new int[M][M][M]; + final int[][][] gpu = new int[M][M][M]; + int[][][] cpu = new int[M][M][M]; + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + for (int k = 0; k < M; k++) { + A[i][j][k] = i + j + k; + B[i][j][k] = (i - j) + k; + cpu[i][j][k] = 0; + gpu[i][j][k] = 0; + } + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new IMatMul3D(A, B, gpu, M); + kernel.execute(M * M * M); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, M); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Lrun3D() { + final long[][][] A = new long[M][M][M]; + final long[][][] B = new long[M][M][M]; + final long[][][] gpu = new long[M][M][M]; + long[][][] cpu = new long[M][M][M]; + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + for (int k = 0; k < M; k++) { + A[i][j][k] = i + j + k; + B[i][j][k] = (i - j) + k; + cpu[i][j][k] = 0l; + gpu[i][j][k] = 0l; + } + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new LMatMul3D(A, B, gpu, M); + kernel.execute(M * M * M); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, M); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Frun3D() { + final float[][][] A = new float[M][M][M]; + final float[][][] B = new float[M][M][M]; + final float[][][] gpu = new float[M][M][M]; + float[][][] cpu = new float[M][M][M]; + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + for (int k = 0; k < M; k++) { + A[i][j][k] = i + j + k; + B[i][j][k] = (i - j) + k; + cpu[i][j][k] = 0.0f; + gpu[i][j][k] = 0.0f; + } + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new FMatMul3D(A, B, gpu, M); + kernel.execute(M * M * M); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, M); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } + + public static void Drun3D() { + final double[][][] A = new double[M][M][M]; + final double[][][] B = new double[M][M][M]; + final double[][][] gpu = new double[M][M][M]; + double[][][] cpu = new double[M][M][M]; + + for (int i = 0; i < M; i++) { + for (int j = 0; j < M; j++) { + for (int k = 0; k < M; k++) { + A[i][j][k] = i + j + k; + B[i][j][k] = (i - j) + k; + cpu[i][j][k] = 0.0; + gpu[i][j][k] = 0.0; + } + } + } + + long gs = System.currentTimeMillis(); + final Kernel kernel = new DMatMul3D(A, B, gpu, M); + kernel.execute(M * M * M); + gs = System.currentTimeMillis() - gs; + + long cs = System.currentTimeMillis(); + cpu = matMull(A, B, M); + cs = System.currentTimeMillis() - cs; + + System.out.println("gpu time: " + gs + "\ncpu time: " + cs); + System.out.print("valid? "); + + if (checkResults(cpu, gpu)) { + System.out.println("yes"); + } else { + System.out.println("no"); + } + } +}