CS255
Chris Pollett
Feb 20, 2019
Let A =[0, 0] be a global array of length 2
A = [0,0]
SpawnSync(true, 0)

SpawnSync(parent, location):
1   if(parent): spawn SpawnSync(false, location + 1)
2   A[location] = location + 1
3   sync
4   if(parent): for i = 0 to A.length - 1: print A[i]
The above code should output:
1 2
import java.lang.Thread;

/**
 * Java rendering of the SpawnSync pseudocode: a parent thread spawns one
 * child, each thread writes {@code location + 1} into its own slot of the
 * shared array {@link #a}, the parent syncs with the child and then prints
 * the array.
 *
 * <p>The {@code done} flag is only read and written while holding this
 * object's monitor, so a thread returning from {@link #sync()} is guaranteed
 * to see every write the synced thread made before it marked itself done.
 */
public class SpawnSyncDemo extends Thread {

    /**
     * @param spawner  the thread that spawned this one, or {@code null} if
     *                 this is the parent (root) thread
     * @param location index of the slot of {@link #a} this thread writes
     */
    public SpawnSyncDemo(SpawnSyncDemo spawner, int location) {
        this.location = location;
        this.spawner = spawner;
        done = false;
    }

    /**
     * Spawns a child if this is the parent, writes this thread's slot, marks
     * this thread done, and (parent only) syncs with the child and prints
     * every element of {@link #a}.
     */
    @Override
    public void run() {
        SpawnSyncDemo child = null;
        if (spawner == null) {
            child = new SpawnSyncDemo(this, location + 1);
            child.start();
        }
        try {
            a[location] = location + 1;
        } finally {
            // Always publish completion, even if the write above throws,
            // so that a thread blocked in sync() is never left waiting.
            markDone();
        }
        if (child != null) {
            child.sync();
        }
        if (spawner == null) {
            for (int i = 0; i < a.length; i++) {
                System.out.println(a[i]);
            }
        }
    }

    /**
     * Blocks the caller until this thread has marked itself done.
     *
     * <p>The wait is performed in a loop so that a wakeup that arrives while
     * {@code done} is still false does not release the caller early. If the
     * caller is interrupted, the stack trace is printed, the interrupt flag
     * is restored and the method returns without waiting further.
     */
    public synchronized void sync() {
        while (!done) {
            try {
                wait(); /* parent executes this code and waits until child thread completes */
            } catch (InterruptedException ie) {
                ie.printStackTrace();
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Sets {@code done} under the monitor and wakes every thread in sync(). */
    private synchronized void markDone() {
        done = true;
        notifyAll();
    }

    /** Starts the parent thread at location 0. */
    public static void main(String args[]) {
        SpawnSyncDemo parent = new SpawnSyncDemo(null, 0);
        parent.start();
    }

    /** Slot of {@link #a} written by this thread. */
    int location;
    /** Thread that spawned this one; {@code null} for the parent. */
    SpawnSyncDemo spawner;
    /** True once this thread has written its slot; guarded by {@code this}. */
    boolean done;
    /** Shared global array, one slot per thread. */
    static int a[] = {0, 0};
}
import java.lang.Thread;

/**
 * Sums the entries {@code a[low..high]} of the shared array {@link #a} by
 * divide and conquer: a thread covering more than one index spawns one
 * thread per half, syncs with both, and adds their results into {@link #c}.
 * The root thread (the one with no spawner) prints its total.
 *
 * <p>The {@code done} flag is only read and written while holding this
 * object's monitor, so a thread returning from {@link #sync()} is guaranteed
 * to see the value of {@code c} computed by the synced thread.
 */
public class SumDemo extends Thread {

    /**
     * @param spawner the thread that spawned this one, or {@code null} for
     *                the root thread
     * @param low     first index of {@link #a} covered by this thread
     * @param high    last index (inclusive) of {@link #a} covered by this
     *                thread; a value below {@code low} denotes an empty range
     */
    public SumDemo(SumDemo spawner, int low, int high) {
        this.high = high;
        this.low = low;
        this.spawner = spawner;
        done = false;
        c = 0;
    }

    /**
     * Computes the sum of {@code a[low..high]} into {@link #c}, marks this
     * thread done, and prints the result if this is the root thread.
     */
    @Override
    public void run() {
        try {
            if (low > high) {
                // Empty range (e.g. an empty array): the sum is 0. Without
                // this case the range would be split forever.
                c = 0;
            } else if (low == high) {
                c = a[low];
            } else {
                // Overflow-safe midpoint; equals (low + high) / 2 for valid indices.
                int mid = low + (high - low) / 2;
                SumDemo firstHalf = new SumDemo(this, low, mid);
                firstHalf.start();
                SumDemo secondHalf = new SumDemo(this, mid + 1, high);
                secondHalf.start();
                firstHalf.sync();
                secondHalf.sync();
                c = firstHalf.c + secondHalf.c;
            }
        } finally {
            // Always publish completion, even if the work above throws,
            // so that a thread blocked in sync() is never left waiting.
            markDone();
        }
        if (spawner == null) {
            System.out.println(c);
        }
    }

    /**
     * Blocks the caller until this thread has marked itself done.
     *
     * <p>The wait is performed in a loop so that a wakeup that arrives while
     * {@code done} is still false does not release the caller early. If the
     * caller is interrupted, the stack trace is printed, the interrupt flag
     * is restored and the method returns without waiting further.
     */
    public synchronized void sync() {
        while (!done) {
            try {
                wait();
            } catch (InterruptedException ie) {
                ie.printStackTrace();
                Thread.currentThread().interrupt();
                return;
            }
        }
    }

    /** Sets {@code done} under the monitor and wakes every thread in sync(). */
    private synchronized void markDone() {
        done = true;
        notifyAll();
    }

    /** Starts a root thread that sums the whole of {@link #a}. */
    public static void main(String args[]) {
        SumDemo parent = new SumDemo(null, 0, a.length - 1);
        parent.start();
    }

    /** First index covered by this thread. */
    int low;
    /** Last index (inclusive) covered by this thread. */
    int high;
    /** Sum of {@code a[low..high]}; valid once this thread is done. */
    int c = 0;
    /** Thread that spawned this one; {@code null} for the root. */
    SumDemo spawner;
    /** True once {@link #c} has been computed; guarded by {@code this}. */
    boolean done;
    /** Shared input array. */
    static int a[] = {4, 3, 9, -1, 1};
}
javac -classpath "./jocl-2.0.1.jar" MyJOCL.java
to compile such a program, and use:
java -classpath ".:./jocl-2.0.1.jar" MyJOCL
to run it.
/* * JOCL - Java bindings for OpenCL * * Copyright 2009 Marco Hutter - http://www.jocl.org/ */ import static org.jocl.CL.*; import org.jocl.*; /** * A small JOCL sample. */ public class JOCLSample { /** * The source code of the OpenCL program to execute */ private static String programSource = "__kernel void "+ "sampleKernel(__global const float *a,"+ " __global const float *b,"+ " __global float *c)"+ "{"+ " int gid = get_global_id(0);"+ " c[gid] = a[gid] * b[gid];"+ "}"; /** * The entry point of this sample * * @param args Not used */ public static void main(String args[]) { // Create input- and output data int n = 10; float srcArrayA[] = new float[n]; float srcArrayB[] = new float[n]; float dstArray[] = new float[n]; for (int i=0; i<n; i++) { srcArrayA[i] = i; srcArrayB[i] = i; } Pointer srcA = Pointer.to(srcArrayA); Pointer srcB = Pointer.to(srcArrayB); Pointer dst = Pointer.to(dstArray); // The platform, device type and device number // that will be used final int platformIndex = 0; final long deviceType = CL_DEVICE_TYPE_ALL; final int deviceIndex = 0; // Enable exceptions and subsequently omit error checks in this sample CL.setExceptionsEnabled(true); // Obtain the number of platforms int numPlatformsArray[] = new int[1]; clGetPlatformIDs(0, null, numPlatformsArray); int numPlatforms = numPlatformsArray[0]; // Obtain a platform ID cl_platform_id platforms[] = new cl_platform_id[numPlatforms]; clGetPlatformIDs(platforms.length, platforms, null); cl_platform_id platform = platforms[platformIndex]; // Initialize the context properties cl_context_properties contextProperties = new cl_context_properties(); contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform); // Obtain the number of devices for the platform int numDevicesArray[] = new int[1]; clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray); int numDevices = numDevicesArray[0]; // Obtain a device ID cl_device_id devices[] = new cl_device_id[numDevices]; clGetDeviceIDs(platform, deviceType, 
numDevices, devices, null); cl_device_id device = devices[deviceIndex]; // Create a context for the selected device cl_context context = clCreateContext( contextProperties, 1, new cl_device_id[]{device}, null, null, null); /* Create a command-queue for the selected device This syntax is not deprecated in OpenCL 2 in favor of cl_queue_properties cmd_props = new cl_queue_properties(); int[] errors = new int[1]; cl_command_queue commandQueue = clCreateCommandQueueWithProperties(context, device, cmd_props, errors); However, the new syntax requires native compilation I have not done */ cl_command_queue commandQueue = clCreateCommandQueue(context, device, 0, null); // Allocate the memory objects for the input- and output data cl_mem memObjects[] = new cl_mem[3]; memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Sizeof.cl_float * n, srcA, null); memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Sizeof.cl_float * n, srcB, null); memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, Sizeof.cl_float * n, null, null); // Create the program from the source code cl_program program = clCreateProgramWithSource(context, 1, new String[]{ programSource }, null, null); // Build the program clBuildProgram(program, 0, null, null, null, null); // Create the kernel cl_kernel kernel = clCreateKernel(program, "sampleKernel", null); // Set the arguments for the kernel clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0])); clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1])); clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[2])); // Set the work-item dimensions long global_work_size[] = new long[]{n}; long local_work_size[] = new long[]{1}; // Execute the kernel clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, global_work_size, local_work_size, 0, null, null); // Read the output data clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, n * Sizeof.cl_float, dst, 0, null, 
null); // Release kernel, program, and memory objects clReleaseMemObject(memObjects[0]); clReleaseMemObject(memObjects[1]); clReleaseMemObject(memObjects[2]); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(commandQueue); clReleaseContext(context); // Verify the result boolean passed = true; final float epsilon = 1e-7f; for (int i=0; i<n; i++) { float x = dstArray[i]; float y = srcArrayA[i] * srcArrayB[i]; boolean epsilonEqual = Math.abs(x - y) <= epsilon * Math.abs(x); if (!epsilonEqual) { passed = false; break; } } System.out.println("Test "+(passed?"PASSED":"FAILED")); if (n <= 10) { System.out.println("Result: "+java.util.Arrays.toString(dstArray)); } } }