CS255
Chris Pollett
Feb 21, 2022
P-SQUARE-MATRIX-MULTIPLY(A, B)
1  n = A.rows
2  let C be a new n x n matrix
3  parallel for i = 1 to n
4      parallel for j = 1 to n
5          c[i][j] = 0
6          for k = 1 to n
7              c[i][j] = c[i][j] + a[i][k] * b[k][j]
8  return C
P-MATRIX-MULTIPLY-RECURSIVE(C, A, B)
1   n = A.rows
2   if n == 1
3       c[1][1] = a[1][1] * b[1][1]
4   else let T be a new n x n matrix
5       partition A, B, C, and T into n/2 x n/2 submatrices
            A_(11), A_(12), A_(21), A_(22);
            B_(11), B_(12), B_(21), B_(22);
            C_(11), C_(12), C_(21), C_(22); and
            T_(11), T_(12), T_(21), T_(22) respectively
6       spawn P-MATRIX-MULTIPLY-RECURSIVE(C_(11), A_(11), B_(11))
7       spawn P-MATRIX-MULTIPLY-RECURSIVE(C_(12), A_(11), B_(12))
8       spawn P-MATRIX-MULTIPLY-RECURSIVE(C_(21), A_(21), B_(11))
9       spawn P-MATRIX-MULTIPLY-RECURSIVE(C_(22), A_(21), B_(12))
10      spawn P-MATRIX-MULTIPLY-RECURSIVE(T_(11), A_(12), B_(21))
11      spawn P-MATRIX-MULTIPLY-RECURSIVE(T_(12), A_(12), B_(22))
12      spawn P-MATRIX-MULTIPLY-RECURSIVE(T_(21), A_(22), B_(21))
13      P-MATRIX-MULTIPLY-RECURSIVE(T_(22), A_(22), B_(22))
14      sync
15      parallel for i = 1 to n
16          parallel for j = 1 to n
17              c[i][j] = c[i][j] + t[i][j]
Let A = [0, 0] be a global array of length 2.
A = [0, 0]
SpawnSync(true, 0)

SpawnSync(parent, location):
1  if(parent): spawn SpawnSync(false, location + 1)
2  A[location] = location + 1
3  sync
4  if(parent): for i = 0 to A.length - 1: print A[i]
The above code should output:
1 2
import java.lang.Thread;

/**
 * Java emulation of the SpawnSync pseudocode: a parent strand spawns one
 * child strand, each strand writes {@code location + 1} into its own slot of
 * the shared array {@link #a}, the parent syncs with the child and then
 * prints the array.
 *
 * <p>Completion is signalled through a private monitor: a strand marks itself
 * done (and notifies waiters) once its own array write has finished, and
 * {@link #sync()} blocks until that has happened. The hand-off through the
 * monitor also makes the child's write to {@link #a} visible to the parent.
 */
public class SpawnSyncDemo extends Thread {

    /**
     * @param spawner  the strand that spawned this one, or {@code null} for
     *                 the root (parent) strand
     * @param location index of the slot in {@link #a} this strand writes
     */
    public SpawnSyncDemo(SpawnSyncDemo spawner, int location) {
        this.location = location;
        this.spawner = spawner;
        done = false;
    }

    /**
     * Body of the strand. The root strand spawns a child for the next slot,
     * every strand fills its own slot, and the root then waits for the child
     * and prints every element of {@link #a}, one per line.
     */
    @Override
    public void run() {
        SpawnSyncDemo child = null;
        try {
            if (spawner == null) {
                child = new SpawnSyncDemo(this, location + 1);
                child.start();
            }
            a[location] = location + 1;
        } finally {
            // Always signal completion, even if the write above threw, so a
            // strand blocked in sync() on this object can never hang.
            markDone();
        }
        if (child != null) {
            child.sync();
        }
        if (spawner == null) {
            for (int i = 0; i < a.length; i++) {
                System.out.println(a[i]);
            }
        }
    }

    /**
     * Blocks the calling thread until this strand has finished its own work
     * (the parent calls this on its child). Returns immediately if the strand
     * is already done.
     *
     * <p>The wait is not abandoned on interruption, because returning early
     * would let the caller read a slot that has not been written yet; instead
     * the caller's interrupt status is restored before returning.
     */
    public void sync() {
        boolean interrupted = false;
        synchronized (doneLock) {
            // Loop: Object.wait() may wake up spuriously.
            while (!done) {
                try {
                    doneLock.wait();
                } catch (InterruptedException ie) {
                    interrupted = true;
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /** Records that this strand's work is finished and wakes all waiters. */
    private void markDone() {
        synchronized (doneLock) {
            done = true;
            doneLock.notifyAll();
        }
    }

    /** Runs the demo: starts the root strand at slot 0. */
    public static void main(String args[]) {
        SpawnSyncDemo parent = new SpawnSyncDemo(null, 0);
        parent.start();
    }

    /** Slot of {@link #a} written by this strand. */
    int location;
    /** Strand that spawned this one; {@code null} for the root. */
    SpawnSyncDemo spawner;
    /** True once this strand's write has completed; guarded by doneLock. */
    boolean done;
    /** Private monitor guarding {@link #done}; avoids wait/notify on the Thread itself. */
    private final Object doneLock = new Object();
    /** The shared global array A of the pseudocode. */
    static int a[] = {0, 0};
}
import java.lang.Thread;

/**
 * Divide-and-conquer parallel sum of the shared array {@link #a} using one
 * thread per strand: each strand sums the index range {@code [low, high]} by
 * spawning a strand for each half, syncing with both, and adding their
 * results into {@link #c}.
 *
 * <p>Completion is signalled through a private monitor, which also makes a
 * child's result {@link #c} visible to the parent after {@link #sync()}.
 */
public class SumDemo extends Thread {

    /**
     * @param spawner the strand that spawned this one, or {@code null} for
     *                the root strand (which prints the final sum)
     * @param low     first index of the range to sum (inclusive)
     * @param high    last index of the range to sum (inclusive); a value
     *                below {@code low} denotes an empty range
     */
    public SumDemo(SumDemo spawner, int low, int high) {
        this.high = high;
        this.low = low;
        this.spawner = spawner;
        done = false;
        c = 0;
    }

    /**
     * Computes the sum of {@code a[low..high]} into {@link #c}; the root
     * strand then prints it.
     */
    @Override
    public void run() {
        try {
            if (low > high) {
                // Empty range (e.g. an empty array): the sum is 0. Without
                // this guard the recursion below never reaches a base case.
                c = 0;
            } else if (low == high) {
                c = a[low];
            } else {
                // Overflow-safe midpoint.
                int mid = low + (high - low) / 2;
                SumDemo firstHalf = new SumDemo(this, low, mid);
                firstHalf.start();
                SumDemo secondHalf = new SumDemo(this, mid + 1, high);
                secondHalf.start();
                firstHalf.sync();
                secondHalf.sync();
                c = firstHalf.c + secondHalf.c;
            }
        } finally {
            // Always signal completion so a parent blocked in sync() on this
            // strand cannot hang if the computation above threw.
            markDone();
        }
        if (spawner == null) {
            System.out.println(c);
        }
    }

    /**
     * Blocks the calling thread until this strand has stored its result in
     * {@link #c}. Returns immediately if the strand is already done.
     *
     * <p>The wait is not abandoned on interruption, because returning early
     * would let the caller read an incomplete result; instead the caller's
     * interrupt status is restored before returning.
     */
    public void sync() {
        boolean interrupted = false;
        synchronized (doneLock) {
            // Loop: Object.wait() may wake up spuriously.
            while (!done) {
                try {
                    doneLock.wait();
                } catch (InterruptedException ie) {
                    interrupted = true;
                }
            }
        }
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /** Records that this strand's result is ready and wakes all waiters. */
    private void markDone() {
        synchronized (doneLock) {
            done = true;
            doneLock.notifyAll();
        }
    }

    /** Runs the demo: sums the whole of {@link #a} and prints the result. */
    public static void main(String args[]) {
        SumDemo parent = new SumDemo(null, 0, a.length - 1);
        parent.start();
    }

    /** First index (inclusive) of this strand's range. */
    int low;
    /** Last index (inclusive) of this strand's range. */
    int high;
    /** Sum of {@code a[low..high]}, valid once the strand is done. */
    int c = 0;
    /** Strand that spawned this one; {@code null} for the root. */
    SumDemo spawner;
    /** True once {@link #c} holds the final result; guarded by doneLock. */
    boolean done;
    /** Private monitor guarding {@link #done}; avoids wait/notify on the Thread itself. */
    private final Object doneLock = new Object();
    /** The array being summed. */
    static int a[] = {4, 3, 9, -1, 1};
}
Which of the following statements is true?
javac -classpath "./jocl-2.0.4.jar" MyJOCL.java
to compile such a program, and use:
java -classpath ".:./jocl-2.0.4.jar" MyJOCL
to run it.
/* * JOCL - Java bindings for OpenCL * * Copyright 2009 Marco Hutter - http://www.jocl.org/ */ import static org.jocl.CL.*; import org.jocl.*; /** * A small JOCL sample. */ public class JOCLSample { /** * The source code of the OpenCL program to execute */ private static String programSource = "__kernel void "+ "sampleKernel(__global const float *a,"+ " __global const float *b,"+ " __global float *c)"+ "{"+ " int gid = get_global_id(0);"+ " c[gid] = a[gid] * b[gid];"+ "}"; /** * The entry point of this sample * * @param args Not used */ public static void main(String args[]) { // Create input- and output data int n = 10; float srcArrayA[] = new float[n]; float srcArrayB[] = new float[n]; float dstArray[] = new float[n]; for (int i=0; i<n; i++) { srcArrayA[i] = i; srcArrayB[i] = i; } Pointer srcA = Pointer.to(srcArrayA); Pointer srcB = Pointer.to(srcArrayB); Pointer dst = Pointer.to(dstArray); // The platform, device type and device number // that will be used final int platformIndex = 0; final long deviceType = CL_DEVICE_TYPE_ALL; final int deviceIndex = 0; // Enable exceptions and subsequently omit error checks in this sample CL.setExceptionsEnabled(true); // Obtain the number of platforms int numPlatformsArray[] = new int[1]; clGetPlatformIDs(0, null, numPlatformsArray); int numPlatforms = numPlatformsArray[0]; // Obtain a platform ID cl_platform_id platforms[] = new cl_platform_id[numPlatforms]; clGetPlatformIDs(platforms.length, platforms, null); cl_platform_id platform = platforms[platformIndex]; // Initialize the context properties cl_context_properties contextProperties = new cl_context_properties(); contextProperties.addProperty(CL_CONTEXT_PLATFORM, platform); // Obtain the number of devices for the platform int numDevicesArray[] = new int[1]; clGetDeviceIDs(platform, deviceType, 0, null, numDevicesArray); int numDevices = numDevicesArray[0]; // Obtain a device ID cl_device_id devices[] = new cl_device_id[numDevices]; clGetDeviceIDs(platform, deviceType, 
numDevices, devices, null); cl_device_id device = devices[deviceIndex]; // Create a context for the selected device cl_context context = clCreateContext( contextProperties, 1, new cl_device_id[]{device}, null, null, null); /* Create a command-queue for the selected device This syntax is not deprecated in OpenCL 2 in favor of cl_queue_properties cmd_props = new cl_queue_properties(); int[] errors = new int[1]; cl_command_queue commandQueue = clCreateCommandQueueWithProperties(context, device, cmd_props, errors); However, the new syntax requires native compilation I have not done */ cl_command_queue commandQueue = clCreateCommandQueue(context, device, 0, null); // Allocate the memory objects for the input- and output data cl_mem memObjects[] = new cl_mem[3]; memObjects[0] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Sizeof.cl_float * n, srcA, null); memObjects[1] = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, Sizeof.cl_float * n, srcB, null); memObjects[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, Sizeof.cl_float * n, null, null); // Create the program from the source code cl_program program = clCreateProgramWithSource(context, 1, new String[]{ programSource }, null, null); // Build the program clBuildProgram(program, 0, null, null, null, null); // Create the kernel cl_kernel kernel = clCreateKernel(program, "sampleKernel", null); // Set the arguments for the kernel clSetKernelArg(kernel, 0, Sizeof.cl_mem, Pointer.to(memObjects[0])); clSetKernelArg(kernel, 1, Sizeof.cl_mem, Pointer.to(memObjects[1])); clSetKernelArg(kernel, 2, Sizeof.cl_mem, Pointer.to(memObjects[2])); // Set the work-item dimensions long global_work_size[] = new long[]{n}; long local_work_size[] = new long[]{1}; // Execute the kernel clEnqueueNDRangeKernel(commandQueue, kernel, 1, null, global_work_size, local_work_size, 0, null, null); // Read the output data clEnqueueReadBuffer(commandQueue, memObjects[2], CL_TRUE, 0, n * Sizeof.cl_float, dst, 0, null, 
null); // Release kernel, program, and memory objects clReleaseMemObject(memObjects[0]); clReleaseMemObject(memObjects[1]); clReleaseMemObject(memObjects[2]); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(commandQueue); clReleaseContext(context); // Verify the result boolean passed = true; final float epsilon = 1e-7f; for (int i=0; i<n; i++) { float x = dstArray[i]; float y = srcArrayA[i] * srcArrayB[i]; boolean epsilonEqual = Math.abs(x - y) <= epsilon * Math.abs(x); if (!epsilonEqual) { passed = false; break; } } System.out.println("Test "+(passed?"PASSED":"FAILED")); if (n <= 10) { System.out.println("Result: "+java.util.Arrays.toString(dstArray)); } } }