ดำเนินการเคอร์เนลเดียวกันหลายครั้งใน OpenCL

ฉันต้องการเรียกใช้เคอร์เนล (เดียวกัน) หลายครั้ง (สมมติว่า 3 ครั้ง) แบบขนานด้วย OpenCL ฉันอ่านหัวข้อที่คล้ายกันแล้ว แต่ยังคงสับสน ฉันได้เขียนโปรแกรมที่รันเคอร์เนลนี้ครั้งเดียวแล้ว ฉันรู้ว่าต้องแก้ไขการเรียก clEnqueueNDRangeKernel() ฉันลองแล้วแต่ล้มเหลว ใครก็ได้กรุณาบอกฉันว่าจะรันมันหลายครั้งได้อย่างไร ขอบคุณที่ช่วยเหลือ

//Includes
#include <stdio.h>
#include <stdlib.h>
#include <iostream>

#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif

#define DATA_SIZE 10

using namespace std;

/*
 * OpenCL C source for the "add" kernel: each work-item reads the elements of
 * inputA, inputB and inputC at its global id, sums them and stores the sum in
 * output at the same index.
 *
 * The source is written as adjacent string literals, which the compiler
 * concatenates; a single literal must not contain a raw line break.
 */
const char *ProgramSource =
    "__kernel void add(__global float *inputA, __global float *inputB, "
    "__global float *inputC, __global float *output)\n"
    "{\n"
    "  size_t id = get_global_id(0);\n"
    "float f;\n"
    "float y1 = 0.0f;\n"
    "y1 = inputA[id] + inputB[id] + inputC[id];\n"
    "  output[id] = y1;\n"
    "}\n";

int main(void)
{
cl_context context;
cl_context_properties properties[3];
cl_kernel kernel;
cl_command_queue command_queue;
cl_program program;
cl_int err;
cl_uint num_of_platforms = 0;
cl_platform_id platform_id;
cl_device_id device_id;
cl_uint num_of_devices = 0;
cl_mem inputA, inputB, inputC, output;

size_t global;

float inputDataA[DATA_SIZE] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
float inputDataB[DATA_SIZE] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
float inputDataC[DATA_SIZE] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };    
float y1[DATA_SIZE] = { 0 };
int i;

// retreive a list of platforms avaible
if (clGetPlatformIDs(1, &platform_id, &num_of_platforms) != CL_SUCCESS)
{
    printf("Unable to get platform_id\n");
    return 1;
}

// try to get a supported GPU device
if (clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, 
&num_of_devices) != CL_SUCCESS)
{
    printf("Unable to get device_id\n");
    return 1;
}

// context properties list - must be terminated with 0
properties[0] = CL_CONTEXT_PLATFORM;
properties[1] = (cl_context_properties)platform_id;
properties[2] = 0;

// create a context with the GPU device
context = clCreateContext(properties, 1, &device_id, NULL, NULL, &err);

// create command queue using the context and device
command_queue = clCreateCommandQueue(context, device_id, 0, &err);

// create a program from the kernel source code
program = clCreateProgramWithSource(context, 1, (const char 
**)&ProgramSource, NULL, &err);

// compile the program
if (clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != CL_SUCCESS)
{
    printf("Error building program\n");
    return 1;
}

// specify which kernel from the program to execute
kernel = clCreateKernel(program, "add", &err);

// create buffers for the input and ouput
inputA = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);
inputB = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);
inputC = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);
output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * DATA_SIZE, NULL, NULL);

// load data into the input buffer
clEnqueueWriteBuffer(command_queue, inputA, CL_TRUE, 0, sizeof(float) * DATA_SIZE, inputDataA, 0, NULL, NULL);
clEnqueueWriteBuffer(command_queue, inputB, CL_TRUE, 0, sizeof(float) * DATA_SIZE, inputDataB, 0, NULL, NULL);
clEnqueueWriteBuffer(command_queue, inputC, CL_TRUE, 0, sizeof(float) * DATA_SIZE, inputDataC, 0, NULL, NULL);
// set the argument list for the kernel command
clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputA);
clSetKernelArg(kernel, 1, sizeof(cl_mem), &inputB);
clSetKernelArg(kernel, 2, sizeof(cl_mem), &inputC);
clSetKernelArg(kernel, 3, sizeof(cl_mem), &output);

global = DATA_SIZE;

clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL);
clFinish(command_queue);

// copy the results from out of the output buffer
clEnqueueReadBuffer(command_queue, output, CL_TRUE, 0, sizeof(float) *DATA_SIZE, y1, 0, NULL, NULL);

// print the results
printf("y1: ");

for (i = 0; i<DATA_SIZE; i++)
{
    printf("%f\n ", y1[i]);
}

// cleanup - release OpenCL resources
clReleaseMemObject(inputA);
clReleaseMemObject(inputB);
clReleaseMemObject(inputC);
clReleaseMemObject(output);
clReleaseProgram(program);
clReleaseKernel(kernel);
clReleaseCommandQueue(command_queue);
clReleaseContext(context);

return 0;

}

kernel opencl

ZSA 25.07.2018 แหล่งที่มา

comment

ฉันพยายามแล้ว แต่ล้มเหลว - คุณลองทำอะไรไปแล้ว มันล้มเหลวได้อย่างไร? - pmdj 25.07.2018

comment

ฉันเปลี่ยนพารามิเตอร์ของ clSetKernelArg() แต่มันไม่ทำงาน แต่ตอนนี้ฉันได้ผลลัพธ์ที่ถูกต้องหากฉันเขียนคำสั่ง clEnqueueNDRangeKernel() 3 ครั้ง แต่ฉันไม่แน่ใจว่า 3 คำสั่งนี้ทำงานแบบขนานอย่างอิสระต่อกันหรือไม่ - ZSA 25.07.2018

comment

คุณต้องเพิ่มตัวอย่างขนาดเล็กที่สุดที่ทำซ้ำได้ (Minimal, Reproducible Example) ในคำถามของคุณ: หากเราไม่สามารถจำลองผลลัพธ์ของคุณได้ ก็จะยากขึ้นมากที่เราจะแนะนำว่าต้องแก้ไขอะไร - Xirema 25.07.2018

คำตอบ (1)

arrow_upward
0
arrow_downward

โดยพื้นฐานแล้วคุณกำลังทำงานกับคิวคำสั่งและบริบทของอุปกรณ์ที่กำหนด

สองสิ่ง:

หากคุณไม่ได้ระบุเป็นพิเศษ คำสั่งในคิวจะถูกดำเนินการตามลำดับ และจะไม่ถูกดำเนินการแบบขนานอย่างอิสระต่อกันในคิวเดียวกัน
- สร้างคิวคำสั่งหลายคิว คิวละหนึ่งการเรียกใช้เคอร์เนล เพื่อให้เคอร์เนลทำงานแบบขนานอย่างอิสระในแต่ละคิว (3 คิวสำหรับการเรียกใช้ 3 ครั้ง)
- วิธีการนี้สิ้นเปลืองเมื่อมีการดำเนินการแบบขนานที่เป็นอิสระจำนวนมาก ดังนั้นฉันจึงไม่เห็นประโยชน์มากนักในเรื่องนี้ อย่างไรก็ตาม ใน OpenCL ต้องวัดประสิทธิภาพจริงจึงจะเห็นประโยชน์ของแนวทางนี้ และฉันไม่รู้ว่าในกรณีของคุณจะได้ประโยชน์มากน้อยเพียงใด
ควรเขียนฟังก์ชันที่ทำตามขั้นตอน (การสร้างบัฟเฟอร์ การเติมบัฟเฟอร์ การตั้งค่าอาร์กิวเมนต์ คำขอดำเนินการเคอร์เนล การอ่านจากบัฟเฟอร์ การลบบัฟเฟอร์) และเรียกมันหลายครั้ง เพื่อเพิ่มประสิทธิภาพสูงสุด ให้สร้างบัฟเฟอร์เพียงครั้งเดียวแล้วลบออกเมื่อโปรแกรมของคุณสิ้นสุด ใช้ซ้ำบ่อยที่สุดเท่าที่จะทำได้ (หากคุณทำตามลำดับ)

สุดท้ายแต่ไม่ท้ายสุด: วัดประสิทธิภาพที่เพิ่มขึ้นและประเมินว่าแนวทางใดเหมาะกับปัญหาของคุณมากที่สุด

Akar 27.07.2018

comment

ฉันได้เพิ่มตัวอย่างง่ายๆ ของการบวกอาร์เรย์แล้ว คุณช่วยบอกฉันได้ไหมว่าฉันต้องแก้ไขตรงไหนเพื่อรันเคอร์เนลนี้ 3 ครั้งแบบขนานอย่างอิสระต่อกัน มันจะช่วยฉันได้มาก ขอบคุณ - ZSA 28.07.2018

ดำเนินการเคอร์เนลเดียวกันหลายครั้งใน OpenCL

คำตอบ (1)

คำถามในหัวข้อ