25 #ifndef OPENSUBDIV_OPENSUBDIV3_OSD_CL_EVALUATOR_H 26 #define OPENSUBDIV_OPENSUBDIV3_OSD_CL_EVALUATOR_H 28 #include "../version.h" 30 #include "../osd/opencl.h" 31 #include "../osd/types.h" 32 #include "../osd/bufferDescriptor.h" 35 namespace OPENSUBDIV_VERSION {
52 template <
typename DEVICE_CONTEXT>
54 DEVICE_CONTEXT context) {
58 template <
typename DEVICE_CONTEXT>
61 DEVICE_CONTEXT context) {
62 return new CLStencilTable(limitStencilTable, context->GetContext());
66 cl_context clContext);
68 cl_context clContext);
96 CLEvaluator(cl_context context, cl_command_queue queue);
102 template <
typename DEVICE_CONTEXT>
107 DEVICE_CONTEXT deviceContext) {
108 return Create(srcDesc, dstDesc, duDesc, dvDesc,
109 deviceContext->GetContext(),
110 deviceContext->GetCommandQueue());
117 cl_context clContext,
118 cl_command_queue clCommandQueue) {
120 if (kernel->
Compile(srcDesc, dstDesc, duDesc, dvDesc))
return kernel;
174 template <
typename SRC_BUFFER,
typename DST_BUFFER,
175 typename STENCIL_TABLE,
typename DEVICE_CONTEXT>
179 STENCIL_TABLE
const *stencilTable,
181 DEVICE_CONTEXT deviceContext,
182 unsigned int numStartEvents=0,
183 const cl_event* startEvents=NULL,
184 cl_event* endEvent=NULL) {
190 numStartEvents, startEvents, endEvent);
193 instance = Create(srcDesc, dstDesc,
201 numStartEvents, startEvents, endEvent);
264 template <
typename SRC_BUFFER,
typename DST_BUFFER,
265 typename STENCIL_TABLE,
typename DEVICE_CONTEXT>
271 STENCIL_TABLE
const *stencilTable,
273 DEVICE_CONTEXT deviceContext,
274 unsigned int numStartEvents=0,
275 const cl_event* startEvents=NULL,
276 cl_event* endEvent=NULL) {
284 numStartEvents, startEvents, endEvent);
287 instance = Create(srcDesc, dstDesc, duDesc, dvDesc,
295 numStartEvents, startEvents, endEvent);
306 template <
typename SRC_BUFFER,
typename DST_BUFFER,
typename STENCIL_TABLE>
310 STENCIL_TABLE
const *stencilTable,
311 unsigned int numStartEvents=0,
312 const cl_event* startEvents=NULL,
313 cl_event* endEvent=NULL)
const {
314 return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
315 dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
316 stencilTable->GetSizesBuffer(),
317 stencilTable->GetOffsetsBuffer(),
318 stencilTable->GetIndicesBuffer(),
319 stencilTable->GetWeightsBuffer(),
321 stencilTable->GetNumStencils(),
322 numStartEvents, startEvents, endEvent);
328 template <
typename SRC_BUFFER,
typename DST_BUFFER,
typename STENCIL_TABLE>
334 STENCIL_TABLE
const *stencilTable,
335 unsigned int numStartEvents=0,
336 const cl_event* startEvents=NULL,
337 cl_event* endEvent=NULL)
const {
338 return EvalStencils(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
339 dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
340 duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
341 dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
342 stencilTable->GetSizesBuffer(),
343 stencilTable->GetOffsetsBuffer(),
344 stencilTable->GetIndicesBuffer(),
345 stencilTable->GetWeightsBuffer(),
346 stencilTable->GetDuWeightsBuffer(),
347 stencilTable->GetDvWeightsBuffer(),
349 stencilTable->GetNumStencils(),
350 numStartEvents, startEvents, endEvent);
363 unsigned int numStartEvents=0,
364 const cl_event* startEvents=NULL,
365 cl_event* endEvent=NULL)
const;
381 unsigned int numStartEvents=0,
382 const cl_event* startEvents=NULL,
383 cl_event* endEvent=NULL)
const;
439 template <
typename SRC_BUFFER,
typename DST_BUFFER,
440 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE,
441 typename DEVICE_CONTEXT>
446 PATCHCOORD_BUFFER *patchCoords,
447 PATCH_TABLE *patchTable,
449 DEVICE_CONTEXT deviceContext,
450 unsigned int numStartEvents=0,
451 const cl_event* startEvents=NULL,
452 cl_event* endEvent=NULL) {
457 numPatchCoords, patchCoords,
459 numStartEvents, startEvents, endEvent);
463 instance = Create(srcDesc, dstDesc,
470 numPatchCoords, patchCoords,
472 numStartEvents, startEvents, endEvent);
536 template <
typename SRC_BUFFER,
typename DST_BUFFER,
537 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE,
538 typename DEVICE_CONTEXT>
545 PATCHCOORD_BUFFER *patchCoords,
546 PATCH_TABLE *patchTable,
548 DEVICE_CONTEXT deviceContext,
549 unsigned int numStartEvents=0,
550 const cl_event* startEvents=NULL,
551 cl_event* endEvent=NULL) {
558 numPatchCoords, patchCoords,
560 numStartEvents, startEvents, endEvent);
564 instance = Create(srcDesc, dstDesc, duDesc, dvDesc, deviceContext);
570 numPatchCoords, patchCoords,
572 numStartEvents, startEvents, endEvent);
617 template <
typename SRC_BUFFER,
typename DST_BUFFER,
618 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
623 PATCHCOORD_BUFFER *patchCoords,
624 PATCH_TABLE *patchTable,
625 unsigned int numStartEvents=0,
626 const cl_event* startEvents=NULL,
627 cl_event* endEvent=NULL)
const {
629 return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
630 dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
634 patchCoords->BindCLBuffer(_clCommandQueue),
635 patchTable->GetPatchArrayBuffer(),
636 patchTable->GetPatchIndexBuffer(),
637 patchTable->GetPatchParamBuffer(),
638 numStartEvents, startEvents, endEvent);
688 template <
typename SRC_BUFFER,
typename DST_BUFFER,
689 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
696 PATCHCOORD_BUFFER *patchCoords,
697 PATCH_TABLE *patchTable,
698 unsigned int numStartEvents=0,
699 const cl_event* startEvents=NULL,
700 cl_event* endEvent=NULL)
const {
702 return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
703 dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
704 duBuffer->BindCLBuffer(_clCommandQueue), duDesc,
705 dvBuffer->BindCLBuffer(_clCommandQueue), dvDesc,
707 patchCoords->BindCLBuffer(_clCommandQueue),
708 patchTable->GetPatchArrayBuffer(),
709 patchTable->GetPatchIndexBuffer(),
710 patchTable->GetPatchParamBuffer(),
711 numStartEvents, startEvents, endEvent);
719 cl_mem patchCoordsBuffer,
720 cl_mem patchArrayBuffer,
721 cl_mem patchIndexBuffer,
722 cl_mem patchParamsBuffer,
723 unsigned int numStartEvents=0,
724 const cl_event* startEvents=NULL,
725 cl_event* endEvent=NULL)
const;
775 template <
typename SRC_BUFFER,
typename DST_BUFFER,
776 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE,
777 typename DEVICE_CONTEXT>
782 PATCHCOORD_BUFFER *patchCoords,
783 PATCH_TABLE *patchTable,
785 DEVICE_CONTEXT deviceContext,
786 unsigned int numStartEvents=0,
787 const cl_event* startEvents=NULL,
788 cl_event* endEvent=NULL) {
794 numPatchCoords, patchCoords,
796 numStartEvents, startEvents, endEvent);
800 instance = Create(srcDesc, dstDesc,
808 numPatchCoords, patchCoords,
810 numStartEvents, startEvents, endEvent);
855 template <
typename SRC_BUFFER,
typename DST_BUFFER,
856 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
861 PATCHCOORD_BUFFER *patchCoords,
862 PATCH_TABLE *patchTable,
863 unsigned int numStartEvents=0,
864 const cl_event* startEvents=NULL,
865 cl_event* endEvent=NULL)
const {
867 return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
868 dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
872 patchCoords->BindCLBuffer(_clCommandQueue),
873 patchTable->GetVaryingPatchArrayBuffer(),
874 patchTable->GetVaryingPatchIndexBuffer(),
875 patchTable->GetPatchParamBuffer(),
876 numStartEvents, startEvents, endEvent);
929 template <
typename SRC_BUFFER,
typename DST_BUFFER,
930 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE,
931 typename DEVICE_CONTEXT>
936 PATCHCOORD_BUFFER *patchCoords,
937 PATCH_TABLE *patchTable,
940 DEVICE_CONTEXT deviceContext,
941 unsigned int numStartEvents=0,
942 const cl_event* startEvents=NULL,
943 cl_event* endEvent=NULL) {
949 numPatchCoords, patchCoords,
950 patchTable, fvarChannel,
951 numStartEvents, startEvents, endEvent);
955 instance = Create(srcDesc, dstDesc,
963 numPatchCoords, patchCoords,
964 patchTable, fvarChannel,
965 numStartEvents, startEvents, endEvent);
1012 template <
typename SRC_BUFFER,
typename DST_BUFFER,
1013 typename PATCHCOORD_BUFFER,
typename PATCH_TABLE>
1018 PATCHCOORD_BUFFER *patchCoords,
1019 PATCH_TABLE *patchTable,
1020 int fvarChannel = 0,
1021 unsigned int numStartEvents=0,
1022 const cl_event* startEvents=NULL,
1023 cl_event* endEvent=NULL)
const {
1025 return EvalPatches(srcBuffer->BindCLBuffer(_clCommandQueue), srcDesc,
1026 dstBuffer->BindCLBuffer(_clCommandQueue), dstDesc,
1030 patchCoords->BindCLBuffer(_clCommandQueue),
1031 patchTable->GetFVarPatchArrayBuffer(fvarChannel),
1032 patchTable->GetFVarPatchIndexBuffer(fvarChannel),
1033 patchTable->GetFVarPatchParamBuffer(fvarChannel),
1034 numStartEvents, startEvents, endEvent);
1051 template <
typename DEVICE_CONTEXT>
1053 Synchronize(deviceContext->GetCommandQueue());
1056 static void Synchronize(cl_command_queue queue);
1059 cl_context _clContext;
1060 cl_command_queue _clCommandQueue;
1061 cl_program _program;
1062 cl_kernel _stencilKernel;
1063 cl_kernel _stencilDerivKernel;
1064 cl_kernel _patchKernel;
1071 using namespace OPENSUBDIV_VERSION;
1076 #endif // OPENSUBDIV_OPENSUBDIV3_OSD_CL_EVALUATOR_H bool Compile(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc)
static CLEvaluator * Create(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, DEVICE_CONTEXT deviceContext)
Generic creator template.
static CLStencilTable * Create(Far::LimitStencilTable const *limitStencilTable, DEVICE_CONTEXT context)
static bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL)
Generic limit eval function. This function has the same signature as other device kernels have so that ...
static CLEvaluator * Create(BufferDescriptor const &srcDesc, BufferDescriptor const &dstDesc, BufferDescriptor const &duDesc, BufferDescriptor const &dvDesc, cl_context clContext, cl_command_queue clCommandQueue)
Table of subdivision stencils.
bool EvalPatchesVarying(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL) const
Generic limit eval function. This function has the same signature as other device kernels have so that ...
bool EvalStencils(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, STENCIL_TABLE const *stencilTable, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL) const
bool EvalPatches(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL) const
Generic limit eval function with derivatives. This function has the same signature as other device kern...
static bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL)
Generic limit eval function. This function has the same signature as other device kernels have so that ...
cl_mem GetSizesBuffer() const
static bool EvalPatches(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL)
Generic limit eval function. This function has the same signature as other device kernels have so that ...
Table of limit subdivision stencils.
bool EvalPatchesFaceVarying(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, int fvarChannel=0, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL) const
Generic limit eval function. This function has the same signature as other device kernels have so that ...
cl_mem GetDvWeightsBuffer() const
static bool EvalPatches(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL)
Generic limit eval function. This function has the same signature as other device kernels have so that ...
bool EvalStencils(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL) const
cl_mem GetIndicesBuffer() const
bool EvalPatches(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, int numPatchCoords, PATCHCOORD_BUFFER *patchCoords, PATCH_TABLE *patchTable, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL) const
Generic limit eval function. This function has the same signature as other device kernels have so that ...
BufferDescriptor is a struct which describes buffer elements in interleaved data buffers. Almost all Osd Evaluator APIs take BufferDescriptors along with device-specific buffer objects.
static void Synchronize(DEVICE_CONTEXT deviceContext)
Wait for the OpenCL kernels to finish.
static bool EvalStencils(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, STENCIL_TABLE const *stencilTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL)
Generic static compute function. This function has the same signature as other device kernels have so t...
int GetNumStencils() const
static CLStencilTable * Create(Far::StencilTable const *stencilTable, DEVICE_CONTEXT context)
cl_mem GetWeightsBuffer() const
cl_mem GetDuWeightsBuffer() const
cl_mem GetOffsetsBuffer() const
static bool EvalStencils(SRC_BUFFER *srcBuffer, BufferDescriptor const &srcDesc, DST_BUFFER *dstBuffer, BufferDescriptor const &dstDesc, DST_BUFFER *duBuffer, BufferDescriptor const &duDesc, DST_BUFFER *dvBuffer, BufferDescriptor const &dvDesc, STENCIL_TABLE const *stencilTable, CLEvaluator const *instance, DEVICE_CONTEXT deviceContext, unsigned int numStartEvents=0, const cl_event *startEvents=NULL, cl_event *endEvent=NULL)
Generic static compute function. This function has the same signature as other device kernels have so t...