31 #ifndef SPECIAL_PURPOSE_IMPL_H_ 32 #define SPECIAL_PURPOSE_IMPL_H_ 45 #endif // HAVE_VIENNACL 53 namespace implementation
56 namespace special_purpose
// Generic template parameterized on a Backend enum value and a Matrix class.
// It only declares a static compute(A, result); no definition is visible here.
// NOTE(review): the struct-name line is missing from this extract. A later
// VIENNACL specialization is named `logistic`, so this is presumably the
// generic `logistic` template - confirm against the full file.
62 template <enum Backend,
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
66 typedef typename Matrix::Scalar
T;
// Both matrices are taken by value.
// NOTE(review): presumably A is the input and result receives the output - confirm.
69 static void compute(Matrix A, Matrix result);
// Partial specialization that leaves only the Matrix class as a parameter.
// NOTE(review): the struct line naming the operation and backend is missing
// from this extract. Its position before the VIENNACL `logistic`
// specialization suggests this is the CPU logistic implementation - confirm.
75 template <
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
79 typedef typename Matrix::Scalar
T;
// Visits indices 0..len-1. The definition of `len` and the loop body are on
// lines that are not present in this extract.
85 for (int32_t i=0; i<len; i++)
// VIENNACL specialization of `logistic`.
// compute() obtains an element-wise OpenCL kernel whose per-element expression
// is 1/(1+exp(-x)) and enqueues it with A as input and result as output.
94 template <
class Matrix>
95 struct logistic<Backend::VIENNACL, Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
98 typedef typename Matrix::Scalar
T;
// A: input GPU matrix; result: GPU matrix that receives the output.
// NOTE(review): nothing visible here checks that A and result have the same
// number of elements - confirm the caller guarantees it.
101 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
// Expression handed to the kernel generator; `element` is the input value.
103 const std::string operation =
"return 1.0/(1+exp(-1*element));";
// The kernel name carries the scalar type string, so each T gets its own name.
105 std::string kernel_name =
"logistic_" + ocl::get_type_string<T>();
106 viennacl::ocl::kernel& kernel =
107 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// The global work size is derived from the total element count of A.
109 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Kernel arguments: input buffer, element count, input offset,
// output buffer, output offset.
111 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
112 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
113 result.vcl_matrix(), cl_int(result.offset)));
117 #endif // HAVE_VIENNACL 122 template <enum Backend,
class Matrix>
// Generic template parameterized on a Backend enum value and a Matrix class.
// It only declares a static compute(A, C); no definition is visible here.
// NOTE(review): the struct-name line is missing from this extract. The
// VIENNACL kernel that follows is named "multiply_by_logistic_derivative_...",
// so this is presumably the generic template for that operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
126 typedef typename Matrix::Scalar
T;
// Both matrices are taken by value.
129 static void compute(Matrix A, Matrix C);
// Partial specialization that leaves only the Matrix class as a parameter.
// NOTE(review): the struct line naming the operation and backend is missing
// from this extract; the following "#endif // HAVE_EIGEN3" suggests this is
// the Eigen/CPU implementation - confirm.
135 template <
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
139 typedef typename Matrix::Scalar
T;
// Scales each C[i] in place by A[i]*(1-A[i]). The definition of `len` is on a
// line that is not present in this extract.
// NOTE(review): A presumably already holds logistic activations, which would
// make A[i]*(1-A[i]) the logistic derivative - confirm.
145 for (int32_t i=0; i<len; i++)
146 C[i] *= A[i] * (1.0-A[i]);
149 #endif // HAVE_EIGEN3 154 template <
class Matrix>
// GPU implementation operating on CGPUMatrix<T>.
// NOTE(review): the struct line is missing from this extract; the kernel name
// below indicates the multiply_by_logistic_derivative operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
158 typedef typename Matrix::Scalar
T;
// A and C are the two GPU matrices handed to the two-argument kernel.
161 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
// Per-element expression: element2 * element1 * (1 - element1).
163 const std::string operation =
"return element2 * element1*(1.0-element1);";
// The kernel name carries the scalar type string, so each T gets its own name.
165 std::string kernel_name =
"multiply_by_logistic_derivative_" + ocl::get_type_string<T>();
166 viennacl::ocl::kernel& kernel =
167 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// The global work size is derived from the total element count of A.
169 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// C and its offset are passed twice.
// NOTE(review): presumably once as the second operand (element2) and once as
// the output, making the update in place - confirm against the parameter
// order of generate_two_arg_elementwise_kernel.
171 viennacl::ocl::enqueue(kernel(
172 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
173 C.vcl_matrix(), cl_int(C.offset),
174 C.vcl_matrix(), cl_int(C.offset)));
178 #endif // HAVE_VIENNACL 183 template <enum Backend,
class Matrix>
// Generic template parameterized on a Backend enum value and a Matrix class.
// It only declares a static compute(A, result); no definition is visible here.
// NOTE(review): the struct-name line is missing from this extract. The
// VIENNACL kernel that follows is named "rectified_linear_...", so this is
// presumably the generic template for that operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
187 typedef typename Matrix::Scalar
T;
// Both matrices are taken by value.
190 static void compute(Matrix A, Matrix result);
// Partial specialization that leaves only the Matrix class as a parameter.
// NOTE(review): the struct line naming the operation and backend is missing
// from this extract; the following "#endif // HAVE_EIGEN3" suggests this is
// the Eigen/CPU implementation - confirm.
196 template <
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
200 typedef typename Matrix::Scalar
T;
// Visits indices 0..len-1. The definition of `len` and the loop body are on
// lines that are not present in this extract.
206 for (int32_t i=0; i<len; i++)
210 #endif // HAVE_EIGEN3 215 template <
class Matrix>
// GPU implementation operating on CGPUMatrix<T>.
// NOTE(review): the struct line is missing from this extract; the kernel name
// below indicates the rectified_linear operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
219 typedef typename Matrix::Scalar
T;
// A: input GPU matrix; result: GPU matrix that receives the output.
222 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> result)
// Per-element expression: max(0, element), with 0 cast to the kernel's DATATYPE.
224 const std::string operation =
"return max((DATATYPE)0,element);";
// The kernel name carries the scalar type string, so each T gets its own name.
226 std::string kernel_name =
"rectified_linear_" + ocl::get_type_string<T>();
227 viennacl::ocl::kernel& kernel =
228 ocl::generate_single_arg_elementwise_kernel<T>(kernel_name, operation);
// The global work size is derived from the total element count of A.
230 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// Kernel arguments: input buffer, element count, input offset,
// output buffer, output offset.
232 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
233 cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
234 result.vcl_matrix(), cl_int(result.offset)));
238 #endif // HAVE_VIENNACL 243 template <enum Backend,
class Matrix>
// Generic template parameterized on a Backend enum value and a Matrix class.
// It only declares a static compute(A, C); no definition is visible here.
// NOTE(review): the struct-name line is missing from this extract. The
// VIENNACL kernel that follows is named
// "multiply_by_rectified_linear_derivative_...", so this is presumably the
// generic template for that operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
247 typedef typename Matrix::Scalar
T;
// Both matrices are taken by value.
250 static void compute(Matrix A, Matrix C);
// Partial specialization that leaves only the Matrix class as a parameter.
// NOTE(review): the struct line naming the operation and backend is missing
// from this extract; the following "#endif // HAVE_EIGEN3" suggests this is
// the Eigen/CPU implementation - confirm.
256 template <
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
260 typedef typename Matrix::Scalar
T;
// Visits indices 0..len-1. The definition of `len` and the loop body are on
// lines that are not present in this extract.
266 for (int32_t i=0; i<len; i++)
271 #endif // HAVE_EIGEN3 276 template <
class Matrix>
// GPU implementation operating on CGPUMatrix<T>.
// NOTE(review): the struct line is missing from this extract; the kernel name
// below indicates the multiply_by_rectified_linear_derivative operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
280 typedef typename Matrix::Scalar
T;
// A and C are the two GPU matrices handed to the two-argument kernel.
283 static void compute(CGPUMatrix<T> A, CGPUMatrix<T> C)
// Per-element expression: yields 0 where element1 is exactly 0, and element2
// unchanged everywhere else.
285 const std::string operation =
"return element1==0 ? 0 : element2;";
// The kernel name carries the scalar type string, so each T gets its own name.
287 std::string kernel_name =
"multiply_by_rectified_linear_derivative_" + ocl::get_type_string<T>();
288 viennacl::ocl::kernel& kernel =
289 ocl::generate_two_arg_elementwise_kernel<T>(kernel_name, operation);
// The global work size is derived from the total element count of A.
291 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_rows*A.num_cols));
// C and its offset are passed twice.
// NOTE(review): presumably once as the second operand (element2) and once as
// the output, making the update in place - confirm against the parameter
// order of generate_two_arg_elementwise_kernel.
293 viennacl::ocl::enqueue(kernel(
294 A.vcl_matrix(), cl_int(A.num_rows*A.num_cols), cl_int(A.offset),
295 C.vcl_matrix(), cl_int(C.offset),
296 C.vcl_matrix(), cl_int(C.offset)));
300 #endif // HAVE_VIENNACL 305 template <enum Backend,
class Matrix>
// Generic template parameterized on a Backend enum value and a Matrix class.
// NOTE(review): the struct-name line and any compute() declaration are missing
// from this extract. A later VIENNACL specialization is named `softmax`, so
// this is presumably the generic `softmax` template - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
309 typedef typename Matrix::Scalar
T;
// Partial specialization that leaves only the Matrix class as a parameter.
// NOTE(review): the struct line naming the operation and backend is missing
// from this extract; the following "#endif // HAVE_EIGEN3" and the later
// `softmax` VIENNACL specialization suggest this is the Eigen/CPU softmax -
// confirm.
320 template <
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
324 typedef typename Matrix::Scalar
T;
// Three loops are visible: j over the columns of A, then i and k over the rows
// of A. Their bodies, and how they nest, are on lines that are not present in
// this extract.
338 for (int32_t j=0; j<A.
num_cols; j++)
341 for (int32_t i=0; i<A.
num_rows; i++)
345 for (int32_t k=0; k<A.
num_rows; k++)
350 #endif // HAVE_EIGEN3 355 template <
class Matrix>
// VIENNACL specialization of `softmax`. The kernel overwrites A in place.
356 struct softmax<Backend::VIENNACL, Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
359 typedef typename Matrix::Scalar
T;
// Returns the OpenCL kernel named "softmax_<type>". If a kernel with that name
// already exists it is returned as is; otherwise the source is assembled,
// compiled, and its local work size is set.
// NOTE(review): compute() calls this as generate_kernel<T>(), but no template
// header for it is visible in this extract - confirm one exists.
363 static viennacl::ocl::kernel& generate_kernel()
365 std::string kernel_name =
"softmax_" + ocl::get_type_string<T>();
367 if (ocl::kernel_exists(kernel_name))
368 return ocl::get_kernel(kernel_name);
// Kernel outline (the source text follows the preamble). Each work-item takes
// one column j = get_global_id(0) and addresses elements as
// A[offset + i + j*nrows]. It makes three passes over the column:
//   1. find the column maximum,
//   2. sum exp(x - max),
//   3. overwrite each entry with exp(x - max - log(sum)).
// Subtracting the maximum keeps every exp() argument at or below zero.
// NOTE(review): kernel lines 378-381 are missing from this extract; a guard
// for j >= ncols presumably lives there - confirm.
370 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
374 __kernel void KERNEL_NAME( 375 __global DATATYPE* A, int nrows, int ncols, int offset) 377 int j = get_global_id(0); 382 DATATYPE col_max = -INFINITY; 383 for (int i=0; i<nrows; i++) 384 col_max = max(col_max, A[offset + i+j*nrows]); 386 DATATYPE col_sum = 0; 387 for (int i=0; i<nrows; i++) 388 col_sum += exp(A[offset + i+j*nrows]-col_max); 390 DATATYPE normalizer = log(col_sum); 391 for (int i=0; i<nrows; i++) 393 int index = offset + i+j*nrows; 394 A[index] = exp(A[index]-col_max-normalizer); 400 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source); 402 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D); 410 static void compute(CGPUMatrix<T> A)
// compute(A): fetch the kernel, size the launch from the number of columns
// (one work-item per column), then enqueue it with A's buffer, its row and
// column counts, and its offset.
412 viennacl::ocl::kernel& kernel = generate_kernel<T>();
413 kernel.global_work_size(0, ocl::align_to_multiple_1d(A.num_cols));
415 viennacl::ocl::enqueue(kernel(A.vcl_matrix(),
416 cl_int(A.num_rows), cl_int(A.num_cols), cl_int(A.offset)));
420 #endif // HAVE_VIENNACL 425 template <enum Backend,
class Matrix>
// Generic template parameterized on a Backend enum value and a Matrix class.
// It only declares a static compute(P, Q) returning a scalar T; no definition
// is visible here.
// NOTE(review): the struct-name line is missing from this extract. The
// VIENNACL kernel that follows is named "cross_entropy_...", so this is
// presumably the generic template for that operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
429 typedef typename Matrix::Scalar
T;
// Both matrices are taken by value; the result is a single value of type T.
434 static T
compute(Matrix P, Matrix Q);
// Partial specialization that leaves only the Matrix class as a parameter.
// NOTE(review): the struct line naming the operation and backend is missing
// from this extract; the following "#endif // HAVE_EIGEN3" suggests this is
// the Eigen implementation of cross entropy - confirm.
439 template <
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
443 typedef typename Matrix::Scalar
T;
// Returns -sum(P .* log(Q + 1e-30)), computed coefficient-wise. The 1e-30
// offset keeps log() away from an exact zero in Q. P_eig and Q_eig are
// defined on lines that are not present in this extract.
456 return -1*(P_eig.array() * (Q_eig.array()+1e-30).log()).
sum();
459 #endif // HAVE_EIGEN3 463 template <
class Matrix>
// GPU implementation operating on CGPUMatrix<T>.
// NOTE(review): the struct line is missing from this extract; the kernel name
// below indicates the cross_entropy operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
467 typedef typename Matrix::Scalar
T;
// Returns the OpenCL kernel named "cross_entropy_<type>". If a kernel with
// that name already exists it is returned as is; otherwise the source is
// assembled, compiled, and its work sizes are set.
// NOTE(review): compute() calls this as generate_kernel<T>(), but no template
// header for it is visible in this extract - confirm one exists.
471 static viennacl::ocl::kernel& generate_kernel()
473 std::string kernel_name =
"cross_entropy_" + ocl::get_type_string<T>();
475 if (ocl::kernel_exists(kernel_name))
476 return ocl::get_kernel(kernel_name);
// Kernel outline (the source text follows the preamble):
//   1. Each work-item accumulates p[i]*log(q[i] + 1e-30) over the indices
//      i = local_id, local_id + WORK_GROUP_SIZE_1D, ...
//   2. The partial sums are stored in a __local buffer and combined by a
//      halving loop, with barriers between steps.
//   3. The work-item with global id 0 writes -buffer[0] to *result.
// Global and local work sizes are both set to OCL_WORK_GROUP_SIZE_1D, so the
// launch is a single work-group.
// NOTE(review): kernel line 500, between the barrier and the accumulate, is
// missing from this extract; a `local_id < j` guard presumably lives there -
// confirm.
478 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
482 __kernel void KERNEL_NAME( 483 __global DATATYPE* p, int size, int p_offset, 484 __global DATATYPE* q, int q_offset, 485 __global DATATYPE* result) 487 __local DATATYPE buffer[WORK_GROUP_SIZE_1D]; 489 int local_id = get_local_id(0); 491 DATATYPE thread_sum = 0; 492 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D) 493 thread_sum += p[i+p_offset]*log(q[i+q_offset]+1e-30); 495 buffer[local_id] = thread_sum; 497 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1) 499 barrier(CLK_LOCAL_MEM_FENCE); 501 buffer[local_id] += buffer[local_id + j]; 504 barrier(CLK_LOCAL_MEM_FENCE); 506 if (get_global_id(0)==0) 507 *result = -1*buffer[0]; 512 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source); 514 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D); 515 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D); 523 static T
// compute(P, Q): fetch the kernel, allocate a one-element GPU vector for the
// scalar result, and enqueue the kernel over all elements of P. Q is read
// with the same indices, shifted by its own offset.
// NOTE(review): the return statement is on lines that are not present in this
// extract; the element count is taken from P only - confirm Q matches.
compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
525 viennacl::ocl::kernel& kernel = generate_kernel<T>();
527 CGPUVector<T> result(1);
529 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
530 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
531 Q.vcl_matrix(), cl_int(Q.offset),
532 result.vcl_vector()));
537 #endif // HAVE_VIENNACL 542 template <enum Backend,
class Matrix>
// Generic template parameterized on a Backend enum value and a Matrix class.
// It only declares a static compute(P, Q) returning a scalar T; no definition
// is visible here.
// NOTE(review): the struct-name line is missing from this extract. The
// VIENNACL kernel that follows is named "squared_error_...", so this is
// presumably the generic template for that operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
546 typedef typename Matrix::Scalar
T;
// Both matrices are taken by value; the result is a single value of type T.
551 static T
compute(Matrix P, Matrix Q);
// Partial specialization that leaves only the Matrix class as a parameter.
// NOTE(review): the struct line naming the operation and backend is missing
// from this extract; the following "#endif // HAVE_EIGEN3" suggests this is
// the Eigen implementation of squared error - confirm.
556 template <
class Matrix>
// T is the element type the Matrix class exposes as Matrix::Scalar.
560 typedef typename Matrix::Scalar
T;
// Returns half the sum of squared coefficient-wise differences between P_eig
// and Q_eig. Both are defined on lines that are not present in this extract.
573 return 0.5 * (P_eig - Q_eig).array().square().sum();
576 #endif // HAVE_EIGEN3 580 template <
class Matrix>
// GPU implementation operating on CGPUMatrix<T>.
// NOTE(review): the struct line is missing from this extract; the kernel name
// below indicates the squared_error operation - confirm.
// T is the element type the Matrix class exposes as Matrix::Scalar.
584 typedef typename Matrix::Scalar
T;
// Returns the OpenCL kernel named "squared_error_<type>". If a kernel with
// that name already exists it is returned as is; otherwise the source is
// assembled, compiled, and its work sizes are set.
// NOTE(review): compute() calls this as generate_kernel<T>(), but no template
// header for it is visible in this extract - confirm one exists.
588 static viennacl::ocl::kernel& generate_kernel()
590 std::string kernel_name =
"squared_error_" + ocl::get_type_string<T>();
592 if (ocl::kernel_exists(kernel_name))
593 return ocl::get_kernel(kernel_name);
// Kernel outline (the source text follows the preamble):
//   1. Each work-item accumulates (p[i] - q[i])^2 over the indices
//      i = local_id, local_id + WORK_GROUP_SIZE_1D, ...
//   2. The partial sums are stored in a __local buffer and combined by a
//      halving loop, with barriers between steps.
//   3. The work-item with global id 0 writes 0.5*buffer[0] to *result.
// Global and local work sizes are both set to OCL_WORK_GROUP_SIZE_1D, so the
// launch is a single work-group.
// NOTE(review): kernel line 617, between the barrier and the accumulate, is
// missing from this extract; a `local_id < j` guard presumably lives there -
// confirm.
595 std::string source = ocl::generate_kernel_preamble<T>(kernel_name);
599 __kernel void KERNEL_NAME( 600 __global DATATYPE* p, int size, int p_offset, 601 __global DATATYPE* q, int q_offset, 602 __global DATATYPE* result) 604 __local DATATYPE buffer[WORK_GROUP_SIZE_1D]; 606 int local_id = get_local_id(0); 608 DATATYPE thread_sum = 0; 609 for (int i=local_id; i<size; i+=WORK_GROUP_SIZE_1D) 610 thread_sum += pown(p[i+p_offset]-q[i+q_offset], 2); 612 buffer[local_id] = thread_sum; 614 for (int j = WORK_GROUP_SIZE_1D/2; j > 0; j = j>>1) 616 barrier(CLK_LOCAL_MEM_FENCE); 618 buffer[local_id] += buffer[local_id + j]; 621 barrier(CLK_LOCAL_MEM_FENCE); 623 if (get_global_id(0)==0) 624 *result = 0.5*buffer[0]; 629 viennacl::ocl::kernel& kernel = ocl::compile_kernel(kernel_name, source); 631 kernel.local_work_size(0, OCL_WORK_GROUP_SIZE_1D); 632 kernel.global_work_size(0, OCL_WORK_GROUP_SIZE_1D); 640 static T
// compute(P, Q): fetch the kernel, allocate a one-element GPU vector for the
// scalar result, and enqueue the kernel over all elements of P. Q is read
// with the same indices, shifted by its own offset.
// NOTE(review): the return statement is on lines that are not present in this
// extract; the element count is taken from P only - confirm Q matches.
compute(CGPUMatrix<T> P, CGPUMatrix<T> Q)
642 viennacl::ocl::kernel& kernel = generate_kernel<T>();
644 CGPUVector<T> result(1);
646 viennacl::ocl::enqueue(kernel(P.vcl_matrix(),
647 cl_int(P.num_rows*P.num_cols), cl_int(P.offset),
648 Q.vcl_matrix(), cl_int(Q.offset),
649 result.vcl_vector()));
654 #endif // HAVE_VIENNACL 663 #endif // SPECIAL_PURPOSE_IMPL_H_ Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static void compute(SGMatrix< T > A, SGMatrix< T > result)
static void compute(SGMatrix< T > A)
static void compute(SGMatrix< T > A, SGMatrix< T > result)
Generic class sum which provides a static compute method. This class is specialized for different typ...
static void compute(SGMatrix< T > A, SGMatrix< T > C)
Generic class which is specialized for different backends to perform the max operation.
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static T compute(SGMatrix< T > P, SGMatrix< T > Q)
static void compute(Matrix A, Matrix result)
static void compute(SGMatrix< T > A, SGMatrix< T > C)
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
All classes and functions are contained in the shogun namespace.
Eigen::Matrix< T, Eigen::Dynamic, Eigen::Dynamic > MatrixXt
static float64_t exp(float64_t x)
static float64_t log(float64_t v)