5 #ifndef VA_TRIPLE_DECOMPOSITION_H
6 #define VA_TRIPLE_DECOMPOSITION_H
8 #include "common_defs.h"
38 VA_DEVICE_FUN
void valib_get_brf(VA_DEVICE_ADDR VA_REAL *A,
int *num_intervals,
43 int ialpha, ibeta, igamma;
47 extern __shared__ VA_REAL s_data[];
48 VA_REAL *Q = &s_data[0];
51 __private VA_REAL Q[9];
58 VA_REAL alpha, beta, gamma;
63 VA_REAL goal_function;
66 VA_REAL goal_brf = 0.;
69 VA_REAL stepSize = VA_PI / (VA_REAL) *num_intervals;
77 for (ialpha = 0; ialpha < *num_intervals + 1; ialpha++) {
78 alpha = ialpha*stepSize;
80 sincos(alpha, &sina, &cosa);
82 sina = native_sin(alpha);
83 cosa = native_cos(alpha);
88 for (ibeta = 0; ibeta < *num_intervals + 1; ibeta++) {
89 beta = ibeta*stepSize;
91 sincos(beta, &sinb, &cosb);
93 sinb = native_sin(beta);
94 cosb = native_cos(beta);
99 for (igamma = 0; igamma < *num_intervals / 2 + 1; igamma++) {
100 gamma = igamma*stepSize;
102 sincos(gamma, &sing, &cosg);
103 #elif defined(OPENCL)
104 sing = native_sin(gamma);
105 cosg = native_cos(gamma);
114 if (threadIdx.x == 0) {
128 goal_function = fabs(SO[1]*SO[3])
133 if (goal_function > goal_brf) {
134 goal_brf = goal_function;
194 VA_DEVICE_ADDR VA_REAL *A,
196 VA_DEVICE_ADDR VA_REAL *residual_vorticity,
197 VA_DEVICE_ADDR VA_REAL *residual_strain,
198 VA_DEVICE_ADDR VA_REAL *shear)
205 VA_REAL alpha_brf, beta_brf, gamma_brf;
211 VA_REAL res_tensor_norm_2;
217 valib_get_brf(A, num_intervals, &alpha_brf, &beta_brf, &gamma_brf);
220 sincos(alpha_brf, &sina, &cosa);
221 sincos(beta_brf, &sinb, &cosb);
222 sincos(gamma_brf, &sing, &cosg);
223 #elif defined(OPENCL)
224 sina = native_sin(alpha_brf);
225 cosa = native_cos(alpha_brf);
226 sinb = native_sin(beta_brf);
227 cosb = native_cos(beta_brf);
228 sing = native_sin(gamma_brf);
229 cosg = native_cos(gamma_brf);
231 sina = sin(alpha_brf);
232 cosa = cos(alpha_brf);
233 sinb = sin(beta_brf);
234 cosb = cos(beta_brf);
235 sing = sin(gamma_brf);
236 cosg = cos(gamma_brf);
248 for (i = 0; i < 9; i++) {
249 norm_A_2 = norm_A_2 + SO[i]*SO[i];
253 aux =
valib_min(fabs(SO[3]), fabs(SO[1]));
257 aux =
valib_min(fabs(SO[6]), fabs(SO[2]));
261 aux =
valib_min(fabs(SO[7]), fabs(SO[5]));
265 res_tensor_norm_2 = SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]
266 + 2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7]);
268 *shear = sqrt(norm_A_2 - res_tensor_norm_2);
274 *residual_vorticity = sqrt( 2.*SO[1]*SO[1]
279 *residual_strain = sqrt(2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7])
280 + (SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]));
341 VA_DEVICE_ADDR VA_REAL *A,
int *num_intervals,
342 VA_DEVICE_ADDR VA_REAL *residual_vorticity,
343 VA_DEVICE_ADDR VA_REAL *residual_strain,
344 VA_DEVICE_ADDR VA_REAL *shear_vorticity,
345 VA_DEVICE_ADDR VA_REAL *shear_strain)
354 VA_REAL alpha_brf, beta_brf, gamma_brf;
363 valib_get_brf(A, num_intervals, &alpha_brf, &beta_brf, &gamma_brf);
366 sincos(alpha_brf, &sina, &cosa);
367 sincos(beta_brf, &sinb, &cosb);
368 sincos(gamma_brf, &sing, &cosg);
369 #elif defined(OPENCL)
370 sina = native_sin(alpha_brf);
371 cosa = native_cos(alpha_brf);
372 sinb = native_sin(beta_brf);
373 cosb = native_cos(beta_brf);
374 sing = native_sin(gamma_brf);
375 cosg = native_cos(gamma_brf);
377 sina = sin(alpha_brf);
378 cosa = cos(alpha_brf);
379 sinb = sin(beta_brf);
380 cosb = cos(beta_brf);
381 sing = sin(gamma_brf);
382 cosg = cos(gamma_brf);
393 aux =
valib_min(fabs(SO[3]), fabs(SO[1]));
397 aux =
valib_min(fabs(SO[6]), fabs(SO[2]));
401 aux =
valib_min(fabs(SO[7]), fabs(SO[5]));
408 *residual_vorticity = sqrt( 2.*SO[1]*SO[1]
412 *residual_strain = sqrt( 2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7])
413 + (SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]));
422 aux =
valib_min(fabs(SO[3]), fabs(SO[1]));
426 aux =
valib_min(fabs(SO[6]), fabs(SO[2]));
430 aux =
valib_min(fabs(SO[7]), fabs(SO[5]));
435 for (i = 0; i < 9; i++) {
436 SO[i] = QAQ[i] - SO[i];
444 *shear_vorticity = sqrt(2.*(SO[1]*SO[1] + SO[2]*SO[2] + SO[5]*SO[5]));
447 *shear_strain = sqrt(2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7])
448 + (SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]));
451 #endif // VA_TRIPLE_DECOMPOSITION_H