valib
Vortex Analysis LIBrary
triple_decomposition.h
1 
5 #ifndef VA_TRIPLE_DECOMPOSITION_H
6 #define VA_TRIPLE_DECOMPOSITION_H
7 
8 #include "common_defs.h"
9 #include "linalg3.h"
10 
11 /***************************************************************************/
16 /***************************************************************************/
38 VA_DEVICE_FUN void valib_get_brf(VA_DEVICE_ADDR VA_REAL *A, int *num_intervals,
39  VA_REAL *alpha_brf,
40  VA_REAL *beta_brf,
41  VA_REAL *gamma_brf)
42 {
43  int ialpha, ibeta, igamma;
44 
45 #if defined(CUDA)
46  // put Q into shared memory
47  extern __shared__ VA_REAL s_data[];
48  VA_REAL *Q = &s_data[0];
49 #elif defined(OPENCL)
50  // put Q into shared memory
51  __private VA_REAL Q[9];
52 #else
53  VA_REAL Q[9]; // orthogonal matrices of transformations
54 #endif
55  VA_REAL SO[9]; // symmetric (upper triangle) and antisymmetric (lower
56  // triangle) parts of velocity gradient
57 
58  VA_REAL alpha, beta, gamma; // rotation angles
59  VA_REAL sina, cosa; // sin(alpha), cos(alpha)
60  VA_REAL sinb, cosb; // sin(beta), cos(beta)
61  VA_REAL sing, cosg; // sin(gamma), cos(gamma)
62 
63  VA_REAL goal_function;
64 
65  // initialize maxima
66  VA_REAL goal_brf = 0.;
67 
68  // compute stepSize in radians
69  VA_REAL stepSize = VA_PI / (VA_REAL) *num_intervals;
70 
71  // initialize optimal angles
72  *alpha_brf = 0.;
73  *beta_brf = 0.;
74  *gamma_brf = 0.;
75 
76  // perform search over all combinations of spherical coordinates
77  for (ialpha = 0; ialpha < *num_intervals + 1; ialpha++) {
78  alpha = ialpha*stepSize; // alpha in [0,pi]
79 #if defined(CUDA)
80  sincos(alpha, &sina, &cosa);
81 #elif defined(OPENCL)
82  sina = native_sin(alpha);
83  cosa = native_cos(alpha);
84 #else
85  sina = sin(alpha);
86  cosa = cos(alpha);
87 #endif
88  for (ibeta = 0; ibeta < *num_intervals + 1; ibeta++) {
89  beta = ibeta*stepSize; // beta in [0,pi]
90 #if defined(CUDA)
91  sincos(beta, &sinb, &cosb);
92 #elif defined(OPENCL)
93  sinb = native_sin(beta);
94  cosb = native_cos(beta);
95 #else
96  sinb = sin(beta);
97  cosb = cos(beta);
98 #endif
99  for (igamma = 0; igamma < *num_intervals / 2 + 1; igamma++) {
100  gamma = igamma*stepSize; // gamma in [0,pi/2]
101 #if defined(CUDA)
102  sincos(gamma, &sing, &cosg);
103 #elif defined(OPENCL)
104  sing = native_sin(gamma);
105  cosg = native_cos(gamma);
106 #else
107  sing = sin(gamma);
108  cosg = cos(gamma);
109 #endif
110 
111  // generate matrix Q
112 #if defined(CUDA)
113  __syncthreads();
114  if (threadIdx.x == 0) {
115 #endif
116  valib_constructQ3(sina, cosa, sinb, cosb, sing, cosg, Q);
117 #if defined(CUDA)
118  }
119  __syncthreads();
120 #endif
121  // SO = QAQ^T
122  valib_tatt3(A, Q, SO);
123 
124  // get residual vorticity in 2D
125  valib_sym_antisym3(SO);
126 
127  // evaluate the goal function
128  goal_function = fabs(SO[1]*SO[3])
129  + fabs(SO[2]*SO[6])
130  + fabs(SO[5]*SO[7]);
131 
132  // update maxima
133  if (goal_function > goal_brf) {
134  goal_brf = goal_function;
135 
136  *alpha_brf = alpha;
137  *beta_brf = beta;
138  *gamma_brf = gamma;
139  }
140  }
141  }
142  }
143 }
144 
145 /***************************************************************************/
149 /***************************************************************************/
193 VA_DEVICE_FUN void valib_triple_decomposition(
194  VA_DEVICE_ADDR VA_REAL *A,
195  int *num_intervals,
196  VA_DEVICE_ADDR VA_REAL *residual_vorticity,
197  VA_DEVICE_ADDR VA_REAL *residual_strain,
198  VA_DEVICE_ADDR VA_REAL *shear)
199 {
200  VA_REAL SO[9]; // symmetric (upper triangle) and
201  // antisymmetric (lower triangle)
202  // parts of velocity gradient
203  VA_REAL Q[9]; // orthogonal matrices of
204  // transformations
205  VA_REAL alpha_brf, beta_brf, gamma_brf; // angles of basic reference
206  // frame (BRF)
207  VA_REAL sina, cosa; // sin(alpha), cos(alpha)
208  VA_REAL sinb, cosb; // sin(beta), cos(beta)
209  VA_REAL sing, cosg; // sin(gamma), cos(gamma)
210  VA_REAL norm_A_2; // norm of velocity gradient
211  VA_REAL res_tensor_norm_2; // norm of residual velocity gradient
212 
213  int i;
214  VA_REAL aux;
215 
216  // find BRF angles
217  valib_get_brf(A, num_intervals, &alpha_brf, &beta_brf, &gamma_brf);
218 
219 #if defined(CUDA)
220  sincos(alpha_brf, &sina, &cosa);
221  sincos(beta_brf, &sinb, &cosb);
222  sincos(gamma_brf, &sing, &cosg);
223 #elif defined(OPENCL)
224  sina = native_sin(alpha_brf);
225  cosa = native_cos(alpha_brf);
226  sinb = native_sin(beta_brf);
227  cosb = native_cos(beta_brf);
228  sing = native_sin(gamma_brf);
229  cosg = native_cos(gamma_brf);
230 #else
231  sina = sin(alpha_brf);
232  cosa = cos(alpha_brf);
233  sinb = sin(beta_brf);
234  cosb = cos(beta_brf);
235  sing = sin(gamma_brf);
236  cosg = cos(gamma_brf);
237 #endif
238 
239  // generate matrix Q (this call is not shared for CUDA because this time,
240  // each thread has different Q)
241  valib_constructQ3(sina, cosa, sinb, cosb, sing, cosg, Q);
242 
243  // SO = QAQ^T
244  valib_tatt3(A, Q, SO);
245 
246  // squared norm of the transformed velocity gradient tensor
247  norm_A_2 = 0.;
248  for (i = 0; i < 9; i++) {
249  norm_A_2 = norm_A_2 + SO[i]*SO[i];
250  }
251 
252  // Find the residual vorticity inside SO - filter out shear stress
253  aux = valib_min(fabs(SO[3]), fabs(SO[1]));
254  SO[3] = valib_sign(aux, SO[3]);
255  SO[1] = valib_sign(aux, SO[1]);
256 
257  aux = valib_min(fabs(SO[6]), fabs(SO[2]));
258  SO[6] = valib_sign(aux, SO[6]);
259  SO[2] = valib_sign(aux, SO[2]);
260 
261  aux = valib_min(fabs(SO[7]), fabs(SO[5]));
262  SO[7] = valib_sign(aux, SO[7]);
263  SO[5] = valib_sign(aux, SO[5]);
264 
265  res_tensor_norm_2 = SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]
266  + 2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7]);
267  // shear magnitude
268  *shear = sqrt(norm_A_2 - res_tensor_norm_2);
269 
270  // ( QAQ^T - shear ) is left in SO - find symmetric and antisymmetric parts
271  valib_sym_antisym3(SO);
272 
273  // ||Omega_RES||_F
274  *residual_vorticity = sqrt( 2.*SO[1]*SO[1]
275  + 2.*SO[2]*SO[2]
276  + 2.*SO[5]*SO[5]);
277 
278  // ||S_RES||_F
279  *residual_strain = sqrt(2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7])
280  + (SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]));
281 }
282 
283 /***************************************************************************/
341  VA_DEVICE_ADDR VA_REAL *A, int *num_intervals,
342  VA_DEVICE_ADDR VA_REAL *residual_vorticity,
343  VA_DEVICE_ADDR VA_REAL *residual_strain,
344  VA_DEVICE_ADDR VA_REAL *shear_vorticity,
345  VA_DEVICE_ADDR VA_REAL *shear_strain)
346 {
347 
348  VA_REAL SO[9]; // symmetric (upper triangle) and
349  // antisymmetric (lower triangle)
350  // parts of velocity gradient
351  VA_REAL QAQ[9]; // rotated velocity gradient
352  VA_REAL Q[9]; // orthogonal matrices of
353  // transformations
354  VA_REAL alpha_brf, beta_brf, gamma_brf; // angles of basic reference frame
355  VA_REAL sina, cosa; // sin(alpha), cos(alpha)
356  VA_REAL sinb, cosb; // sin(beta), cos(beta)
357  VA_REAL sing, cosg; // sin(gamma), cos(gamma)
358 
359  int i;
360  VA_REAL aux;
361 
362  // find BRF angles
363  valib_get_brf(A, num_intervals, &alpha_brf, &beta_brf, &gamma_brf);
364 
365 #if defined(CUDA)
366  sincos(alpha_brf, &sina, &cosa);
367  sincos(beta_brf, &sinb, &cosb);
368  sincos(gamma_brf, &sing, &cosg);
369 #elif defined(OPENCL)
370  sina = native_sin(alpha_brf);
371  cosa = native_cos(alpha_brf);
372  sinb = native_sin(beta_brf);
373  cosb = native_cos(beta_brf);
374  sing = native_sin(gamma_brf);
375  cosg = native_cos(gamma_brf);
376 #else
377  sina = sin(alpha_brf);
378  cosa = cos(alpha_brf);
379  sinb = sin(beta_brf);
380  cosb = cos(beta_brf);
381  sing = sin(gamma_brf);
382  cosg = cos(gamma_brf);
383 #endif
384 
385  // generate matrix Q ( this call is not shared for CUDA because this time,
386  // each thread has different Q )
387  valib_constructQ3(sina, cosa, sinb, cosb, sing, cosg, Q);
388 
389  // SO = QAQ^T
390  valib_tatt3(A, Q, SO);
391 
392  // Find the residual vorticity inside SO - filter out shear stress
393  aux = valib_min(fabs(SO[3]), fabs(SO[1]));
394  SO[3] = valib_sign(aux, SO[3]);
395  SO[1] = valib_sign(aux, SO[1]);
396 
397  aux = valib_min(fabs(SO[6]), fabs(SO[2]));
398  SO[6] = valib_sign(aux, SO[6]);
399  SO[2] = valib_sign(aux, SO[2]);
400 
401  aux = valib_min(fabs(SO[7]), fabs(SO[5]));
402  SO[7] = valib_sign(aux, SO[7]);
403  SO[5] = valib_sign(aux, SO[5]);
404 
405  // ( QAQ^T - shear ) is left in SO - find symmetric and antisymmetric parts
406  valib_sym_antisym3(SO);
407 
408  *residual_vorticity = sqrt( 2.*SO[1]*SO[1]
409  + 2.*SO[2]*SO[2]
410  + 2.*SO[5]*SO[5]);
411 
412  *residual_strain = sqrt( 2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7])
413  + (SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]));
414 
415  // SO = QAQ^T
416  valib_tatt3(A, Q, QAQ);
417 
418  // copy QAQ^T to SO
419  valib_mat_copy3(QAQ, SO);
420 
421  // Find the residual vorticity inside SO
422  aux = valib_min(fabs(SO[3]), fabs(SO[1]));
423  SO[3] = valib_sign(aux, SO[3]);
424  SO[1] = valib_sign(aux, SO[1]);
425 
426  aux = valib_min(fabs(SO[6]), fabs(SO[2]));
427  SO[6] = valib_sign(aux, SO[6]);
428  SO[2] = valib_sign(aux, SO[2]);
429 
430  aux = valib_min(fabs(SO[7]), fabs(SO[5]));
431  SO[7] = valib_sign(aux, SO[7]);
432  SO[5] = valib_sign(aux, SO[5]);
433 
434  // store the difference, i.e. the shear tensor, in SO
435  for (i = 0; i < 9; i++) {
436  SO[i] = QAQ[i] - SO[i];
437  }
438 
439  // find the shear strain rate and shear vorticity
440  // (QAQ^T - shear) is left in SO - find symmetric and antisymmetric parts
441  valib_sym_antisym3(SO);
442 
443  // shear_vort = ||Omega_SH||_F
444  *shear_vorticity = sqrt(2.*(SO[1]*SO[1] + SO[2]*SO[2] + SO[5]*SO[5]));
445 
446  // shear_strain = ||S_SH||_F
447  *shear_strain = sqrt(2.*(SO[3]*SO[3] + SO[6]*SO[6] + SO[7]*SO[7])
448  + (SO[0]*SO[0] + SO[4]*SO[4] + SO[8]*SO[8]));
449 }
450 
451 #endif // VA_TRIPLE_DECOMPOSITION_H
valib_tatt3
VA_DEVICE_FUN void valib_tatt3(VA_REAL *A, VA_REAL *T, VA_REAL *TATT)
Definition: linalg3.h:406
valib_sign
VA_DEVICE_FUN VA_REAL valib_sign(VA_REAL a, VA_REAL b)
Definition: linalg3.h:20
valib_triple_decomposition
VA_DEVICE_FUN void valib_triple_decomposition(VA_DEVICE_ADDR VA_REAL *A, int *num_intervals, VA_DEVICE_ADDR VA_REAL *residual_vorticity, VA_DEVICE_ADDR VA_REAL *residual_strain, VA_DEVICE_ADDR VA_REAL *shear)
Triple Decomposition Method (TDM, a.k.a. residual vorticity and residual strain rate) ,...
Definition: triple_decomposition.h:193
valib_mat_copy3
VA_DEVICE_FUN void valib_mat_copy3(VA_REAL *A, VA_REAL *B)
Definition: linalg3.h:243
valib_triple_decomposition_4norms
VA_DEVICE_FUN void valib_triple_decomposition_4norms(VA_DEVICE_ADDR VA_REAL *A, int *num_intervals, VA_DEVICE_ADDR VA_REAL *residual_vorticity, VA_DEVICE_ADDR VA_REAL *residual_strain, VA_DEVICE_ADDR VA_REAL *shear_vorticity, VA_DEVICE_ADDR VA_REAL *shear_strain)
Triple Decomposition Method (TDM, a.k.a. residual vorticity and residual strain rate) ,...
Definition: triple_decomposition.h:340
valib_min
VA_DEVICE_FUN VA_REAL valib_min(VA_REAL a, VA_REAL b)
Definition: linalg3.h:49
valib_sym_antisym3
VA_DEVICE_FUN void valib_sym_antisym3(VA_REAL *A)
Definition: linalg3.h:320
valib_constructQ3
VA_DEVICE_FUN void valib_constructQ3(VA_REAL sina, VA_REAL cosa, VA_REAL sinb, VA_REAL cosb, VA_REAL sing, VA_REAL cosg, VA_REAL *Q)
Definition: linalg3.h:743
valib_get_brf
VA_DEVICE_FUN void valib_get_brf(VA_DEVICE_ADDR VA_REAL *A, int *num_intervals, VA_REAL *alpha_brf, VA_REAL *beta_brf, VA_REAL *gamma_brf)
Definition: triple_decomposition.h:38