22 return (b < (VA_REAL) 0.) ? -fabs(a) : fabs(a);
34 VA_REAL ar = hipCreal(a);
35 VA_REAL ai = hipCimag(a);
36 return sqrt(ar*ar + ai*ai);
46VA_DEVICE_FUN VA_DOUBLE_COMPLEX
valib_cmultiply(VA_DOUBLE_COMPLEX a, VA_DOUBLE_COMPLEX b)
51 double ar = hipCreal(a);
52 double ai = hipCimag(a);
53 double br = hipCreal(b);
54 double bi = hipCimag(b);
56 double cr = ar*br - ai*bi;
57 double ci = ar*bi + ai*br;
58 VA_DOUBLE_COMPLEX c = {cr,ci};
79VA_DEVICE_FUN VA_REAL
valib_max(VA_REAL a, VA_REAL b)
81 return (a > b) ? a : b;
89VA_DEVICE_FUN VA_REAL
valib_min(VA_REAL a, VA_REAL b)
91 return (a < b) ? a : b;
124 VA_REAL c, VA_REAL d,
138 double q = pow(bd/(3.*ad),3) - (bd*cd)/(6.*ad*ad) + dd/(2.*ad);
139 double p = (3*ad*cd - bd*bd) / (9*ad*ad);
141 double delta = - (p*p*p + q*q);
144 printf(
"q: %lf \n", q);
145 printf(
"p: %lf \n", p);
146 printf(
"delta: %lf \n", delta);
151 printf(
"This is a good case with two complex roots.\n");
156 printf(
"Discriminant is positive, this is not a good way to get roots.\n");
162 double sr = sqrt(-delta);
165 printf(
"sr: %lf \n", sr);
168 double mqpsr = -q + sr;
169 double mqmsr = -q - sr;
172 printf(
"mqpsr: %lf \n", mqpsr);
173 printf(
"mqmsr: %lf \n", mqmsr);
180 printf(
"fabs(u*v + p): %lf \n", fabs(u*v + p));
182 if (fabs(u*v + p) > 1.e-4) {
184 printf(
"Check of uv = -p failed, check = %lf.", fabs(u*v + p));
192 VA_DOUBLE_COMPLEX e1(-0.5, sqrt(3.)/2.);
193 VA_DOUBLE_COMPLEX e2(-0.5, -sqrt(3.)/2.);
195 VA_DOUBLE_COMPLEX uc = u;
196 VA_DOUBLE_COMPLEX vc = v;
198 VA_DOUBLE_COMPLEX e1 = {-0.5, sqrt(3.)/2.};
199 VA_DOUBLE_COMPLEX e2 = {-0.5, -sqrt(3.)/2.};
201 VA_DOUBLE_COMPLEX uc = {u, 0};
202 VA_DOUBLE_COMPLEX vc = {v, 0};
204 VA_DOUBLE_COMPLEX e1 = -0.5 + sqrt(3.)/2.*I;
205 VA_DOUBLE_COMPLEX e2 = -0.5 - sqrt(3.)/2.*I;
207 VA_DOUBLE_COMPLEX uc = u + 0.*I;
208 VA_DOUBLE_COMPLEX vc = v + 0.*I;
211 VA_DOUBLE_COMPLEX y1 = uc + vc;
215 double shift = -bd/(3.*ad);
217 VA_DOUBLE_COMPLEX shiftc = shift;
219 VA_DOUBLE_COMPLEX shiftc = {shift,0};
221 VA_DOUBLE_COMPLEX shiftc = shift + 0.*I;
229 VA_DOUBLE_COMPLEX ac = a;
230 VA_DOUBLE_COMPLEX bc = b;
231 VA_DOUBLE_COMPLEX cc = c;
232 VA_DOUBLE_COMPLEX dc = d;
234 VA_DOUBLE_COMPLEX ac = {a,0};
235 VA_DOUBLE_COMPLEX bc = {b,0};
236 VA_DOUBLE_COMPLEX cc = {c,0};
237 VA_DOUBLE_COMPLEX dc = {d,0};
239 VA_DOUBLE_COMPLEX ac = a + 0.*I;
240 VA_DOUBLE_COMPLEX bc = b + 0.*I;
241 VA_DOUBLE_COMPLEX cc = c + 0.*I;
242 VA_DOUBLE_COMPLEX dc = d + 0.*I;
252 printf(
"Check of x1 failed.");
257 printf(
"Check of x2 failed.");
262 printf(
"Check of x3 failed.");
284 for (i = 0; i < 3; i++) {
295 VA_REAL result = (VA_REAL) 0.;
297 for (i = 0; i < 3; i++)
299 result = result + v1[i]*v2[i];
310 v3[0] = v1[1] * v2[2] - v1[2] * v2[1];
311 v3[1] = v1[2] * v2[0] - v1[0] * v2[2];
312 v3[2] = v1[0] * v2[1] - v1[1] * v2[0];
329 for (j = 0; j < 3; j++) {
330 for (i = 0; i < 3; i++) {
343 for (j = 0; j < 3; j++) {
344 for (i = 0; i < 3; i++) {
358 for (j = 0; j < 3; j++) {
359 for (i = 0; i < 3; i++) {
360 *norm = *norm + A[j*3+i]*A[j*3+i];
372 *tr3 = A[0] + A[4] + A[8];
380 VA_REAL *second_invariant)
382 *second_invariant = ( A[0]*A[4] + A[4]*A[8] + A[0]*A[8] )
383 - ( A[1]*A[3] + A[2]*A[6] + A[5]*A[7] );
392 *det = A[0] * A[4] * A[8]
397 - A[6] * A[4] * A[2];
422 for (i = 0; i < 3; i++) {
423 for (j = i+1; j < 3; j++) {
427 A[i + j*3] = (VA_REAL) 0.5 * (ud + ld);
428 A[j + i*3] = (VA_REAL) 0.5 * (ld - ud);
448 A[0] = A[0] - (VA_REAL) (1./3.) * trace;
449 A[4] = A[4] - (VA_REAL) (1./3.) * trace;
450 A[8] = A[8] - (VA_REAL) (1./3.) * trace;
459 VA_REAL *x, VA_REAL *y)
462 for (i = 0; i < 3; i++) {
464 for (j = 0; j < 3; j++) {
466 y[i] = y[i] + A[j*3+i]*x[j];
469 y[i] = y[i] + A[i*3+j]*x[j];
481 VA_REAL *x, VA_REAL *y,
489 for (j = 0; j < 3; j++) {
491 for (i = 0; i < 3; i++) {
492 A[ind] = A[ind] + x[i]*tmp;
503VA_DEVICE_FUN
void valib_tatt3(VA_REAL *A, VA_REAL *T, VA_REAL *TATT)
512 for (i = 0; i < 3; i++) {
513 for (j = 0; j < 3; j++) {
527 VA_REAL *A, VA_REAL *B, VA_REAL *C)
530 for (i = 0; i < 3; i++) {
531 for (j = 0; j < 3; j++) {
532 for (k = 0; k < 3; k++) {
533 if (trans_a == 0 && trans_b == 0) {
534 C[j*3+i] = C[j*3+i] + A[k*3+i] * B[j*3+k];
536 else if (trans_a != 0 && trans_b == 0) {
537 C[j*3+i] = C[j*3+i] + A[i*3+k] * B[j*3+k];
539 else if (trans_a == 0 && trans_b != 0) {
540 C[j*3+i] = C[j*3+i] + A[k*3+i] * B[k*3+j];
543 C[j*3+i] = C[j*3+i] + A[i*3+k] * B[k*3+j];
555VA_DEVICE_FUN
void valib_ttat3(VA_REAL *A, VA_REAL *T, VA_REAL *TTAT)
579 for (j = 0; j < 3; j++) {
580 for (i = 0; i < j; i++) {
585 for (k = 0; k < 3; k++) {
586 Q[j*3+k] = Q[j*3+k] - factor * Q[i*3+k];
592 for (k = 0; k < 3; k++) {
593 Q[j*3+k] = Q[j*3+k] / sqrt(factor);
607 VA_REAL IdevMod = Idev / 3.;
612 dev[0] = dev[0] - IdevMod;
613 dev[4] = dev[4] - IdevMod;
614 dev[8] = dev[8] - IdevMod;
619 VA_REAL IIdevMod = -IIdev / 3.;
624 VA_REAL IIIdevMod = 0.5 * IIIdev;
627 VA_REAL aux = IIdevMod*IIdevMod*IIdevMod - IIIdevMod*IIIdevMod;
633 phi = atan2( sqrt(aux), IIIdevMod ) / 3.;
635 if (phi < 0.) phi += VA_PI / 3.;
638 eigval[0] = IdevMod + 2.*sqrt(IIdevMod) * cos(phi);
639 eigval[1] = IdevMod - sqrt(IIdevMod) * (cos(phi) + sqrt(3.) * sin(phi));
640 eigval[2] = IdevMod - sqrt(IIdevMod) * (cos(phi) - sqrt(3.) * sin(phi));
660 for (
int i = start+1; i<3; i++) {
661 if (fabs(A[i+jcol*3]) > fabs(A[ipiv+jcol*3]))
665 VA_REAL numerical_zero = 1.0e-5;
666 if (fabs(A[ipiv+jcol*3]) < numerical_zero) {
668 printf(
"Warning: no nonzero pivot in column %d.\n", jcol);
688 x[0] = 0.; x[1] = 0.; x[2] = 0.;
691 int num_zero_cols = 0;
694 for (
int ji = 0; ji < 3; ji++) {
699 printf(
"There is no meaningful pivot in the %d-th column.\n",j);
706 if (num_zero_cols > 0) {
707 if (num_zero_cols > 1) {
709 printf(
"Oops! The matrix has more than one zero columns. I search only for 1-dimensional nullspace.\n");
723 for (
int j = 0; j < 3; j++) {
729 printf(
"Matrix in GEM after 1st swap: \n");
730 for (
int i = 0; i < 3; i++) {
731 for (
int j = 0; j < 3; j++) {
732 printf(
" %lf ", A[i+j*3]);
741 for (
int i = 1; i < 3; i++) {
743 for (
int j = 0; j < 3; j++) {
744 A[i+j*3] -= coef*A[j*3];
749 printf(
"Matrix in GEM without 1st col: \n");
750 for (
int i = 0; i < 3; i++) {
751 for (
int j = 0; j < 3; j++) {
752 printf(
" %lf ", A[i+j*3]);
774 printf(
"Something is wrong with the matrix, the defect seems larger than one.");
781 for (
int j = 0; j < 3; j++) {
787 coef = A[2+jpiv*3] / A[1+jpiv*3];
788 for (
int j = 0; j < 3; j++) {
789 A[2+j*3] -= coef*A[1+j*3];
792 coef = A[0+jpiv*3] / A[1+jpiv*3];
793 for (
int j = 0; j < 3; j++) {
794 A[j*3] -= coef*A[1+j*3];
800 printf(
"Something is wrong, the matrix seems to be full rank.\n");
807 printf(
"Matrix in GEM before dividing by diagonal: \n");
808 for (
int i = 0; i < 3; i++) {
809 for (
int j = 0; j < 3; j++) {
810 printf(
" %lf ", A[i+j*3]);
818 for (
int j = 0; j < 3; j++) {
822 for (
int j = 0; j < 3; j++) {
827 printf(
"Matrix in GEM in the upper right echelon form:\n");
828 for (
int i = 0; i < 3; i++) {
829 for (
int j = 0; j < 3; j++) {
830 printf(
" %d %lf ", i+j*3, A[i+j*3]);
838 x[jpiv] = -A[jpiv+jfree*3];
843 for (
int i = 0; i < 3; i++) {
858 VA_REAL sinb, VA_REAL cosb,
859 VA_REAL sing, VA_REAL cosg,
862 Q[0] = cosa*cosb*cosg - sina*sing;
863 Q[1] = -cosa*cosb*sing - sina*cosg;
865 Q[3] = sina*cosb*cosg + cosa*sing;
866 Q[4] = -sina*cosb*sing + cosa*cosg;
885 Q[0] = n[0]; Q[1] = n[1]; Q[2] = n[2];
888 Q[3] = n[1]; Q[4] = n[2]; Q[5] = n[0];
889 Q[6] = n[2]; Q[7] = n[0]; Q[8] = n[1];
897 for (i = 0; i < 3; i++) {
909 for (i = 0; i < 3; i++) {
VA_DEVICE_FUN VA_REAL valib_cubic_root(VA_REAL x)
VA_DEVICE_FUN void valib_cross_prod3(VA_REAL *v1, VA_REAL *v2, VA_REAL *v3)
VA_DEVICE_FUN void valib_tatt3(VA_REAL *A, VA_REAL *T, VA_REAL *TATT)
VA_DEVICE_FUN void valib_solve_cubic_equation(VA_REAL a, VA_REAL b, VA_REAL c, VA_REAL d, VA_COMPLEX *x1, VA_COMPLEX *x2, VA_COMPLEX *x3, int *info)
VA_DEVICE_FUN void valib_rank1_update3(VA_REAL alpha, VA_REAL *x, VA_REAL *y, VA_REAL *A)
VA_DEVICE_FUN void valib_ttat3(VA_REAL *A, VA_REAL *T, VA_REAL *TTAT)
VA_DEVICE_FUN void valib_deviatorise3(VA_REAL *A)
VA_DEVICE_FUN VA_REAL valib_scalar_prod3(VA_REAL *v1, VA_REAL *v2)
VA_DEVICE_FUN void valib_vec_zero3(VA_REAL *v)
VA_DEVICE_FUN void valib_matvec3_prod(int trans_a, VA_REAL *A, VA_REAL *x, VA_REAL *y)
VA_DEVICE_FUN VA_REAL valib_min(VA_REAL a, VA_REAL b)
VA_DEVICE_FUN void valib_sym_antisym3(VA_REAL *A)
VA_DEVICE_FUN void valib_transpose3(VA_REAL *A)
VA_DEVICE_FUN void valib_mat_copy3(VA_REAL *A, VA_REAL *B)
VA_DEVICE_FUN VA_DOUBLE_COMPLEX valib_cmultiply(VA_DOUBLE_COMPLEX a, VA_DOUBLE_COMPLEX b)
VA_DEVICE_FUN void valib_swap_values(VA_REAL *a, VA_REAL *b)
VA_DEVICE_FUN VA_REAL valib_square(VA_REAL a)
VA_DEVICE_FUN void valib_second_invariant3(VA_REAL *A, VA_REAL *second_invariant)
VA_DEVICE_FUN void valib_find_nullspace3(VA_REAL *A, VA_REAL *x, int *info)
VA_DEVICE_FUN void valib_gram_schmidt3(VA_REAL *Q)
VA_DEVICE_FUN void valib_constructQfromN3(VA_REAL *n, VA_REAL *Q)
VA_DEVICE_FUN int valib_find_pivot3(VA_REAL *A, int jcol, int start)
VA_DEVICE_FUN void valib_mat_zero3(VA_REAL *A)
VA_DEVICE_FUN VA_REAL valib_sign(VA_REAL a, VA_REAL b)
VA_DEVICE_FUN VA_REAL valib_cabs(VA_DOUBLE_COMPLEX a)
VA_DEVICE_FUN void valib_eigenvalues_sym3(VA_REAL *A, VA_REAL *eigval)
VA_DEVICE_FUN void valib_constructQ3(VA_REAL sina, VA_REAL cosa, VA_REAL sinb, VA_REAL cosb, VA_REAL sing, VA_REAL cosg, VA_REAL *Q)
VA_DEVICE_FUN void valib_matmat3_prod(int trans_a, int trans_b, VA_REAL *A, VA_REAL *B, VA_REAL *C)
VA_DEVICE_FUN void valib_trace3(VA_REAL *A, VA_REAL *tr3)
VA_DEVICE_FUN void valib_norm_frobenius3(VA_REAL *A, VA_REAL *norm)
VA_DEVICE_FUN void valib_determinant3(VA_REAL *A, VA_REAL *det)
VA_DEVICE_FUN VA_REAL valib_max(VA_REAL a, VA_REAL b)