8 #include "common_defs.h"
22 return (b < (VA_REAL) 0.) ? -fabs(a) : fabs(a);
39 VA_DEVICE_FUN VA_REAL
valib_max(VA_REAL a, VA_REAL b)
41 return (a > b) ? a : b;
49 VA_DEVICE_FUN VA_REAL
valib_min(VA_REAL a, VA_REAL b)
51 return (a < b) ? a : b;
83 VA_REAL q = pow(b/(3.*a),3) - (b*c)/(6.*a*a) + d/(2.*a);
84 VA_REAL p = (3*a*c - b*b) / (9*a*a);
86 VA_REAL delta = - (p*p*p + q*q);
89 printf(
"q: %lf \n", q);
90 printf(
"p: %lf \n", p);
91 printf(
"delta: %lf \n", delta);
96 printf(
"This is a good case with two complex roots.\n");
101 printf(
"Discriminant is positive, this is not a good way to get roots.\n");
107 VA_REAL sr = sqrt(-delta);
110 printf(
"sr: %lf \n", sr);
113 VA_REAL mqpsr = -q + sr;
114 VA_REAL mqmsr = -q - sr;
117 printf(
"mqpsr: %lf \n", mqpsr);
118 printf(
"mqmsr: %lf \n", mqmsr);
125 printf(
"fabs(u*v + p): %lf \n", fabs(u*v + p));
127 if (fabs(u*v + p) > 1.e-7) {
129 printf(
"Check of uv = -p failed.");
135 VA_REAL complex e1 = -0.5 + sqrt(3.)/2.*I;
136 VA_REAL complex e2 = -0.5 - sqrt(3.)/2.*I;
138 VA_REAL complex y1 = u + v;
139 VA_REAL complex y2 = e1*u + e2*v;
140 VA_REAL complex y3 = e2*u + e1*v;
142 VA_REAL shift = -b/(3.*a);
149 VA_REAL complex check1 = pow(a*(*x1),3) + pow(b*(*x1),2) + c*(*x1) + d;
150 VA_REAL complex check2 = pow(a*(*x2),3) + pow(b*(*x2),2) + c*(*x2) + d;
151 VA_REAL complex check3 = pow(a*(*x3),3) + pow(b*(*x3),2) + c*(*x3) + d;
153 if (abs(check1) > 1.e-6) {
155 printf(
"Check of x1 failed.");
158 if (abs(check2) > 1.e-6) {
160 printf(
"Check of x2 failed.");
163 if (abs(check3) > 1.e-6) {
165 printf(
"Check of x3 failed.");
187 for (i = 0; i < 3; i++) {
198 VA_REAL result = (VA_REAL) 0.;
200 for (i = 0; i < 3; i++)
202 result = result + v1[i]*v2[i];
213 v3[0] = v1[1] * v2[2] - v1[2] * v2[1];
214 v3[1] = v1[2] * v2[0] - v1[0] * v2[2];
215 v3[2] = v1[0] * v2[1] - v1[1] * v2[0];
232 for (j = 0; j < 3; j++) {
233 for (i = 0; i < 3; i++) {
246 for (j = 0; j < 3; j++) {
247 for (i = 0; i < 3; i++) {
261 for (j = 0; j < 3; j++) {
262 for (i = 0; i < 3; i++) {
263 *norm = *norm + A[j*3+i]*A[j*3+i];
275 *tr3 = A[0] + A[4] + A[8];
283 VA_REAL *second_invariant)
285 *second_invariant = ( A[0]*A[4] + A[4]*A[8] + A[0]*A[8] )
286 - ( A[1]*A[3] + A[2]*A[6] + A[5]*A[7] );
295 *det = A[0] * A[4] * A[8]
300 - A[6] * A[4] * A[2];
325 for (i = 0; i < 3; i++) {
326 for (j = i+1; j < 3; j++) {
330 A[i + j*3] = (VA_REAL) 0.5 * (ud + ld);
331 A[j + i*3] = (VA_REAL) 0.5 * (ld - ud);
351 A[0] = A[0] - (VA_REAL) (1./3.) * trace;
352 A[4] = A[4] - (VA_REAL) (1./3.) * trace;
353 A[8] = A[8] - (VA_REAL) (1./3.) * trace;
362 VA_REAL *x, VA_REAL *y)
365 for (i = 0; i < 3; i++) {
367 for (j = 0; j < 3; j++) {
369 y[i] = y[i] + A[j*3+i]*x[j];
372 y[i] = y[i] + A[i*3+j]*x[j];
384 VA_REAL *x, VA_REAL *y,
392 for (j = 0; j < 3; j++) {
394 for (i = 0; i < 3; i++) {
395 A[ind] = A[ind] + x[i]*tmp;
406 VA_DEVICE_FUN
void valib_tatt3(VA_REAL *A, VA_REAL *T, VA_REAL *TATT)
415 for (i = 0; i < 3; i++) {
416 for (j = 0; j < 3; j++) {
430 VA_REAL *A, VA_REAL *B, VA_REAL *C)
433 for (i = 0; i < 3; i++) {
434 for (j = 0; j < 3; j++) {
435 for (k = 0; k < 3; k++) {
436 if (trans_a == 0 && trans_b == 0) {
437 C[j*3+i] = C[j*3+i] + A[k*3+i] * B[j*3+k];
439 else if (trans_a != 0 && trans_b == 0) {
440 C[j*3+i] = C[j*3+i] + A[i*3+k] * B[j*3+k];
442 else if (trans_a == 0 && trans_b != 0) {
443 C[j*3+i] = C[j*3+i] + A[k*3+i] * B[k*3+j];
446 C[j*3+i] = C[j*3+i] + A[i*3+k] * B[k*3+j];
463 for (j = 0; j < 3; j++) {
464 for (i = 0; i < j; i++) {
469 for (k = 0; k < 3; k++) {
470 Q[j*3+k] = Q[j*3+k] - factor * Q[i*3+k];
476 for (k = 0; k < 3; k++) {
477 Q[j*3+k] = Q[j*3+k] / sqrt(factor);
491 VA_REAL IdevMod = Idev / 3.;
496 dev[0] = dev[0] - IdevMod;
497 dev[4] = dev[4] - IdevMod;
498 dev[8] = dev[8] - IdevMod;
503 VA_REAL IIdevMod = -IIdev / 3.;
508 VA_REAL IIIdevMod = 0.5 * IIIdev;
511 VA_REAL aux = IIdevMod*IIdevMod*IIdevMod - IIIdevMod*IIIdevMod;
513 if (aux < 0. || fabs(IIIdevMod) < 1.e-15) {
517 phi = atan( sqrt(aux) / IIIdevMod ) / 3.;
521 if (phi < 0.) phi += VA_PI / 3.;
524 eigval[0] = IdevMod + 2.*sqrt(IIdevMod) * cos(phi);
525 eigval[1] = IdevMod - sqrt(IIdevMod) * (cos(phi) + sqrt(3.) * sin(phi));
526 eigval[2] = IdevMod - sqrt(IIdevMod) * (cos(phi) - sqrt(3.) * sin(phi));
546 for (
int i = start+1; i<3; i++) {
547 if (fabs(A[i+jcol*3]) > fabs(A[ipiv+jcol*3]))
551 VA_REAL numerical_zero = 1.0e-6;
552 if (fabs(A[ipiv+jcol*3]) < numerical_zero) {
554 printf(
"Warning: no nonzero pivot in column %d.\n", jcol);
574 x[0] = 0.; x[1] = 0.; x[2] = 0.;
577 int num_zero_cols = 0;
580 for (
int ji = 0; ji < 3; ji++) {
585 printf(
"There is no meaningful pivot in the %d-th column.\n",j);
592 if (num_zero_cols > 0) {
593 if (num_zero_cols > 1) {
595 printf(
"Oops! The matrix has more than one zero columns. I search only for 1-dimensional nullspace.\n");
609 for (
int j = 0; j < 3; j++) {
615 printf(
"Matrix in GEM after 1st swap: \n");
616 for (
int i = 0; i < 3; i++) {
617 for (
int j = 0; j < 3; j++) {
618 printf(
" %lf ", A[i+j*3]);
627 for (
int i = 1; i < 3; i++) {
629 for (
int j = 0; j < 3; j++) {
630 A[i+j*3] -= coef*A[j*3];
635 printf(
"Matrix in GEM without 1st col: \n");
636 for (
int i = 0; i < 3; i++) {
637 for (
int j = 0; j < 3; j++) {
638 printf(
" %lf ", A[i+j*3]);
660 printf(
"Something is wrong with the matrix, the defect seems larger than one.");
667 for (
int j = 0; j < 3; j++) {
673 coef = A[2+jpiv*3] / A[1+jpiv*3];
674 for (
int j = 0; j < 3; j++) {
675 A[2+j*3] -= coef*A[1+j*3];
678 coef = A[0+jpiv*3] / A[1+jpiv*3];
679 for (
int j = 0; j < 3; j++) {
680 A[j*3] -= coef*A[1+j*3];
686 printf(
"Something is wrong, the matrix seems to be full rank.\n");
693 printf(
"Matrix in GEM before dividing by diagonal: \n");
694 for (
int i = 0; i < 3; i++) {
695 for (
int j = 0; j < 3; j++) {
696 printf(
" %lf ", A[i+j*3]);
704 for (
int j = 0; j < 3; j++) {
708 for (
int j = 0; j < 3; j++) {
713 printf(
"Matrix in GEM in the upper right echelon form:\n");
714 for (
int i = 0; i < 3; i++) {
715 for (
int j = 0; j < 3; j++) {
716 printf(
" %d %lf ", i+j*3, A[i+j*3]);
724 x[jpiv] = -A[jpiv+jfree*3];
729 for (
int i = 0; i < 3; i++) {
744 VA_REAL sinb, VA_REAL cosb,
745 VA_REAL sing, VA_REAL cosg,
748 Q[0] = cosa*cosb*cosg - sina*sing;
749 Q[1] = -cosa*cosb*sing - sina*cosg;
751 Q[3] = sina*cosb*cosg + cosa*sing;
752 Q[4] = -sina*cosb*sing + cosa*cosg;
771 Q[0] = n[0]; Q[1] = n[1]; Q[2] = n[2];
774 Q[3] = n[1]; Q[4] = n[2]; Q[5] = n[0];
775 Q[6] = n[2]; Q[7] = n[0]; Q[8] = n[1];
783 for (i = 0; i < 3; i++) {
795 for (i = 0; i < 3; i++) {
808 #endif // VA_LINALG3_H