Actual source code: dshep.c

  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2013, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.

  8:    SLEPc is free software: you can redistribute it and/or modify it under  the
  9:    terms of version 3 of the GNU Lesser General Public License as published by
 10:    the Free Software Foundation.

 12:    SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
 13:    WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
 14:    FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
 15:    more details.

 17:    You  should have received a copy of the GNU Lesser General  Public  License
 18:    along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
 19:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 20: */

 22: #include <slepc-private/dsimpl.h>      /*I "slepcds.h" I*/
 23: #include <slepcblaslapack.h>

 27: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
 28: {

 32:   DSAllocateMat_Private(ds,DS_MAT_A);
 33:   DSAllocateMat_Private(ds,DS_MAT_Q);
 34:   DSAllocateMatReal_Private(ds,DS_MAT_T);
 35:   PetscFree(ds->perm);
 36:   PetscMalloc(ld*sizeof(PetscInt),&ds->perm);
 37:   PetscLogObjectMemory(ds,ld*sizeof(PetscInt));
 38:   return(0);
 39: }

 41: /*   0       l           k                 n-1
 42:     -----------------------------------------
 43:     |*       ·           ·                  |
 44:     |  *     ·           ·                  |
 45:     |    *   ·           ·                  |
 46:     |      * ·           ·                  |
 47:     |· · · · o           o                  |
 48:     |          o         o                  |
 49:     |            o       o                  |
 50:     |              o     o                  |
 51:     |                o   o                  |
 52:     |                  o o                  |
 53:     |· · · · o o o o o o o x                |
 54:     |                    x x x              |
 55:     |                      x x x            |
 56:     |                        x x x          |
 57:     |                          x x x        |
 58:     |                            x x x      |
 59:     |                              x x x    |
 60:     |                                x x x  |
 61:     |                                  x x x|
 62:     |                                    x x|
 63:     -----------------------------------------
 64: */

 68: static PetscErrorCode DSSwitchFormat_HEP(DS ds,PetscBool tocompact)
 69: {
 71:   PetscReal      *T = ds->rmat[DS_MAT_T];
 72:   PetscScalar    *A = ds->mat[DS_MAT_A];
 73:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld;

 76:   if (tocompact) { /* switch from dense (arrow) to compact storage */
 77:     PetscMemzero(T,3*ld*sizeof(PetscReal));
 78:     for (i=0;i<k;i++) {
 79:       T[i] = PetscRealPart(A[i+i*ld]);
 80:       T[i+ld] = PetscRealPart(A[k+i*ld]);
 81:     }
 82:     for (i=k;i<n-1;i++) {
 83:       T[i] = PetscRealPart(A[i+i*ld]);
 84:       T[i+ld] = PetscRealPart(A[i+1+i*ld]);
 85:     }
 86:     T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
 87:     if (ds->extrarow) T[n-1+ld] = PetscRealPart(A[n+(n-1)*ld]);
 88:   } else { /* switch from compact (arrow) to dense storage */
 89:     PetscMemzero(A,ld*ld*sizeof(PetscScalar));
 90:     for (i=0;i<k;i++) {
 91:       A[i+i*ld] = T[i];
 92:       A[k+i*ld] = T[i+ld];
 93:       A[i+k*ld] = T[i+ld];
 94:     }
 95:     A[k+k*ld] = T[k];
 96:     for (i=k+1;i<n;i++) {
 97:       A[i+i*ld] = T[i];
 98:       A[i-1+i*ld] = T[i-1+ld];
 99:       A[i+(i-1)*ld] = T[i-1+ld];
100:     }
101:     if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
102:   }
103:   return(0);
104: }

108: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
109: {
110:   PetscErrorCode    ierr;
111:   PetscViewerFormat format;
112:   PetscInt          i,j,r,c,rows;
113:   PetscReal         value;
114:   const char        *methodname[] = {
115:                      "Implicit QR method (_steqr)",
116:                      "Relatively Robust Representations (_stevr)",
117:                      "Divide and Conquer method (_stedc)"
118:   };
119:   const int         nmeth=sizeof(methodname)/sizeof(methodname[0]);

122:   PetscViewerGetFormat(viewer,&format);
123:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
124:     if (ds->method>=nmeth) {
125:       PetscViewerASCIIPrintf(viewer,"solving the problem with: INVALID METHOD\n");
126:     } else {
127:       PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
128:     }
129:     return(0);
130:   }
131:   if (ds->compact) {
132:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
133:     rows = ds->extrarow? ds->n+1: ds->n;
134:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
135:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
136:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
137:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
138:       for (i=0;i<ds->n;i++) {
139:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,*(ds->rmat[DS_MAT_T]+i));
140:       }
141:       for (i=0;i<rows-1;i++) {
142:         r = PetscMax(i+2,ds->k+1);
143:         c = i+1;
144:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",r,c,*(ds->rmat[DS_MAT_T]+ds->ld+i));
145:         if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
146:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",c,r,*(ds->rmat[DS_MAT_T]+ds->ld+i));
147:         }
148:       }
149:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
150:     } else {
151:       for (i=0;i<rows;i++) {
152:         for (j=0;j<ds->n;j++) {
153:           if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
154:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
155:           else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
156:           else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
157:           else value = 0.0;
158:           PetscViewerASCIIPrintf(viewer," %18.16e ",value);
159:         }
160:         PetscViewerASCIIPrintf(viewer,"\n");
161:       }
162:     }
163:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
164:     PetscViewerFlush(viewer);
165:   } else {
166:     DSViewMat_Private(ds,viewer,DS_MAT_A);
167:   }
168:   if (ds->state>DS_STATE_INTERMEDIATE) {
169:     DSViewMat_Private(ds,viewer,DS_MAT_Q);
170:   }
171:   return(0);
172: }

176: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
177: {
178:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
179:   PetscInt       ld = ds->ld,i;

183:   switch (mat) {
184:     case DS_MAT_X:
185:     case DS_MAT_Y:
186:       if (j) {
187:         if (ds->state>=DS_STATE_CONDENSED) {
188:           PetscMemcpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld*sizeof(PetscScalar));
189:         } else {
190:           PetscMemzero(ds->mat[mat]+(*j)*ld,ld*sizeof(PetscScalar));
191:           *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
192:         }
193:       } else {
194:         if (ds->state>=DS_STATE_CONDENSED) {
195:           PetscMemcpy(ds->mat[mat],Q,ld*ld*sizeof(PetscScalar));
196:         } else {
197:           PetscMemzero(ds->mat[mat],ld*ld*sizeof(PetscScalar));
198:           for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
199:         }
200:       }
201:       if (rnorm) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
202:       break;
203:     case DS_MAT_U:
204:     case DS_MAT_VT:
205:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
206:       break;
207:     default:
208:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
209:   }
210:   return(0);
211: }

215: PetscErrorCode DSNormalize_HEP(DS ds,DSMatType mat,PetscInt col)
216: {
218:   switch (mat) {
219:     case DS_MAT_X:
220:     case DS_MAT_Y:
221:     case DS_MAT_Q:
222:       /* All the matrices resulting from DSVectors and DSSolve are already normalized */
223:       break;
224:     case DS_MAT_U:
225:     case DS_MAT_VT:
226:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented yet");
227:       break;
228:     default:
229:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
230:   }
231:   return(0);
232: }

236: /*
237:   ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form

239:                 [ d 0 0 0 e ]
240:                 [ 0 d 0 0 e ]
241:             A = [ 0 0 d 0 e ]
242:                 [ 0 0 0 d e ]
243:                 [ e e e e d ]

245:   to tridiagonal form

247:                 [ d e 0 0 0 ]
248:                 [ e d e 0 0 ]
249:    T = Q'*A*Q = [ 0 e d e 0 ],
250:                 [ 0 0 e d e ]
251:                 [ 0 0 0 e d ]

253:   where Q is an orthogonal matrix. Rutishauser's algorithm is used to
254:   perform the reduction, which requires O(n**2) flops. The accumulation
255:   of the orthogonal factor Q, however, requires O(n**3) flops.

257:   Arguments
258:   =========

260:   N       (input) INTEGER
261:           The order of the matrix A.  N >= 0.

263:   D       (input/output) DOUBLE PRECISION array, dimension (N)
264:           On entry, the diagonal entries of the matrix A to be
265:           reduced.
266:           On exit, the diagonal entries of the reduced matrix T.

268:   E       (input/output) DOUBLE PRECISION array, dimension (N-1)
269:           On entry, the off-diagonal entries of the matrix A to be
270:           reduced.
271:           On exit, the subdiagonal entries of the reduced matrix T.

273:   Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
274:           On exit, the orthogonal matrix Q.

276:   LDQ     (input) INTEGER
277:           The leading dimension of the array Q.

279:   Note
280:   ====
281:   Based on Fortran code contributed by Daniel Kressner
282: */
283: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
284: {
285: #if defined(SLEPC_MISSING_LAPACK_LARTG)
287:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
288: #else
289:   PetscBLASInt i,j,j2,one=1;
290:   PetscReal    c,s,p,off,temp;

293:   if (n<=2) return(0);

295:   for (j=0;j<n-2;j++) {

297:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
298:     temp = e[j+1];
299:     PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&e[j],&c,&s,&e[j+1]));
300:     s = -s;

302:     /* Apply rotation to diagonal elements */
303:     temp   = d[j+1];
304:     e[j]   = c*s*(temp-d[j]);
305:     d[j+1] = s*s*d[j] + c*c*temp;
306:     d[j]   = c*c*d[j] + s*s*temp;

308:     /* Apply rotation to Q */
309:     j2 = j+2;
310:     PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));

312:     /* Chase newly introduced off-diagonal entry to the top left corner */
313:     for (i=j-1;i>=0;i--) {
314:       off  = -s*e[i];
315:       e[i] = c*e[i];
316:       temp = e[i+1];
317:       PetscStackCallBLAS("LAPACKlartg",LAPACKlartg_(&temp,&off,&c,&s,&e[i+1]));
318:       s = -s;
319:       temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
320:       p = s*temp;
321:       d[i+1] += p;
322:       d[i] -= p;
323:       e[i] = -e[i] - c*temp;
324:       j2 = j+2;
325:       PetscStackCallBLAS("BLASrot",BLASrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
326:     }
327:   }
328:   return(0);
329: #endif
330: }

334: /*
335:    Reduce to tridiagonal form by means of ArrowTridiag.
336: */
337: static PetscErrorCode DSIntermediate_HEP(DS ds)
338: {
339: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
341:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
342: #else
344:   PetscInt       i;
345:   PetscBLASInt   n1,n2,n3,lwork,info,l,n,ld,off;
346:   PetscScalar    *A,*Q,*work,*tau;
347:   PetscReal      *d,*e;

350:   PetscBLASIntCast(ds->n,&n);
351:   PetscBLASIntCast(ds->l,&l);
352:   PetscBLASIntCast(ds->ld,&ld);
353:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
354:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
355:   n3 = n1+n2;
356:   off = l+l*ld;
357:   A  = ds->mat[DS_MAT_A];
358:   Q  = ds->mat[DS_MAT_Q];
359:   d  = ds->rmat[DS_MAT_T];
360:   e  = ds->rmat[DS_MAT_T]+ld;
361:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
362:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;

364:   if (ds->compact) {

366:     if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);

368:   } else {

370:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

372:     if (ds->state<DS_STATE_INTERMEDIATE) {
373:       DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
374:       DSAllocateWork_Private(ds,ld+ld*ld,0,0);
375:       tau  = ds->work;
376:       work = ds->work+ld;
377:       lwork = ld*ld;
378:       PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
379:       if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSYTRD %d",info);
380:       PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
381:       if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGTR %d",info);
382:     } else {
383:       /* copy tridiagonal to d,e */
384:       for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
385:       for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
386:     }
387:   }
388:   return(0);
389: #endif
390: }

394: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
395: {
397:   PetscInt       n,l,i,*perm,ld=ds->ld;
398:   PetscScalar    *A;
399:   PetscReal      *d;

402:   if (!ds->comparison) return(0);
403:   n = ds->n;
404:   l = ds->l;
405:   A  = ds->mat[DS_MAT_A];
406:   d = ds->rmat[DS_MAT_T];
407:   perm = ds->perm;
408:   if (!rr) {
409:     DSSortEigenvaluesReal_Private(ds,d,perm);
410:   } else {
411:     DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
412:   }
413:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
414:   DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
415:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
416:   if (!ds->compact) {
417:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
418:   }
419:   return(0);
420: }

424: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
425: {
427:   PetscInt       i;
428:   PetscBLASInt   n,ld,incx=1;
429:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;
430:   PetscReal      *e,beta;

433:   PetscBLASIntCast(ds->n,&n);
434:   PetscBLASIntCast(ds->ld,&ld);
435:   A  = ds->mat[DS_MAT_A];
436:   Q  = ds->mat[DS_MAT_Q];
437:   e  = ds->rmat[DS_MAT_T]+ld;

439:   if (ds->compact) {
440:     beta = e[n-1];
441:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
442:     ds->k = n;
443:   } else {
444:     DSAllocateWork_Private(ds,2*ld,0,0);
445:     x = ds->work;
446:     y = ds->work+ld;
447:     for (i=0;i<n;i++) x[i] = A[n+i*ld];
448:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
449:     for (i=0;i<n;i++) A[n+i*ld] = y[i];
450:     ds->k = n;
451:   }
452:   return(0);
453: }

457: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
458: {
459: #if defined(PETSC_MISSING_LAPACK_STEQR)
461:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
462: #else
464:   PetscInt       i;
465:   PetscBLASInt   n1,n2,n3,info,l,n,ld,off;
466:   PetscScalar    *Q,*A;
467:   PetscReal      *d,*e;

470:   PetscBLASIntCast(ds->n,&n);
471:   PetscBLASIntCast(ds->l,&l);
472:   PetscBLASIntCast(ds->ld,&ld);
473:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
474:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
475:   n3 = n1+n2;
476:   off = l+l*ld;
477:   Q  = ds->mat[DS_MAT_Q];
478:   A  = ds->mat[DS_MAT_A];
479:   d  = ds->rmat[DS_MAT_T];
480:   e  = ds->rmat[DS_MAT_T]+ld;

482:   /* Reduce to tridiagonal form */
483:   DSIntermediate_HEP(ds);

485:   /* Solve the tridiagonal eigenproblem */
486:   for (i=0;i<l;i++) wr[i] = d[i];

488:   DSAllocateWork_Private(ds,0,2*ld,0);
489:   PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
490:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEQR %d",info);
491:   for (i=l;i<n;i++) wr[i] = d[i];

493:   /* Create diagonal matrix as a result */
494:   if (ds->compact) {
495:     PetscMemzero(e,(n-1)*sizeof(PetscReal));
496:   } else {
497:     for (i=l;i<n;i++) {
498:       PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
499:     }
500:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
501:   }

503:   /* Set zero wi */
504:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
505:   return(0);
506: #endif
507: }

511: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
512: {
513: #if defined(SLEPC_MISSING_LAPACK_STEVR)
515:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
516: #else
518:   PetscInt       i;
519:   PetscBLASInt   n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
520:   PetscScalar    *A,*Q,*W=NULL,one=1.0,zero=0.0;
521:   PetscReal      *d,*e,abstol=0.0,vl,vu;
522: #if defined(PETSC_USE_COMPLEX)
523:   PetscInt       j;
524:   PetscReal      *ritz;
525: #endif

528:   PetscBLASIntCast(ds->n,&n);
529:   PetscBLASIntCast(ds->l,&l);
530:   PetscBLASIntCast(ds->ld,&ld);
531:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
532:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
533:   n3 = n1+n2;
534:   off = l+l*ld;
535:   A  = ds->mat[DS_MAT_A];
536:   Q  = ds->mat[DS_MAT_Q];
537:   d  = ds->rmat[DS_MAT_T];
538:   e  = ds->rmat[DS_MAT_T]+ld;

540:   /* Reduce to tridiagonal form */
541:   DSIntermediate_HEP(ds);

543:   /* Solve the tridiagonal eigenproblem */
544:   for (i=0;i<l;i++) wr[i] = d[i];

546:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* Q contains useful info */
547:     DSAllocateMat_Private(ds,DS_MAT_W);
548:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
549:     W = ds->mat[DS_MAT_W];
550:   }
551: #if defined(PETSC_USE_COMPLEX)
552:   DSAllocateMatReal_Private(ds,DS_MAT_Q);
553: #endif
554:   lwork = 20*ld;
555:   liwork = 10*ld;
556:   DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
557:   isuppz = ds->iwork+liwork;
558: #if defined(PETSC_USE_COMPLEX)
559:   ritz = ds->rwork+lwork;
560:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
561:   for (i=l;i<n;i++) wr[i] = ritz[i];
562: #else
563:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
564: #endif
565:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack DSTEVR %d",info);
566: #if defined(PETSC_USE_COMPLEX)
567:   for (i=l;i<n;i++)
568:     for (j=l;j<n;j++)
569:       Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
570: #endif
571:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* accumulate previous Q */
572:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
573:     DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
574:   }
575:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);

577:   /* Create diagonal matrix as a result */
578:   if (ds->compact) {
579:     PetscMemzero(e,(n-1)*sizeof(PetscReal));
580:   } else {
581:     for (i=l;i<n;i++) {
582:       PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
583:     }
584:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
585:   }

587:   /* Set zero wi */
588:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
589:   return(0);
590: #endif
591: }

595: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
596: {
597: #if defined(SLEPC_MISSING_LAPACK_STEDC) || defined(SLEPC_MISSING_LAPACK_ORMTR)
599:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC/ORMTR - Lapack routine is unavailable");
600: #else
602:   PetscInt       i;
603:   PetscBLASInt   n1,info,l,ld,off,lrwork,liwork;
604:   PetscScalar    *Q,*A;
605:   PetscReal      *d,*e;
606: #if defined(PETSC_USE_COMPLEX)
607:   PetscBLASInt   lwork;
608:   PetscInt       j;
609: #endif

612:   PetscBLASIntCast(ds->l,&l);
613:   PetscBLASIntCast(ds->ld,&ld);
614:   PetscBLASIntCast(ds->n-ds->l,&n1);
615:   off = l+l*ld;
616:   Q  = ds->mat[DS_MAT_Q];
617:   A  = ds->mat[DS_MAT_A];
618:   d  = ds->rmat[DS_MAT_T];
619:   e  = ds->rmat[DS_MAT_T]+ld;

621:   /* Reduce to tridiagonal form */
622:   DSIntermediate_HEP(ds);

624:   /* Solve the tridiagonal eigenproblem */
625:   for (i=0;i<l;i++) wr[i] = d[i];

627:   lrwork = 5*n1*n1+3*n1+1;
628:   liwork = 5*n1*n1+6*n1+6;
629: #if !defined(PETSC_USE_COMPLEX)
630:   DSAllocateWork_Private(ds,0,lrwork,liwork);
631:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
632: #else
633:   lwork = ld*ld;
634:   DSAllocateWork_Private(ds,lwork,lrwork,liwork);
635:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
636:   /* Fixing Lapack bug*/
637:   for (j=ds->l;j<ds->n;j++)
638:     for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
639: #endif
640:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xSTEDC %d",info);
641:   for (i=l;i<ds->n;i++) wr[i] = d[i];

643:   /* Create diagonal matrix as a result */
644:   if (ds->compact) {
645:     PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
646:   } else {
647:     for (i=l;i<ds->n;i++) {
648:       PetscMemzero(A+l+i*ld,(ds->n-l)*sizeof(PetscScalar));
649:     }
650:     for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
651:   }

653:   /* Set zero wi */
654:   if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
655:   return(0);
656: #endif
657: }

661: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
662: {
663:   PetscInt       i,ld=ds->ld,l=ds->l;
664:   PetscScalar    *A;

667:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
668:   A = ds->mat[DS_MAT_A];
669:   if (!ds->compact && ds->extrarow && ds->k==ds->n) {
670:     for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
671:   }
672:   if (ds->extrarow) ds->k = n;
673:   else ds->k = 0;
674:   ds->n = n;
675:   return(0);
676: }

680: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
681: {
682: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(SLEPC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
684:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
685: #else
687:   PetscScalar    *work;
688:   PetscReal      *rwork;
689:   PetscBLASInt   *ipiv;
690:   PetscBLASInt   lwork,info,n,ld;
691:   PetscReal      hn,hin;
692:   PetscScalar    *A;

695:   PetscBLASIntCast(ds->n,&n);
696:   PetscBLASIntCast(ds->ld,&ld);
697:   lwork = 8*ld;
698:   DSAllocateWork_Private(ds,lwork,ld,ld);
699:   work  = ds->work;
700:   rwork = ds->rwork;
701:   ipiv  = ds->iwork;
702:   DSSwitchFormat_HEP(ds,PETSC_FALSE);

704:   /* use workspace matrix W to avoid overwriting A */
705:   DSAllocateMat_Private(ds,DS_MAT_W);
706:   A = ds->mat[DS_MAT_W];
707:   PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);

709:   /* norm of A */
710:   hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);

712:   /* norm of inv(A) */
713:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
714:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRF %d",info);
715:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
716:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGETRI %d",info);
717:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

719:   *cond = hn*hin;
720:   return(0);
721: #endif
722: }

726: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
727: {
728: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(SLEPC_MISSING_LAPACK_ORGQR)
730:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
731: #else
733:   PetscInt       i,j,k=ds->k;
734:   PetscScalar    *Q,*A,*R,*tau,*work;
735:   PetscBLASInt   ld,n1,n0,lwork,info;

738:   PetscBLASIntCast(ds->ld,&ld);
739:   DSAllocateWork_Private(ds,ld*ld,0,0);
740:   tau = ds->work;
741:   work = ds->work+ld;
742:   PetscBLASIntCast(ld*(ld-1),&lwork);
743:   DSAllocateMat_Private(ds,DS_MAT_W);
744:   A  = ds->mat[DS_MAT_A];
745:   Q  = ds->mat[DS_MAT_Q];
746:   R  = ds->mat[DS_MAT_W];
747:   /* Copy I+alpha*A */
748:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
749:   PetscMemzero(R,ld*ld*sizeof(PetscScalar));
750:   for (i=0;i<k;i++) {
751:     Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
752:     Q[k+i*ld] = alpha*A[k+i*ld];
753:   }
754:   /* Compute qr */
755:   PetscBLASIntCast(k+1,&n1);
756:   PetscBLASIntCast(k,&n0);
757:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
758:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGEQRF %d",info);
759:   /* Copy R from Q */
760:   for (j=0;j<k;j++)
761:     for (i=0;i<=j;i++)
762:       R[i+j*ld] = Q[i+j*ld];
763:   /* Compute orthogonal matrix in Q */
764:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
765:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xORGQR %d",info);
766:   /* Compute the updated matrix of projected problem */
767:   for (j=0;j<k;j++)
768:     for (i=0;i<k+1;i++)
769:       A[j*ld+i] = Q[i*ld+j];
770:   alpha = -1.0/alpha;
771:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
772:   for (i=0;i<k;i++)
773:     A[ld*i+i]-=alpha;
774:   return(0);
775: #endif
776: }

780: PetscErrorCode DSFunction_EXP_HEP_DIAG(DS ds)
781: {
783:   PetscScalar    *eig,one=1.0,zero=0.0;
784:   PetscInt       i,j;
785:   PetscBLASInt   n,ld;
786:   PetscScalar    *F,*Q,*W;

789:   PetscBLASIntCast(ds->n,&n);
790:   PetscBLASIntCast(ds->ld,&ld);
791:   PetscMalloc(n*sizeof(PetscScalar),&eig);
792:   DSSolve(ds,eig,NULL);
793:   if (!ds->mat[DS_MAT_W]) {
794:     DSAllocateMat_Private(ds,DS_MAT_W);
795:   }
796:   W  = ds->mat[DS_MAT_W];
797:   Q  = ds->mat[DS_MAT_Q];
798:   F  = ds->mat[DS_MAT_F];

800:   /* W = exp(Lambda)*Q' */
801:   for (i=0;i<n;i++) {
802:     for (j=0;j<n;j++) {
803:       W[i+j*ld] = Q[j+i*ld]*PetscExpScalar(eig[i]);
804:     }
805:   }
806:   /* F = Q*W */
807:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,W,&ld,&zero,F,&ld));
808:   PetscFree(eig);
809:   return(0);
810: }

814: PETSC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
815: {
817:   ds->ops->allocate      = DSAllocate_HEP;
818:   ds->ops->view          = DSView_HEP;
819:   ds->ops->vectors       = DSVectors_HEP;
820:   ds->ops->solve[0]      = DSSolve_HEP_QR;
821:   ds->ops->solve[1]      = DSSolve_HEP_MRRR;
822:   ds->ops->solve[2]      = DSSolve_HEP_DC;
823:   ds->ops->sort          = DSSort_HEP;
824:   ds->ops->truncate      = DSTruncate_HEP;
825:   ds->ops->update        = DSUpdateExtraRow_HEP;
826:   ds->ops->cond          = DSCond_HEP;
827:   ds->ops->transrks      = DSTranslateRKS_HEP;
828:   ds->ops->normalize     = DSNormalize_HEP;

830:   ds->ops->computefun[SLEPC_FUNCTION_EXP][0] = DSFunction_EXP_HEP_DIAG;
831:   return(0);
832: }