Actual source code: ks-slice.c
slepc-3.15.0 2021-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
15: References:
17: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
18: solving sparse symmetric generalized eigenproblems", SIAM J.
19: Matrix Anal. Appl. 15(1):228-272, 1994.
21: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
22: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
23: 2012.
24: */
26: #include <slepc/private/epsimpl.h>
27: #include "krylovschur.h"
29: static PetscBool cited = PETSC_FALSE;
30: static const char citation[] =
31: "@Article{slepc-slice,\n"
32: " author = \"C. Campos and J. E. Roman\",\n"
33: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
34: " journal = \"Numer. Algorithms\",\n"
35: " volume = \"60\",\n"
36: " number = \"2\",\n"
37: " pages = \"279--295\",\n"
38: " year = \"2012,\"\n"
39: " doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
40: "}\n";
42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: #define InertiaMismatch(h,d) \
45: do { \
46: SETERRQ1(PetscObjectComm((PetscObject)h),1,"Mismatch between number of values found and information from inertia%s",d?"":", consider using EPSKrylovSchurSetDetectZeros()"); \
47: } while (0)
49: static PetscErrorCode EPSSliceResetSR(EPS eps)
50: {
51: PetscErrorCode ierr;
52: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
53: EPS_SR sr=ctx->sr;
54: EPS_shift s;
57: if (sr) {
58: if (ctx->npart>1) {
59: BVDestroy(&sr->V);
60: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
61: }
62: /* Reviewing list of shifts to free memory */
63: s = sr->s0;
64: if (s) {
65: while (s->neighb[1]) {
66: s = s->neighb[1];
67: PetscFree(s->neighb[0]);
68: }
69: PetscFree(s);
70: }
71: PetscFree(sr);
72: }
73: ctx->sr = NULL;
74: return(0);
75: }
77: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
78: {
79: PetscErrorCode ierr;
80: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
83: if (!ctx->global) return(0);
84: /* Reset auxiliary EPS */
85: EPSSliceResetSR(ctx->eps);
86: EPSReset(ctx->eps);
87: EPSSliceResetSR(eps);
88: PetscFree(ctx->inertias);
89: PetscFree(ctx->shifts);
90: return(0);
91: }
93: PetscErrorCode EPSDestroy_KrylovSchur_Slice(EPS eps)
94: {
95: PetscErrorCode ierr;
96: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
99: if (!ctx->global) return(0);
100: /* Destroy auxiliary EPS */
101: EPSReset_KrylovSchur_Slice(eps);
102: EPSDestroy(&ctx->eps);
103: if (ctx->npart>1) {
104: PetscSubcommDestroy(&ctx->subc);
105: if (ctx->commset) {
106: MPI_Comm_free(&ctx->commrank);CHKERRMPI(ierr);
107: ctx->commset = PETSC_FALSE;
108: }
109: ISDestroy(&ctx->isrow);
110: ISDestroy(&ctx->iscol);
111: MatDestroyMatrices(1,&ctx->submata);
112: MatDestroyMatrices(1,&ctx->submatb);
113: }
114: PetscFree(ctx->subintervals);
115: PetscFree(ctx->nconv_loc);
116: return(0);
117: }
119: /*
120: EPSSliceAllocateSolution - Allocate memory storage for common variables such
121: as eigenvalues and eigenvectors. The argument extra is used for methods
122: that require a working basis slightly larger than ncv.
123: */
124: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
125: {
126: PetscErrorCode ierr;
127: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
128: PetscReal eta;
129: PetscInt k;
130: PetscLogDouble cnt;
131: BVType type;
132: BVOrthogType orthog_type;
133: BVOrthogRefineType orthog_ref;
134: BVOrthogBlockType ob_type;
135: Mat matrix;
136: Vec t;
137: EPS_SR sr = ctx->sr;
140: /* allocate space for eigenvalues and friends */
141: k = PetscMax(1,sr->numEigs);
142: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
143: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
144: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
145: PetscLogObjectMemory((PetscObject)eps,cnt);
147: /* allocate sr->V and transfer options from eps->V */
148: BVDestroy(&sr->V);
149: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
150: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
151: if (!eps->V) { EPSGetBV(eps,&eps->V); }
152: if (!((PetscObject)(eps->V))->type_name) {
153: BVSetType(sr->V,BVSVEC);
154: } else {
155: BVGetType(eps->V,&type);
156: BVSetType(sr->V,type);
157: }
158: STMatCreateVecsEmpty(eps->st,&t,NULL);
159: BVSetSizesFromVec(sr->V,t,k);
160: VecDestroy(&t);
161: EPS_SetInnerProduct(eps);
162: BVGetMatrix(eps->V,&matrix,NULL);
163: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
164: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
165: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
166: return(0);
167: }
169: static PetscErrorCode EPSSliceGetEPS(EPS eps)
170: {
171: PetscErrorCode ierr;
172: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
173: BV V;
174: BVType type;
175: PetscReal eta;
176: BVOrthogType orthog_type;
177: BVOrthogRefineType orthog_ref;
178: BVOrthogBlockType ob_type;
179: PetscInt i;
180: PetscReal h,a,b;
181: PetscRandom rand;
182: EPS_SR sr=ctx->sr;
185: if (!ctx->eps) { EPSKrylovSchurGetChildEPS(eps,&ctx->eps); }
187: /* Determine subintervals */
188: if (ctx->npart==1) {
189: a = eps->inta; b = eps->intb;
190: } else {
191: if (!ctx->subintset) { /* uniform distribution if no set by user */
192: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
193: h = (eps->intb-eps->inta)/ctx->npart;
194: a = eps->inta+ctx->subc->color*h;
195: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
196: PetscFree(ctx->subintervals);
197: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
198: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
199: ctx->subintervals[ctx->npart] = eps->intb;
200: } else {
201: a = ctx->subintervals[ctx->subc->color];
202: b = ctx->subintervals[ctx->subc->color+1];
203: }
204: }
205: EPSSetInterval(ctx->eps,a,b);
206: EPSSetConvergenceTest(ctx->eps,eps->conv);
207: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
208: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
210: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
211: ctx_local->detect = ctx->detect;
213: /* transfer options from eps->V */
214: EPSGetBV(ctx->eps,&V);
215: BVGetRandomContext(V,&rand); /* make sure the random context is available when duplicating */
216: if (!eps->V) { EPSGetBV(eps,&eps->V); }
217: if (!((PetscObject)(eps->V))->type_name) {
218: BVSetType(V,BVSVEC);
219: } else {
220: BVGetType(eps->V,&type);
221: BVSetType(V,type);
222: }
223: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
224: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
226: ctx->eps->which = eps->which;
227: ctx->eps->max_it = eps->max_it;
228: ctx->eps->tol = eps->tol;
229: ctx->eps->purify = eps->purify;
230: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
231: EPSSetProblemType(ctx->eps,eps->problem_type);
232: EPSSetUp(ctx->eps);
233: ctx->eps->nconv = 0;
234: ctx->eps->its = 0;
235: for (i=0;i<ctx->eps->ncv;i++) {
236: ctx->eps->eigr[i] = 0.0;
237: ctx->eps->eigi[i] = 0.0;
238: ctx->eps->errest[i] = 0.0;
239: }
240: return(0);
241: }
243: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
244: {
246: KSP ksp,kspr;
247: PC pc;
248: Mat F;
249: PetscReal nzshift=shift;
250: PetscBool flg;
253: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
254: if (inertia) *inertia = eps->n;
255: } else if (shift <= PETSC_MIN_REAL) {
256: if (inertia) *inertia = 0;
257: if (zeros) *zeros = 0;
258: } else {
259: /* If the shift is zero, perturb it to a very small positive value.
260: The goal is that the nonzero pattern is the same in all cases and reuse
261: the symbolic factorizations */
262: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
263: STSetShift(eps->st,nzshift);
264: STGetKSP(eps->st,&ksp);
265: KSPGetPC(ksp,&pc);
266: PetscObjectTypeCompare((PetscObject)pc,PCREDUNDANT,&flg);
267: if (flg) {
268: PCRedundantGetKSP(pc,&kspr);
269: KSPGetPC(kspr,&pc);
270: }
271: PCFactorGetMatrix(pc,&F);
272: MatGetInertia(F,inertia,zeros,NULL);
273: }
274: if (inertia) { PetscInfo2(eps,"Computed inertia at shift %g: %D\n",(double)nzshift,*inertia); }
275: return(0);
276: }
278: /*
279: Dummy backtransform operation
280: */
281: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
282: {
284: return(0);
285: }
287: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
288: {
289: PetscErrorCode ierr;
290: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
291: EPS_SR sr,sr_loc,sr_glob;
292: PetscInt nEigs,dssz=1,i,zeros=0,off=0,method,hiteig=0;
293: PetscMPIInt nproc,rank=0,aux;
294: PetscReal r;
295: MPI_Request req;
296: Mat A,B=NULL;
297: DSParallelType ptype;
300: if (ctx->global) {
301: EPSCheckHermitianDefiniteCondition(eps,PETSC_TRUE," with spectrum slicing");
302: EPSCheckSinvertCayleyCondition(eps,PETSC_TRUE," with spectrum slicing");
303: if (eps->inta==eps->intb) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"This solver does not support computing all eigenvalues unless you provide a computational interval with EPSSetInterval()");
304: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
305: EPSCheckUnsupportedCondition(eps,EPS_FEATURE_ARBITRARY | EPS_FEATURE_REGION | EPS_FEATURE_STOPPING,PETSC_TRUE," with spectrum slicing");
306: EPSCheckIgnoredCondition(eps,EPS_FEATURE_BALANCE,PETSC_TRUE," with spectrum slicing");
307: if (eps->tol==PETSC_DEFAULT) {
308: #if defined(PETSC_USE_REAL_SINGLE)
309: eps->tol = SLEPC_DEFAULT_TOL;
310: #else
311: /* use tighter tolerance */
312: eps->tol = SLEPC_DEFAULT_TOL*1e-2;
313: #endif
314: }
315: if (eps->max_it==PETSC_DEFAULT) eps->max_it = 100;
316: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
317: if (eps->n>10 && ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
318: }
319: eps->ops->backtransform = EPSBackTransform_Skip;
321: /* create spectrum slicing context and initialize it */
322: EPSSliceResetSR(eps);
323: PetscNewLog(eps,&sr);
324: ctx->sr = sr;
325: sr->itsKs = 0;
326: sr->nleap = 0;
327: sr->nMAXCompl = eps->nev/4;
328: sr->iterCompl = eps->max_it/4;
329: sr->sPres = NULL;
330: sr->nS = 0;
332: if (ctx->npart==1 || ctx->global) {
333: /* check presence of ends and finding direction */
334: if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
335: sr->int0 = eps->inta;
336: sr->int1 = eps->intb;
337: sr->dir = 1;
338: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
339: sr->hasEnd = PETSC_FALSE;
340: } else sr->hasEnd = PETSC_TRUE;
341: } else {
342: sr->int0 = eps->intb;
343: sr->int1 = eps->inta;
344: sr->dir = -1;
345: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
346: }
347: }
348: if (ctx->global) {
349: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
350: /* create subintervals and initialize auxiliary eps for slicing runs */
351: EPSKrylovSchurGetChildEPS(eps,&ctx->eps);
352: /* prevent computation of factorization in global eps */
353: STSetTransform(eps->st,PETSC_FALSE);
354: EPSSliceGetEPS(eps);
355: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
356: if (ctx->npart>1) {
357: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
358: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);CHKERRMPI(ierr);
359: if (!rank) {
360: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);CHKERRMPI(ierr);
361: }
362: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
363: PetscFree(ctx->nconv_loc);
364: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
365: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);CHKERRMPI(ierr);
366: if (sr->dir<0) off = 1;
367: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
368: PetscMPIIntCast(sr_loc->numEigs,&aux);
369: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);CHKERRMPI(ierr);
370: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);CHKERRMPI(ierr);
371: } else {
372: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);CHKERRMPI(ierr);
373: if (!rank) {
374: PetscMPIIntCast(sr_loc->numEigs,&aux);
375: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);CHKERRMPI(ierr);
376: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);CHKERRMPI(ierr);
377: }
378: PetscMPIIntCast(ctx->npart,&aux);
379: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
380: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
381: }
382: nEigs = 0;
383: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
384: } else {
385: nEigs = sr_loc->numEigs;
386: sr->inertia0 = sr_loc->inertia0;
387: sr->dir = sr_loc->dir;
388: }
389: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
390: sr->numEigs = nEigs;
391: eps->nev = nEigs;
392: eps->ncv = nEigs;
393: eps->mpd = nEigs;
394: } else {
395: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
396: sr_glob = ctx_glob->sr;
397: if (ctx->npart>1) {
398: sr->dir = sr_glob->dir;
399: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
400: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
401: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
402: else sr->hasEnd = PETSC_TRUE;
403: }
404: /* sets first shift */
405: STSetShift(eps->st,(sr->int0==0.0)?10.0/PETSC_MAX_REAL:sr->int0);
406: STSetUp(eps->st);
408: /* compute inertia0 */
409: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
410: /* undocumented option to control what to do when an eigenvalue is found:
411: - error out if it's the endpoint of the user-provided interval (or sub-interval)
412: - if it's an endpoint computed internally:
413: + if hiteig=0 error out
414: + else if hiteig=1 the subgroup that hit the eigenvalue does nothing
415: + otherwise the subgroup that hit the eigenvalue perturbs the shift and recomputes inertia
416: */
417: PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&hiteig,NULL);
418: if (zeros) { /* error in factorization */
419: if (sr->int0==ctx->eps->inta || sr->int0==ctx->eps->intb) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in the interval endpoint");
420: else if (ctx_glob->subintset && !hiteig) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
421: else {
422: if (hiteig==1) { /* idle subgroup */
423: sr->inertia0 = -1;
424: } else { /* perturb shift */
425: sr->int0 *= (1.0+SLICE_PTOL);
426: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
427: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
428: }
429: }
430: }
431: if (ctx->npart>1) {
432: /* inertia1 is received from neighbour */
433: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);CHKERRMPI(ierr);
434: if (!rank) {
435: if (sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1))) { /* send inertia0 to neighbour0 */
436: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);CHKERRMPI(ierr);
437: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);CHKERRMPI(ierr);
438: }
439: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
440: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);CHKERRMPI(ierr);
441: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);CHKERRMPI(ierr);
442: }
443: if (sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
444: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
445: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);CHKERRMPI(ierr);
446: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);CHKERRMPI(ierr);
447: }
448: }
449: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
450: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
451: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
452: } else sr_glob->inertia1 = sr->inertia1;
453: }
455: /* last process in eps comm computes inertia1 */
456: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
457: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
458: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
459: if (!rank && sr->inertia0==-1) {
460: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
461: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);CHKERRMPI(ierr);
462: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);CHKERRMPI(ierr);
463: }
464: if (sr->hasEnd) {
465: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
466: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
467: }
468: }
470: /* number of eigenvalues in interval */
471: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
472: if (ctx->npart>1) {
473: /* memory allocate for subinterval eigenpairs */
474: EPSSliceAllocateSolution(eps,1);
475: }
476: dssz = eps->ncv+1;
477: DSGetParallel(ctx->eps->ds,&ptype);
478: DSSetParallel(eps->ds,ptype);
479: DSGetMethod(ctx->eps->ds,&method);
480: DSSetMethod(eps->ds,method);
481: }
482: DSSetType(eps->ds,DSHEP);
483: DSSetCompact(eps->ds,PETSC_TRUE);
484: DSAllocate(eps->ds,dssz);
485: /* keep state of subcomm matrices to check that the user does not modify them */
486: EPSGetOperators(eps,&A,&B);
487: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
488: PetscObjectGetId((PetscObject)A,&ctx->Aid);
489: if (B) {
490: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
491: PetscObjectGetId((PetscObject)B,&ctx->Bid);
492: } else {
493: ctx->Bstate=0;
494: ctx->Bid=0;
495: }
496: return(0);
497: }
499: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
500: {
501: PetscErrorCode ierr;
502: Vec v,vg,v_loc;
503: IS is1,is2;
504: VecScatter vec_sc;
505: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
506: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
507: PetscScalar *array;
508: EPS_SR sr_loc;
509: BV V_loc;
512: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
513: V_loc = sr_loc->V;
515: /* Gather parallel eigenvectors */
516: BVGetColumn(eps->V,0,&v);
517: VecGetOwnershipRange(v,&n0,&m0);
518: BVRestoreColumn(eps->V,0,&v);
519: BVGetColumn(ctx->eps->V,0,&v);
520: VecGetLocalSize(v,&nloc);
521: BVRestoreColumn(ctx->eps->V,0,&v);
522: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
523: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
524: idx = -1;
525: for (si=0;si<ctx->npart;si++) {
526: j = 0;
527: for (i=n0;i<m0;i++) {
528: idx1[j] = i;
529: idx2[j++] = i+eps->n*si;
530: }
531: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
532: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
533: BVGetColumn(eps->V,0,&v);
534: VecScatterCreate(v,is1,vg,is2,&vec_sc);
535: BVRestoreColumn(eps->V,0,&v);
536: ISDestroy(&is1);
537: ISDestroy(&is2);
538: for (i=0;i<ctx->nconv_loc[si];i++) {
539: BVGetColumn(eps->V,++idx,&v);
540: if (ctx->subc->color==si) {
541: BVGetColumn(V_loc,i,&v_loc);
542: VecGetArray(v_loc,&array);
543: VecPlaceArray(vg,array);
544: }
545: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
546: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
547: if (ctx->subc->color==si) {
548: VecResetArray(vg);
549: VecRestoreArray(v_loc,&array);
550: BVRestoreColumn(V_loc,i,&v_loc);
551: }
552: BVRestoreColumn(eps->V,idx,&v);
553: }
554: VecScatterDestroy(&vec_sc);
555: }
556: PetscFree2(idx1,idx2);
557: VecDestroy(&vg);
558: return(0);
559: }
561: /*
562: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
563: */
564: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
565: {
566: PetscErrorCode ierr;
567: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
570: if (ctx->global && ctx->npart>1) {
571: EPSComputeVectors(ctx->eps);
572: EPSSliceGatherEigenVectors(eps);
573: }
574: return(0);
575: }
577: #define SWAP(a,b,t) {t=a;a=b;b=t;}
579: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
580: {
581: PetscErrorCode ierr;
582: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
583: PetscInt i=0,j,tmpi;
584: PetscReal v,tmpr;
585: EPS_shift s;
588: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
589: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
590: if (!ctx->sr->s0) { /* EPSSolve not called yet */
591: *n = 2;
592: } else {
593: *n = 1;
594: s = ctx->sr->s0;
595: while (s) {
596: (*n)++;
597: s = s->neighb[1];
598: }
599: }
600: PetscMalloc1(*n,shifts);
601: PetscMalloc1(*n,inertias);
602: if (!ctx->sr->s0) { /* EPSSolve not called yet */
603: (*shifts)[0] = ctx->sr->int0;
604: (*shifts)[1] = ctx->sr->int1;
605: (*inertias)[0] = ctx->sr->inertia0;
606: (*inertias)[1] = ctx->sr->inertia1;
607: } else {
608: s = ctx->sr->s0;
609: while (s) {
610: (*shifts)[i] = s->value;
611: (*inertias)[i++] = s->inertia;
612: s = s->neighb[1];
613: }
614: (*shifts)[i] = ctx->sr->int1;
615: (*inertias)[i] = ctx->sr->inertia1;
616: }
617: /* remove possible duplicate in last position */
618: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
619: /* sort result */
620: for (i=0;i<*n;i++) {
621: v = (*shifts)[i];
622: for (j=i+1;j<*n;j++) {
623: if (v > (*shifts)[j]) {
624: SWAP((*shifts)[i],(*shifts)[j],tmpr);
625: SWAP((*inertias)[i],(*inertias)[j],tmpi);
626: v = (*shifts)[i];
627: }
628: }
629: }
630: return(0);
631: }
633: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
634: {
635: PetscErrorCode ierr;
636: PetscMPIInt rank,nproc;
637: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
638: PetscInt i,idx,j;
639: PetscInt *perm_loc,off=0,*inertias_loc,ns;
640: PetscScalar *eigr_loc;
641: EPS_SR sr_loc;
642: PetscReal *shifts_loc;
643: PetscMPIInt *disp,*ns_loc,aux;
646: eps->nconv = 0;
647: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
648: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
650: /* Gather the shifts used and the inertias computed */
651: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
652: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
653: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
654: ns--;
655: for (i=0;i<ns;i++) {
656: inertias_loc[i] = inertias_loc[i+1];
657: shifts_loc[i] = shifts_loc[i+1];
658: }
659: }
660: PetscMalloc1(ctx->npart,&ns_loc);
661: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);CHKERRMPI(ierr);
662: PetscMPIIntCast(ns,&aux);
663: if (!rank) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank);CHKERRMPI(ierr); }
664: PetscMPIIntCast(ctx->npart,&aux);
665: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
666: ctx->nshifts = 0;
667: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
668: PetscFree(ctx->inertias);
669: PetscFree(ctx->shifts);
670: PetscMalloc1(ctx->nshifts,&ctx->inertias);
671: PetscMalloc1(ctx->nshifts,&ctx->shifts);
673: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
674: eigr_loc = sr_loc->eigr;
675: perm_loc = sr_loc->perm;
676: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);CHKERRMPI(ierr);
677: PetscMalloc1(ctx->npart,&disp);
678: disp[0] = 0;
679: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
680: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
681: PetscMPIIntCast(sr_loc->numEigs,&aux);
682: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank);CHKERRMPI(ierr); /* eigenvalues */
683: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank);CHKERRMPI(ierr); /* perm */
684: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
685: PetscMPIIntCast(ns,&aux);
686: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank);CHKERRMPI(ierr); /* shifts */
687: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank);CHKERRMPI(ierr); /* inertias */
688: MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
689: } else { /* subcommunicators with different size */
690: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);CHKERRMPI(ierr);
691: if (!rank) {
692: PetscMPIIntCast(sr_loc->numEigs,&aux);
693: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank);CHKERRMPI(ierr); /* eigenvalues */
694: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank);CHKERRMPI(ierr); /* perm */
695: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
696: PetscMPIIntCast(ns,&aux);
697: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank);CHKERRMPI(ierr); /* shifts */
698: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank);CHKERRMPI(ierr); /* inertias */
699: MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
700: }
701: PetscMPIIntCast(eps->nconv,&aux);
702: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
703: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
704: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
705: PetscMPIIntCast(ctx->nshifts,&aux);
706: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
707: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));CHKERRMPI(ierr);
708: }
709: /* Update global array eps->perm */
710: idx = ctx->nconv_loc[0];
711: for (i=1;i<ctx->npart;i++) {
712: off += ctx->nconv_loc[i-1];
713: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
714: }
716: /* Gather parallel eigenvectors */
717: PetscFree(ns_loc);
718: PetscFree(disp);
719: PetscFree(shifts_loc);
720: PetscFree(inertias_loc);
721: return(0);
722: }
724: /*
725: Fills the fields of a shift structure
726: */
727: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
728: {
729: PetscErrorCode ierr;
730: EPS_shift s,*pending2;
731: PetscInt i;
732: EPS_SR sr;
733: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
736: sr = ctx->sr;
737: if ((neighb0 && val==neighb0->value) || (neighb1 && val==neighb1->value)) {
738: sr->nPend++;
739: return(0);
740: }
741: PetscNewLog(eps,&s);
742: s->value = val;
743: s->neighb[0] = neighb0;
744: if (neighb0) neighb0->neighb[1] = s;
745: s->neighb[1] = neighb1;
746: if (neighb1) neighb1->neighb[0] = s;
747: s->comp[0] = PETSC_FALSE;
748: s->comp[1] = PETSC_FALSE;
749: s->index = -1;
750: s->neigs = 0;
751: s->nconv[0] = s->nconv[1] = 0;
752: s->nsch[0] = s->nsch[1]=0;
753: /* Inserts in the stack of pending shifts */
754: /* If needed, the array is resized */
755: if (sr->nPend >= sr->maxPend) {
756: sr->maxPend *= 2;
757: PetscMalloc1(sr->maxPend,&pending2);
758: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
759: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
760: PetscFree(sr->pending);
761: sr->pending = pending2;
762: }
763: sr->pending[sr->nPend++]=s;
764: return(0);
765: }
767: /* Prepare for Rational Krylov update */
768: static PetscErrorCode EPSPrepareRational(EPS eps)
769: {
770: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
771: PetscErrorCode ierr;
772: PetscInt dir,i,k,ld,nv;
773: PetscScalar *A;
774: EPS_SR sr = ctx->sr;
775: Vec v;
778: DSGetLeadingDimension(eps->ds,&ld);
779: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
780: dir*=sr->dir;
781: k = 0;
782: for (i=0;i<sr->nS;i++) {
783: if (dir*PetscRealPart(sr->S[i])>0.0) {
784: sr->S[k] = sr->S[i];
785: sr->S[sr->nS+k] = sr->S[sr->nS+i];
786: BVGetColumn(sr->Vnext,k,&v);
787: BVCopyVec(eps->V,eps->nconv+i,v);
788: BVRestoreColumn(sr->Vnext,k,&v);
789: k++;
790: if (k>=sr->nS/2)break;
791: }
792: }
793: /* Copy to DS */
794: DSGetArray(eps->ds,DS_MAT_A,&A);
795: PetscArrayzero(A,ld*ld);
796: for (i=0;i<k;i++) {
797: A[i*(1+ld)] = sr->S[i];
798: A[k+i*ld] = sr->S[sr->nS+i];
799: }
800: sr->nS = k;
801: DSRestoreArray(eps->ds,DS_MAT_A,&A);
802: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
803: DSSetDimensions(eps->ds,nv,0,0,k);
804: /* Append u to V */
805: BVGetColumn(sr->Vnext,sr->nS,&v);
806: BVCopyVec(eps->V,sr->nv,v);
807: BVRestoreColumn(sr->Vnext,sr->nS,&v);
808: return(0);
809: }
811: /* Provides next shift to be computed */
812: static PetscErrorCode EPSExtractShift(EPS eps)
813: {
814: PetscErrorCode ierr;
815: PetscInt iner,zeros=0;
816: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
817: EPS_SR sr;
818: PetscReal newShift,diam,ptol;
819: EPS_shift sPres;
822: sr = ctx->sr;
823: if (sr->nPend > 0) {
824: if (sr->sPres==sr->pending[sr->nPend-1]) {
825: eps->reason = EPS_CONVERGED_ITERATING;
826: eps->its = 0;
827: sr->nPend--;
828: sr->sPres->rep = PETSC_TRUE;
829: return(0);
830: }
831: sr->sPrev = sr->sPres;
832: sr->sPres = sr->pending[--sr->nPend];
833: sPres = sr->sPres;
834: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
835: if (zeros) {
836: diam = PetscMin(PetscAbsReal(sPres->neighb[0]->value-sPres->value),PetscAbsReal(sPres->neighb[1]->value-sPres->value));
837: ptol = PetscMin(SLICE_PTOL,diam/2);
838: newShift = sPres->value*(1.0+ptol);
839: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
840: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
841: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
842: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
843: sPres->value = newShift;
844: }
845: sr->sPres->inertia = iner;
846: eps->target = sr->sPres->value;
847: eps->reason = EPS_CONVERGED_ITERATING;
848: eps->its = 0;
849: } else sr->sPres = NULL;
850: return(0);
851: }
853: /*
854: Symmetric KrylovSchur adapted to spectrum slicing:
855: Allows searching an specific amount of eigenvalues in the subintervals left and right.
856: Returns whether the search has succeeded
857: */
858: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
859: {
860: PetscErrorCode ierr;
861: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
862: PetscInt i,k,l,ld,nv,*iwork,j,count0,count1,iterCompl=0,n0,n1;
863: Mat U,Op;
864: PetscScalar *Q,*A;
865: PetscReal *a,*b,beta,lambda;
866: EPS_shift sPres;
867: PetscBool breakdown,complIterating,sch0,sch1;
868: EPS_SR sr = ctx->sr;
869: char messg[100];
872: /* Spectrum slicing data */
873: sPres = sr->sPres;
874: complIterating =PETSC_FALSE;
875: sch1 = sch0 = PETSC_TRUE;
876: DSGetLeadingDimension(eps->ds,&ld);
877: PetscMalloc1(2*ld,&iwork);
878: count0=0;count1=0; /* Found on both sides */
879: if (!sPres->rep && sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
880: /* Rational Krylov */
881: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
882: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
883: DSSetDimensions(eps->ds,l+1,0,0,0);
884: BVSetActiveColumns(eps->V,0,l+1);
885: DSGetMat(eps->ds,DS_MAT_Q,&U);
886: BVMultInPlace(eps->V,U,0,l+1);
887: MatDestroy(&U);
888: } else {
889: /* Get the starting Lanczos vector */
890: EPSGetStartVector(eps,0,NULL);
891: l = 0;
892: }
893: /* Restart loop */
894: while (eps->reason == EPS_CONVERGED_ITERATING) {
895: eps->its++; sr->itsKs++;
896: /* Compute an nv-step Lanczos factorization */
897: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
898: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
899: b = a + ld;
900: STGetOperator(eps->st,&Op);
901: BVMatLanczos(eps->V,Op,a,b,eps->nconv+l,&nv,&breakdown);
902: STRestoreOperator(eps->st,&Op);
903: sr->nv = nv;
904: beta = b[nv-1];
905: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
906: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
907: if (l==0) {
908: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
909: } else {
910: DSSetState(eps->ds,DS_STATE_RAW);
911: }
912: BVSetActiveColumns(eps->V,eps->nconv,nv);
914: /* Solve projected problem and compute residual norm estimates */
915: if (eps->its == 1 && l > 0) {/* After rational update */
916: DSGetArray(eps->ds,DS_MAT_A,&A);
917: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
918: b = a + ld;
919: k = eps->nconv+l;
920: A[k*ld+k-1] = A[(k-1)*ld+k];
921: A[k*ld+k] = a[k];
922: for (j=k+1; j< nv; j++) {
923: A[j*ld+j] = a[j];
924: A[j*ld+j-1] = b[j-1] ;
925: A[(j-1)*ld+j] = b[j-1];
926: }
927: DSRestoreArray(eps->ds,DS_MAT_A,&A);
928: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
929: DSSolve(eps->ds,eps->eigr,NULL);
930: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
931: DSSetCompact(eps->ds,PETSC_TRUE);
932: } else { /* Restart */
933: DSSolve(eps->ds,eps->eigr,NULL);
934: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
935: }
936: DSSynchronize(eps->ds,eps->eigr,NULL);
938: /* Residual */
939: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,0.0,1.0,&k);
940: /* Checking values obtained for completing */
941: for (i=0;i<k;i++) {
942: sr->back[i]=eps->eigr[i];
943: }
944: STBackTransform(eps->st,k,sr->back,eps->eigi);
945: count0=count1=0;
946: for (i=0;i<k;i++) {
947: lambda = PetscRealPart(sr->back[i]);
948: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
949: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
950: }
951: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
952: else {
953: /* Checks completion */
954: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
955: eps->reason = EPS_CONVERGED_TOL;
956: } else {
957: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
958: if (complIterating) {
959: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
960: } else if (k >= eps->nev) {
961: n0 = sPres->nsch[0]-count0;
962: n1 = sPres->nsch[1]-count1;
963: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
964: /* Iterating for completion*/
965: complIterating = PETSC_TRUE;
966: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
967: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
968: iterCompl = sr->iterCompl;
969: } else eps->reason = EPS_CONVERGED_TOL;
970: }
971: }
972: }
973: /* Update l */
974: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
975: else l = nv-k;
976: if (breakdown) l=0;
977: if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
979: if (eps->reason == EPS_CONVERGED_ITERATING) {
980: if (breakdown) {
981: /* Start a new Lanczos factorization */
982: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
983: EPSGetStartVector(eps,k,&breakdown);
984: if (breakdown) {
985: eps->reason = EPS_DIVERGED_BREAKDOWN;
986: PetscInfo(eps,"Unable to generate more start vectors\n");
987: }
988: } else {
989: /* Prepare the Rayleigh quotient for restart */
990: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
991: DSGetArray(eps->ds,DS_MAT_Q,&Q);
992: b = a + ld;
993: for (i=k;i<k+l;i++) {
994: a[i] = PetscRealPart(eps->eigr[i]);
995: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
996: }
997: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
998: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
999: }
1000: }
1001: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1002: DSGetMat(eps->ds,DS_MAT_Q,&U);
1003: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1004: MatDestroy(&U);
1006: /* Normalize u and append it to V */
1007: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1008: BVCopyColumn(eps->V,nv,k+l);
1009: }
1010: eps->nconv = k;
1011: if (eps->reason != EPS_CONVERGED_ITERATING) {
1012: /* Store approximated values for next shift */
1013: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1014: sr->nS = l;
1015: for (i=0;i<l;i++) {
1016: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1017: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1018: }
1019: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1020: }
1021: }
1022: /* Check for completion */
1023: for (i=0;i< eps->nconv; i++) {
1024: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1025: else sPres->nconv[0]++;
1026: }
1027: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
1028: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
1029: PetscSNPrintf(messg,sizeof(messg),"Lanczos: %D evals in [%g,%g]%s and %D evals in [%g,%g]%s\n",count0,(double)(sr->dir==1)?sPres->ext[0]:sPres->value,(double)(sr->dir==1)?sPres->value:sPres->ext[0],(sPres->comp[0])?"*":"",count1,(double)(sr->dir==1)?sPres->value:sPres->ext[1],(double)(sr->dir==1)?sPres->ext[1]:sPres->value,(sPres->comp[1])?"*":"");
1030: PetscInfo(eps,messg);
1031: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1]) InertiaMismatch(eps,ctx->detect);
1032: PetscFree(iwork);
1033: return(0);
1034: }
1036: /*
1037: Obtains value of subsequent shift
1038: */
1039: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1040: {
1041: PetscReal lambda,d_prev;
1042: PetscInt i,idxP;
1043: EPS_SR sr;
1044: EPS_shift sPres,s;
1045: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1048: sr = ctx->sr;
1049: sPres = sr->sPres;
1050: if (sPres->neighb[side]) {
1051: /* Completing a previous interval */
1052: *newS = (sPres->value + sPres->neighb[side]->value)/2;
1053: if (PetscAbsReal(sPres->value - *newS)/PetscAbsReal(sPres->value)<=100*PETSC_SQRT_MACHINE_EPSILON) *newS = sPres->value;
1054: } else { /* (Only for side=1). Creating a new interval. */
1055: if (sPres->neigs==0) {/* No value has been accepted*/
1056: if (sPres->neighb[0]) {
1057: /* Multiplying by 10 the previous distance */
1058: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1059: sr->nleap++;
1060: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1061: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1062: } else { /* First shift */
1063: if (eps->nconv != 0) {
1064: /* Unaccepted values give information for next shift */
1065: idxP=0;/* Number of values left from shift */
1066: for (i=0;i<eps->nconv;i++) {
1067: lambda = PetscRealPart(eps->eigr[i]);
1068: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1069: else break;
1070: }
1071: /* Avoiding subtraction of eigenvalues (might be the same).*/
1072: if (idxP>0) {
1073: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1074: } else {
1075: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1076: }
1077: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1078: } else { /* No values found, no information for next shift */
1079: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1080: }
1081: }
1082: } else { /* Accepted values found */
1083: sr->nleap = 0;
1084: /* Average distance of values in previous subinterval */
1085: s = sPres->neighb[0];
1086: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1087: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1088: }
1089: if (s) {
1090: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1091: } else { /* First shift. Average distance obtained with values in this shift */
1092: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1093: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1094: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1095: } else {
1096: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1097: }
1098: }
1099: /* Average distance is used for next shift by adding it to value on the right or to shift */
1100: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1101: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1102: } else { /* Last accepted value is on the left of shift. Adding to shift */
1103: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1104: }
1105: }
1106: /* End of interval can not be surpassed */
1107: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1108: }/* of neighb[side]==null */
1109: return(0);
1110: }
1112: /*
1113: Function for sorting an array of real values
1114: */
1115: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1116: {
1117: PetscReal re;
1118: PetscInt i,j,tmp;
1121: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1122: /* Insertion sort */
1123: for (i=1;i<nr;i++) {
1124: re = PetscRealPart(r[perm[i]]);
1125: j = i-1;
1126: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1127: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1128: }
1129: }
1130: return(0);
1131: }
1133: /* Stores the pairs obtained since the last shift in the global arrays */
1134: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1135: {
1136: PetscErrorCode ierr;
1137: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1138: PetscReal lambda,err,norm;
1139: PetscInt i,count;
1140: PetscBool iscayley;
1141: EPS_SR sr = ctx->sr;
1142: EPS_shift sPres;
1143: Vec v,w;
1146: sPres = sr->sPres;
1147: sPres->index = sr->indexEig;
1148: count = sr->indexEig;
1149: /* Back-transform */
1150: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1151: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1152: /* Sort eigenvalues */
1153: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1154: /* Values stored in global array */
1155: for (i=0;i<eps->nconv;i++) {
1156: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1157: err = eps->errest[eps->perm[i]];
1159: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1160: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1161: sr->eigr[count] = lambda;
1162: sr->errest[count] = err;
1163: /* Explicit purification */
1164: BVGetColumn(eps->V,eps->perm[i],&w);
1165: if (eps->purify) {
1166: BVGetColumn(sr->V,count,&v);
1167: STApply(eps->st,w,v);
1168: BVRestoreColumn(sr->V,count,&v);
1169: } else {
1170: BVInsertVec(sr->V,count,w);
1171: }
1172: BVRestoreColumn(eps->V,eps->perm[i],&w);
1173: BVNormColumn(sr->V,count,NORM_2,&norm);
1174: BVScaleColumn(sr->V,count,1.0/norm);
1175: count++;
1176: }
1177: }
1178: sPres->neigs = count - sr->indexEig;
1179: sr->indexEig = count;
1180: /* Global ordering array updating */
1181: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1182: return(0);
1183: }
1185: static PetscErrorCode EPSLookForDeflation(EPS eps)
1186: {
1187: PetscErrorCode ierr;
1188: PetscReal val;
1189: PetscInt i,count0=0,count1=0;
1190: EPS_shift sPres;
1191: PetscInt ini,fin,k,idx0,idx1;
1192: EPS_SR sr;
1193: Vec v;
1194: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1197: sr = ctx->sr;
1198: sPres = sr->sPres;
1200: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1201: else ini = 0;
1202: fin = sr->indexEig;
1203: /* Selection of ends for searching new values */
1204: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1205: else sPres->ext[0] = sPres->neighb[0]->value;
1206: if (!sPres->neighb[1]) {
1207: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1208: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1209: } else sPres->ext[1] = sPres->neighb[1]->value;
1210: /* Selection of values between right and left ends */
1211: for (i=ini;i<fin;i++) {
1212: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1213: /* Values to the right of left shift */
1214: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1215: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1216: else count1++;
1217: } else break;
1218: }
1219: /* The number of values on each side are found */
1220: if (sPres->neighb[0]) {
1221: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1222: if (sPres->nsch[0]<0) InertiaMismatch(eps,ctx->detect);
1223: } else sPres->nsch[0] = 0;
1225: if (sPres->neighb[1]) {
1226: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1227: if (sPres->nsch[1]<0) InertiaMismatch(eps,ctx->detect);
1228: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1230: /* Completing vector of indexes for deflation */
1231: idx0 = ini;
1232: idx1 = ini+count0+count1;
1233: k=0;
1234: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1235: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1236: BVSetNumConstraints(sr->Vnext,k);
1237: for (i=0;i<k;i++) {
1238: BVGetColumn(sr->Vnext,-i-1,&v);
1239: BVCopyVec(sr->V,sr->idxDef[i],v);
1240: BVRestoreColumn(sr->Vnext,-i-1,&v);
1241: }
1243: /* For rational Krylov */
1244: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1245: EPSPrepareRational(eps);
1246: }
1247: eps->nconv = 0;
1248: /* Get rid of temporary Vnext */
1249: BVDestroy(&eps->V);
1250: eps->V = sr->Vnext;
1251: sr->Vnext = NULL;
1252: return(0);
1253: }
1255: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1256: {
1257: PetscErrorCode ierr;
1258: PetscInt i,lds,ti;
1259: PetscReal newS;
1260: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1261: EPS_SR sr=ctx->sr;
1262: Mat A,B=NULL;
1263: PetscObjectState Astate,Bstate=0;
1264: PetscObjectId Aid,Bid=0;
1267: PetscCitationsRegister(citation,&cited);
1268: if (ctx->global) {
1269: EPSSolve_KrylovSchur_Slice(ctx->eps);
1270: ctx->eps->state = EPS_STATE_SOLVED;
1271: eps->reason = EPS_CONVERGED_TOL;
1272: if (ctx->npart>1) {
1273: /* Gather solution from subsolvers */
1274: EPSSliceGatherSolution(eps);
1275: } else {
1276: eps->nconv = sr->numEigs;
1277: eps->its = ctx->eps->its;
1278: PetscFree(ctx->inertias);
1279: PetscFree(ctx->shifts);
1280: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1281: }
1282: } else {
1283: if (ctx->npart==1) {
1284: sr->eigr = ctx->eps->eigr;
1285: sr->eigi = ctx->eps->eigi;
1286: sr->perm = ctx->eps->perm;
1287: sr->errest = ctx->eps->errest;
1288: sr->V = ctx->eps->V;
1289: }
1290: /* Check that the user did not modify subcomm matrices */
1291: EPSGetOperators(eps,&A,&B);
1292: PetscObjectStateGet((PetscObject)A,&Astate);
1293: PetscObjectGetId((PetscObject)A,&Aid);
1294: if (B) {
1295: PetscObjectStateGet((PetscObject)B,&Bstate);
1296: PetscObjectGetId((PetscObject)B,&Bid);
1297: }
1298: if (Astate!=ctx->Astate || (B && Bstate!=ctx->Bstate) || Aid!=ctx->Aid || (B && Bid!=ctx->Bid)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcomm matrices have been modified by user");
1299: /* Only with eigenvalues present in the interval ...*/
1300: if (sr->numEigs==0) {
1301: eps->reason = EPS_CONVERGED_TOL;
1302: return(0);
1303: }
1304: /* Array of pending shifts */
1305: sr->maxPend = 100; /* Initial size */
1306: sr->nPend = 0;
1307: PetscMalloc1(sr->maxPend,&sr->pending);
1308: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1309: EPSCreateShift(eps,sr->int0,NULL,NULL);
1310: /* extract first shift */
1311: sr->sPrev = NULL;
1312: sr->sPres = sr->pending[--sr->nPend];
1313: sr->sPres->inertia = sr->inertia0;
1314: eps->target = sr->sPres->value;
1315: sr->s0 = sr->sPres;
1316: sr->indexEig = 0;
1317: /* Memory reservation for auxiliary variables */
1318: lds = PetscMin(eps->mpd,eps->ncv);
1319: PetscCalloc1(lds*lds,&sr->S);
1320: PetscMalloc1(eps->ncv,&sr->back);
1321: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1322: for (i=0;i<sr->numEigs;i++) {
1323: sr->eigr[i] = 0.0;
1324: sr->eigi[i] = 0.0;
1325: sr->errest[i] = 0.0;
1326: sr->perm[i] = i;
1327: }
1328: /* Vectors for deflation */
1329: PetscMalloc1(sr->numEigs,&sr->idxDef);
1330: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1331: sr->indexEig = 0;
1332: /* Main loop */
1333: while (sr->sPres) {
1334: /* Search for deflation */
1335: EPSLookForDeflation(eps);
1336: /* KrylovSchur */
1337: EPSKrylovSchur_Slice(eps);
1339: EPSStoreEigenpairs(eps);
1340: /* Select new shift */
1341: if (!sr->sPres->comp[1]) {
1342: EPSGetNewShiftValue(eps,1,&newS);
1343: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1344: }
1345: if (!sr->sPres->comp[0]) {
1346: /* Completing earlier interval */
1347: EPSGetNewShiftValue(eps,0,&newS);
1348: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1349: }
1350: /* Preparing for a new search of values */
1351: EPSExtractShift(eps);
1352: }
1354: /* Updating eps values prior to exit */
1355: PetscFree(sr->S);
1356: PetscFree(sr->idxDef);
1357: PetscFree(sr->pending);
1358: PetscFree(sr->back);
1359: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1360: BVSetNumConstraints(sr->Vnext,0);
1361: BVDestroy(&eps->V);
1362: eps->V = sr->Vnext;
1363: eps->nconv = sr->indexEig;
1364: eps->reason = EPS_CONVERGED_TOL;
1365: eps->its = sr->itsKs;
1366: eps->nds = 0;
1367: if (sr->dir<0) {
1368: for (i=0;i<eps->nconv/2;i++) {
1369: ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1370: }
1371: }
1372: }
1373: return(0);
1374: }