1       SUBROUTINE DGEHRD( N, ILO, IHI, A, LDA, TAU, WORK, LWORK, INFO )
  2 *
  3 *  -- LAPACK routine (version 3.3.1)                                  --
  4 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  5 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  6 *  -- April 2009                                                      --
  7 *
  8 *     .. Scalar Arguments ..
  9       INTEGER            IHI, ILO, INFO, LDA, LWORK, N
 10 *     ..
 11 *     .. Array Arguments ..
 12       DOUBLE PRECISION  A( LDA, * ), TAU( * ), WORK( * )
 13 *     ..
 14 *
 15 *  Purpose
 16 *  =======
 17 *
 18 *  DGEHRD reduces a real general matrix A to upper Hessenberg form H by
 19 *  an orthogonal similarity transformation:  Q**T * A * Q = H .
 20 *
 21 *  Arguments
 22 *  =========
 23 *
 24 *  N       (input) INTEGER
 25 *          The order of the matrix A.  N >= 0.
 26 *
 27 *  ILO     (input) INTEGER
 28 *  IHI     (input) INTEGER
 29 *          It is assumed that A is already upper triangular in rows
 30 *          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally
 31 *          set by a previous call to DGEBAL; otherwise they should be
 32 *          set to 1 and N respectively. See Further Details.
 33 *          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0.
 34 *
 35 *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
 36 *          On entry, the N-by-N general matrix to be reduced.
 37 *          On exit, the upper triangle and the first subdiagonal of A
 38 *          are overwritten with the upper Hessenberg matrix H, and the
 39 *          elements below the first subdiagonal, with the array TAU,
 40 *          represent the orthogonal matrix Q as a product of elementary
 41 *          reflectors. See Further Details.
 42 *
 43 *  LDA     (input) INTEGER
 44 *          The leading dimension of the array A.  LDA >= max(1,N).
 45 *
 46 *  TAU     (output) DOUBLE PRECISION array, dimension (N-1)
 47 *          The scalar factors of the elementary reflectors (see Further
 48 *          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to
 49 *          zero.
 50 *
 51 *  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
 52 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 53 *
 54 *  LWORK   (input) INTEGER
 55 *          The length of the array WORK.  LWORK >= max(1,N).
 56 *          For optimum performance LWORK >= N*NB, where NB is the
 57 *          optimal blocksize.
 58 *
 59 *          If LWORK = -1, then a workspace query is assumed; the routine
 60 *          only calculates the optimal size of the WORK array, returns
 61 *          this value as the first entry of the WORK array, and no error
 62 *          message related to LWORK is issued by XERBLA.
 63 *
 64 *  INFO    (output) INTEGER
 65 *          = 0:  successful exit
 66 *          < 0:  if INFO = -i, the i-th argument had an illegal value.
 67 *
 68 *  Further Details
 69 *  ===============
 70 *
 71 *  The matrix Q is represented as a product of (ihi-ilo) elementary
 72 *  reflectors
 73 *
 74 *     Q = H(ilo) H(ilo+1) . . . H(ihi-1).
 75 *
 76 *  Each H(i) has the form
 77 *
 78 *     H(i) = I - tau * v * v**T
 79 *
 80 *  where tau is a real scalar, and v is a real vector with
 81 *  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on
 82 *  exit in A(i+2:ihi,i), and tau in TAU(i).
 83 *
 84 *  The contents of A are illustrated by the following example, with
 85 *  n = 7, ilo = 2 and ihi = 6:
 86 *
 87 *  on entry,                        on exit,
 88 *
 89 *  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a )
 90 *  (     a   a   a   a   a   a )    (      a   h   h   h   h   a )
 91 *  (     a   a   a   a   a   a )    (      h   h   h   h   h   h )
 92 *  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h )
 93 *  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h )
 94 *  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h )
 95 *  (                         a )    (                          a )
 96 *
 97 *  where a denotes an element of the original matrix A, h denotes a
 98 *  modified element of the upper Hessenberg matrix H, and vi denotes an
 99 *  element of the vector defining H(i).
100 *
101 *  This file is a slight modification of LAPACK-3.0's DGEHRD
102 *  subroutine incorporating improvements proposed by Quintana-Orti and
103 *  Van de Geijn (2006). (See DLAHR2.)
104 *
105 *  =====================================================================
106 *
107 *     .. Parameters ..
108       INTEGER            NBMAX, LDT
109       PARAMETER          ( NBMAX = 64, LDT = NBMAX+1 )
110       DOUBLE PRECISION  ZERO, ONE
111       PARAMETER          ( ZERO = 0.0D+0
112      $                     ONE = 1.0D+0 )
113 *     ..
114 *     .. Local Scalars ..
115       LOGICAL            LQUERY
116       INTEGER            I, IB, IINFO, IWS, J, LDWORK, LWKOPT, NB,
117      $                   NBMIN, NH, NX
118       DOUBLE PRECISION  EI
119 *     ..
120 *     .. Local Arrays ..
121       DOUBLE PRECISION  T( LDT, NBMAX )
122 *     ..
123 *     .. External Subroutines ..
124       EXTERNAL           DAXPY, DGEHD2, DGEMM, DLAHR2, DLARFB, DTRMM,
125      $                   XERBLA
126 *     ..
127 *     .. Intrinsic Functions ..
128       INTRINSIC          MAXMIN
129 *     ..
130 *     .. External Functions ..
131       INTEGER            ILAENV
132       EXTERNAL           ILAENV
133 *     ..
134 *     .. Executable Statements ..
135 *
136 *     Test the input parameters
137 *
138       INFO = 0
139       NB = MIN( NBMAX, ILAENV( 1'DGEHRD'' ', N, ILO, IHI, -1 ) )
140       LWKOPT = N*NB
141       WORK( 1 ) = LWKOPT
142       LQUERY = ( LWORK.EQ.-1 )
143       IF( N.LT.0 ) THEN
144          INFO = -1
145       ELSE IF( ILO.LT.1 .OR. ILO.GT.MAX1, N ) ) THEN
146          INFO = -2
147       ELSE IF( IHI.LT.MIN( ILO, N ) .OR. IHI.GT.N ) THEN
148          INFO = -3
149       ELSE IF( LDA.LT.MAX1, N ) ) THEN
150          INFO = -5
151       ELSE IF( LWORK.LT.MAX1, N ) .AND. .NOT.LQUERY ) THEN
152          INFO = -8
153       END IF
154       IF( INFO.NE.0 ) THEN
155          CALL XERBLA( 'DGEHRD'-INFO )
156          RETURN
157       ELSE IF( LQUERY ) THEN
158          RETURN
159       END IF
160 *
161 *     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero
162 *
163       DO 10 I = 1, ILO - 1
164          TAU( I ) = ZERO
165    10 CONTINUE
166       DO 20 I = MAX1, IHI ), N - 1
167          TAU( I ) = ZERO
168    20 CONTINUE
169 *
170 *     Quick return if possible
171 *
172       NH = IHI - ILO + 1
173       IF( NH.LE.1 ) THEN
174          WORK( 1 ) = 1
175          RETURN
176       END IF
177 *
178 *     Determine the block size
179 *
180       NB = MIN( NBMAX, ILAENV( 1'DGEHRD'' ', N, ILO, IHI, -1 ) )
181       NBMIN = 2
182       IWS = 1
183       IF( NB.GT.1 .AND. NB.LT.NH ) THEN
184 *
185 *        Determine when to cross over from blocked to unblocked code
186 *        (last block is always handled by unblocked code)
187 *
188          NX = MAX( NB, ILAENV( 3'DGEHRD'' ', N, ILO, IHI, -1 ) )
189          IF( NX.LT.NH ) THEN
190 *
191 *           Determine if workspace is large enough for blocked code
192 *
193             IWS = N*NB
194             IF( LWORK.LT.IWS ) THEN
195 *
196 *              Not enough workspace to use optimal NB:  determine the
197 *              minimum value of NB, and reduce NB or force use of
198 *              unblocked code
199 *
200                NBMIN = MAX2, ILAENV( 2'DGEHRD'' ', N, ILO, IHI,
201      $                 -1 ) )
202                IF( LWORK.GE.N*NBMIN ) THEN
203                   NB = LWORK / N
204                ELSE
205                   NB = 1
206                END IF
207             END IF
208          END IF
209       END IF
210       LDWORK = N
211 *
212 
213       IF( NB.LT.NBMIN .OR. NB.GE.NH ) THEN
214 *
215 *        Use unblocked code below
216 *
217          I = ILO
218 *
219       ELSE
220 *
221 *        Use blocked code
222 *
223          DO 40 I = ILO, IHI - 1 - NX, NB
224             IB = MIN( NB, IHI-I )
225 *
226 *           Reduce columns i:i+ib-1 to Hessenberg form, returning the
227 *           matrices V and T of the block reflector H = I - V*T*V**T
228 *           which performs the reduction, and also the matrix Y = A*V*T
229 *
230             CALL DLAHR2( IHI, I, IB, A( 1, I ), LDA, TAU( I ), T, LDT,
231      $                   WORK, LDWORK )
232 *
233 *           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the
234 *           right, computing  A := A - Y * V**T. V(i+ib,ib-1) must be set
235 *           to 1
236 *
237             EI = A( I+IB, I+IB-1 )
238             A( I+IB, I+IB-1 ) = ONE
239             CALL DGEMM( 'No transpose''Transpose'
240      $                  IHI, IHI-I-IB+1,
241      $                  IB, -ONE, WORK, LDWORK, A( I+IB, I ), LDA, ONE,
242      $                  A( 1, I+IB ), LDA )
243             A( I+IB, I+IB-1 ) = EI
244 *
245 *           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the
246 *           right
247 *
248             CALL DTRMM( 'Right''Lower''Transpose',
249      $                  'Unit', I, IB-1,
250      $                  ONE, A( I+1, I ), LDA, WORK, LDWORK )
251             DO 30 J = 0, IB-2
252                CALL DAXPY( I, -ONE, WORK( LDWORK*J+1 ), 1,
253      $                     A( 1, I+J+1 ), 1 )
254    30       CONTINUE
255 *
256 *           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the
257 *           left
258 *
259             CALL DLARFB( 'Left''Transpose''Forward',
260      $                   'Columnwise',
261      $                   IHI-I, N-I-IB+1, IB, A( I+1, I ), LDA, T, LDT,
262      $                   A( I+1, I+IB ), LDA, WORK, LDWORK )
263    40    CONTINUE
264       END IF
265 *
266 *     Use unblocked code to reduce the rest of the matrix
267 *
268       CALL DGEHD2( N, I, IHI, A, LDA, TAU, WORK, IINFO )
269       WORK( 1 ) = IWS
270 *
271       RETURN
272 *
273 *     End of DGEHRD
274 *
275       END