1       SUBROUTINE ZTZRZF( M, N, A, LDA, TAU, WORK, LWORK, INFO )
  2 *
  3 *  -- LAPACK routine (version 3.3.1) --
  4 *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  5 *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  6 *  -- April 2011                                                      --
  7 * @precisions normal z -> s d c
  8 *
  9 *     .. Scalar Arguments ..
 10       INTEGER            INFO, LDA, LWORK, M, N
 11 *     ..
 12 *     .. Array Arguments ..
 13       COMPLEX*16         A( LDA, * ), TAU( * ), WORK( * )
 14 *     ..
 15 *
 16 *  Purpose
 17 *  =======
 18 *
 19 *  ZTZRZF reduces the M-by-N ( M<=N ) complex upper trapezoidal matrix A
 20 *  to upper triangular form by means of unitary transformations.
 21 *
 22 *  The upper trapezoidal matrix A is factored as
 23 *
 24 *     A = ( R  0 ) * Z,
 25 *
 26 *  where Z is an N-by-N unitary matrix and R is an M-by-M upper
 27 *  triangular matrix.
 28 *
 29 *  Arguments
 30 *  =========
 31 *
 32 *  M       (input) INTEGER
 33 *          The number of rows of the matrix A.  M >= 0.
 34 *
 35 *  N       (input) INTEGER
 36 *          The number of columns of the matrix A.  N >= M.
 37 *
 38 *  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
 39 *          On entry, the leading M-by-N upper trapezoidal part of the
 40 *          array A must contain the matrix to be factorized.
 41 *          On exit, the leading M-by-M upper triangular part of A
 42 *          contains the upper triangular matrix R, and elements M+1 to
 43 *          N of the first M rows of A, with the array TAU, represent the
 44 *          unitary matrix Z as a product of M elementary reflectors.
 45 *
 46 *  LDA     (input) INTEGER
 47 *          The leading dimension of the array A.  LDA >= max(1,M).
 48 *
 49 *  TAU     (output) COMPLEX*16 array, dimension (M)
 50 *          The scalar factors of the elementary reflectors.
 51 *
 52 *  WORK    (workspace/output) COMPLEX*16 array, dimension (MAX(1,LWORK))
 53 *          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
 54 *
 55 *  LWORK   (input) INTEGER
 56 *          The dimension of the array WORK.  LWORK >= max(1,M).
 57 *          For optimum performance LWORK >= M*NB, where NB is
 58 *          the optimal blocksize.
 59 *
 60 *          If LWORK = -1, then a workspace query is assumed; the routine
 61 *          only calculates the optimal size of the WORK array, returns
 62 *          this value as the first entry of the WORK array, and no error
 63 *          message related to LWORK is issued by XERBLA.
 64 *
 65 *  INFO    (output) INTEGER
 66 *          = 0:  successful exit
 67 *          < 0:  if INFO = -i, the i-th argument had an illegal value
 68 *
 69 *  Further Details
 70 *  ===============
 71 *
 72 *  Based on contributions by
 73 *    A. Petitet, Computer Science Dept., Univ. of Tenn., Knoxville, USA
 74 *
 75 *  The factorization is obtained by Householder's method.  The kth
 76 *  transformation matrix, Z( k ), which is used to introduce zeros into
 77 *  the ( m - k + 1 )th row of A, is given in the form
 78 *
 79 *     Z( k ) = ( I     0   ),
 80 *              ( 0  T( k ) )
 81 *
 82 *  where
 83 *
 84 *     T( k ) = I - tau*u( k )*u( k )**H,   u( k ) = (   1    ),
 85 *                                                 (   0    )
 86 *                                                 ( z( k ) )
 87 *
 88 *  tau is a scalar and z( k ) is an ( n - m ) element vector.
 89 *  tau and z( k ) are chosen to annihilate the elements of the kth row
 90 *  of X.
 91 *
 92 *  The scalar tau is returned in the kth element of TAU and the vector
 93 *  u( k ) in the kth row of A, such that the elements of z( k ) are
 94 *  in  a( k, m + 1 ), ..., a( k, n ). The elements of R are returned in
 95 *  the upper triangular part of A.
 96 *
 97 *  Z is given by
 98 *
 99 *     Z =  Z( 1 ) * Z( 2 ) * ... * Z( m ).
100 *
101 *  =====================================================================
102 *
103 *     .. Parameters ..
104       COMPLEX*16         ZERO
105       PARAMETER          ( ZERO = ( 0.0D+00.0D+0 ) )
106 *     ..
107 *     .. Local Scalars ..
108       LOGICAL            LQUERY
109       INTEGER            I, IB, IWS, KI, KK, LDWORK, LWKMIN, LWKOPT,
110      $                   M1, MU, NB, NBMIN, NX
111 *     ..
112 *     .. External Subroutines ..
113       EXTERNAL           XERBLA, ZLARZB, ZLARZT, ZLATRZ
114 *     ..
115 *     .. Intrinsic Functions ..
116       INTRINSIC          MAXMIN
117 *     ..
118 *     .. External Functions ..
119       INTEGER            ILAENV
120       EXTERNAL           ILAENV
121 *     ..
122 *     .. Executable Statements ..
123 *
124 *     Test the input arguments
125 *
126       INFO = 0
127       LQUERY = ( LWORK.EQ.-1 )
128       IF( M.LT.0 ) THEN
129          INFO = -1
130       ELSE IF( N.LT.M ) THEN
131          INFO = -2
132       ELSE IF( LDA.LT.MAX1, M ) ) THEN
133          INFO = -4
134       END IF
135 *
136       IF( INFO.EQ.0 ) THEN
137          IF( M.EQ.0 .OR. M.EQ.N ) THEN
138             LWKOPT = 1
139             LWKMIN = 1
140          ELSE
141 *
142 *           Determine the block size.
143 *
144             NB = ILAENV( 1'ZGERQF'' ', M, N, -1-1 )
145             LWKOPT = M*NB
146             LWKMIN = MAX1, M )
147          END IF
148          WORK( 1 ) = LWKOPT
149 *
150          IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
151             INFO = -7
152          END IF
153       END IF
154 *
155       IF( INFO.NE.0 ) THEN
156          CALL XERBLA( 'ZTZRZF'-INFO )
157          RETURN
158       ELSE IF( LQUERY ) THEN
159          RETURN
160       END IF
161 *
162 *     Quick return if possible
163 *
164       IF( M.EQ.0 ) THEN
165          RETURN
166       ELSE IF( M.EQ.N ) THEN
167          DO 10 I = 1, N
168             TAU( I ) = ZERO
169    10    CONTINUE
170          RETURN
171       END IF
172 *
173       NBMIN = 2
174       NX = 1
175       IWS = M
176       IF( NB.GT.1 .AND. NB.LT.M ) THEN
177 *
178 *        Determine when to cross over from blocked to unblocked code.
179 *
180          NX = MAX0, ILAENV( 3'ZGERQF'' ', M, N, -1-1 ) )
181          IF( NX.LT.M ) THEN
182 *
183 *           Determine if workspace is large enough for blocked code.
184 *
185             LDWORK = M
186             IWS = LDWORK*NB
187             IF( LWORK.LT.IWS ) THEN
188 *
189 *              Not enough workspace to use optimal NB:  reduce NB and
190 *              determine the minimum value of NB.
191 *
192                NB = LWORK / LDWORK
193                NBMIN = MAX2, ILAENV( 2'ZGERQF'' ', M, N, -1,
194      $                 -1 ) )
195             END IF
196          END IF
197       END IF
198 *
199       IF( NB.GE.NBMIN .AND. NB.LT..AND. NX.LT.M ) THEN
200 *
201 *        Use blocked code initially.
202 *        The last kk rows are handled by the block method.
203 *
204          M1 = MIN( M+1, N )
205          KI = ( ( M-NX-1 ) / NB )*NB
206          KK = MIN( M, KI+NB )
207 *
208          DO 20 I = M - KK + KI + 1, M - KK + 1-NB
209             IB = MIN( M-I+1, NB )
210 *
211 *           Compute the TZ factorization of the current block
212 *           A(i:i+ib-1,i:n)
213 *
214             CALL ZLATRZ( IB, N-I+1, N-M, A( I, I ), LDA, TAU( I ),
215      $                   WORK )
216             IF( I.GT.1 ) THEN
217 *
218 *              Form the triangular factor of the block reflector
219 *              H = H(i+ib-1) . . . H(i+1) H(i)
220 *
221                CALL ZLARZT( 'Backward''Rowwise', N-M, IB, A( I, M1 ),
222      $                      LDA, TAU( I ), WORK, LDWORK )
223 *
224 *              Apply H to A(1:i-1,i:n) from the right
225 *
226                CALL ZLARZB( 'Right''No transpose''Backward',
227      $                      'Rowwise', I-1, N-I+1, IB, N-M, A( I, M1 ),
228      $                      LDA, WORK, LDWORK, A( 1, I ), LDA,
229      $                      WORK( IB+1 ), LDWORK )
230             END IF
231    20    CONTINUE
232          MU = I + NB - 1
233       ELSE
234          MU = M
235       END IF
236 *
237 *     Use unblocked code to factor the last or only block
238 *
239       IF( MU.GT.0 )
240      $   CALL ZLATRZ( MU, N, N-M, A, LDA, TAU, WORK )
241 *
242       WORK( 1 ) = LWKOPT
243 *
244       RETURN
245 *
246 *     End of ZTZRZF
247 *
248       END