lapack-3.3.1/SRC/dgejsv.f

        1
        2
        3
        4
        5
        6
        7
        8
        9
       10
       11
       12
       13
       14
       15
       16
       17
       18
       19
       20
       21
       22
       23
       24
       25
       26
       27
       28
       29
       30
       31
       32
       33
       34
       35
       36
       37
       38
       39
       40
       41
       42
       43
       44
       45
       46
       47
       48
       49
       50
       51
       52
       53
       54
       55
       56
       57
       58
       59
       60
       61
       62
       63
       64
       65
       66
       67
       68
       69
       70
       71
       72
       73
       74
       75
       76
       77
       78
       79
       80
       81
       82
       83
       84
       85
       86
       87
       88
       89
       90
       91
       92
       93
       94
       95
       96
       97
       98
       99
      100
      101
      102
      103
      104
      105
      106
      107
      108
      109
      110
      111
      112
      113
      114
      115
      116
      117
      118
      119
      120
      121
      122
      123
      124
      125
      126
      127
      128
      129
      130
      131
      132
      133
      134
      135
      136
      137
      138
      139
      140
      141
      142
      143
      144
      145
      146
      147
      148
      149
      150
      151
      152
      153
      154
      155
      156
      157
      158
      159
      160
      161
      162
      163
      164
      165
      166
      167
      168
      169
      170
      171
      172
      173
      174
      175
      176
      177
      178
      179
      180
      181
      182
      183
      184
      185
      186
      187
      188
      189
      190
      191
      192
      193
      194
      195
      196
      197
      198
      199
      200
      201
      202
      203
      204
      205
      206
      207
      208
      209
      210
      211
      212
      213
      214
      215
      216
      217
      218
      219
      220
      221
      222
      223
      224
      225
      226
      227
      228
      229
      230
      231
      232
      233
      234
      235
      236
      237
      238
      239
      240
      241
      242
      243
      244
      245
      246
      247
      248
      249
      250
      251
      252
      253
      254
      255
      256
      257
      258
      259
      260
      261
      262
      263
      264
      265
      266
      267
      268
      269
      270
      271
      272
      273
      274
      275
      276
      277
      278
      279
      280
      281
      282
      283
      284
      285
      286
      287
      288
      289
      290
      291
      292
      293
      294
      295
      296
      297
      298
      299
      300
      301
      302
      303
      304
      305
      306
      307
      308
      309
      310
      311
      312
      313
      314
      315
      316
      317
      318
      319
      320
      321
      322
      323
      324
      325
      326
      327
      328
      329
      330
      331
      332
      333
      334
      335
      336
      337
      338
      339
      340
      341
      342
      343
      344
      345
      346
      347
      348
      349
      350
      351
      352
      353
      354
      355
      356
      357
      358
      359
      360
      361
      362
      363
      364
      365
      366
      367
      368
      369
      370
      371
      372
      373
      374
      375
      376
      377
      378
      379
      380
      381
      382
      383
      384
      385
      386
      387
      388
      389
      390
      391
      392
      393
      394
      395
      396
      397
      398
      399
      400
      401
      402
      403
      404
      405
      406
      407
      408
      409
      410
      411
      412
      413
      414
      415
      416
      417
      418
      419
      420
      421
      422
      423
      424
      425
      426
      427
      428
      429
      430
      431
      432
      433
      434
      435
      436
      437
      438
      439
      440
      441
      442
      443
      444
      445
      446
      447
      448
      449
      450
      451
      452
      453
      454
      455
      456
      457
      458
      459
      460
      461
      462
      463
      464
      465
      466
      467
      468
      469
      470
      471
      472
      473
      474
      475
      476
      477
      478
      479
      480
      481
      482
      483
      484
      485
      486
      487
      488
      489
      490
      491
      492
      493
      494
      495
      496
      497
      498
      499
      500
      501
      502
      503
      504
      505
      506
      507
      508
      509
      510
      511
      512
      513
      514
      515
      516
      517
      518
      519
      520
      521
      522
      523
      524
      525
      526
      527
      528
      529
      530
      531
      532
      533
      534
      535
      536
      537
      538
      539
      540
      541
      542
      543
      544
      545
      546
      547
      548
      549
      550
      551
      552
      553
      554
      555
      556
      557
      558
      559
      560
      561
      562
      563
      564
      565
      566
      567
      568
      569
      570
      571
      572
      573
      574
      575
      576
      577
      578
      579
      580
      581
      582
      583
      584
      585
      586
      587
      588
      589
      590
      591
      592
      593
      594
      595
      596
      597
      598
      599
      600
      601
      602
      603
      604
      605
      606
      607
      608
      609
      610
      611
      612
      613
      614
      615
      616
      617
      618
      619
      620
      621
      622
      623
      624
      625
      626
      627
      628
      629
      630
      631
      632
      633
      634
      635
      636
      637
      638
      639
      640
      641
      642
      643
      644
      645
      646
      647
      648
      649
      650
      651
      652
      653
      654
      655
      656
      657
      658
      659
      660
      661
      662
      663
      664
      665
      666
      667
      668
      669
      670
      671
      672
      673
      674
      675
      676
      677
      678
      679
      680
      681
      682
      683
      684
      685
      686
      687
      688
      689
      690
      691
      692
      693
      694
      695
      696
      697
      698
      699
      700
      701
      702
      703
      704
      705
      706
      707
      708
      709
      710
      711
      712
      713
      714
      715
      716
      717
      718
      719
      720
      721
      722
      723
      724
      725
      726
      727
      728
      729
      730
      731
      732
      733
      734
      735
      736
      737
      738
      739
      740
      741
      742
      743
      744
      745
      746
      747
      748
      749
      750
      751
      752
      753
      754
      755
      756
      757
      758
      759
      760
      761
      762
      763
      764
      765
      766
      767
      768
      769
      770
      771
      772
      773
      774
      775
      776
      777
      778
      779
      780
      781
      782
      783
      784
      785
      786
      787
      788
      789
      790
      791
      792
      793
      794
      795
      796
      797
      798
      799
      800
      801
      802
      803
      804
      805
      806
      807
      808
      809
      810
      811
      812
      813
      814
      815
      816
      817
      818
      819
      820
      821
      822
      823
      824
      825
      826
      827
      828
      829
      830
      831
      832
      833
      834
      835
      836
      837
      838
      839
      840
      841
      842
      843
      844
      845
      846
      847
      848
      849
      850
      851
      852
      853
      854
      855
      856
      857
      858
      859
      860
      861
      862
      863
      864
      865
      866
      867
      868
      869
      870
      871
      872
      873
      874
      875
      876
      877
      878
      879
      880
      881
      882
      883
      884
      885
      886
      887
      888
      889
      890
      891
      892
      893
      894
      895
      896
      897
      898
      899
      900
      901
      902
      903
      904
      905
      906
      907
      908
      909
      910
      911
      912
      913
      914
      915
      916
      917
      918
      919
      920
      921
      922
      923
      924
      925
      926
      927
      928
      929
      930
      931
      932
      933
      934
      935
      936
      937
      938
      939
      940
      941
      942
      943
      944
      945
      946
      947
      948
      949
      950
      951
      952
      953
      954
      955
      956
      957
      958
      959
      960
      961
      962
      963
      964
      965
      966
      967
      968
      969
      970
      971
      972
      973
      974
      975
      976
      977
      978
      979
      980
      981
      982
      983
      984
      985
      986
      987
      988
      989
      990
      991
      992
      993
      994
      995
      996
      997
      998
      999
     1000
     1001
     1002
     1003
     1004
     1005
     1006
     1007
     1008
     1009
     1010
     1011
     1012
     1013
     1014
     1015
     1016
     1017
     1018
     1019
     1020
     1021
     1022
     1023
     1024
     1025
     1026
     1027
     1028
     1029
     1030
     1031
     1032
     1033
     1034
     1035
     1036
     1037
     1038
     1039
     1040
     1041
     1042
     1043
     1044
     1045
     1046
     1047
     1048
     1049
     1050
     1051
     1052
     1053
     1054
     1055
     1056
     1057
     1058
     1059
     1060
     1061
     1062
     1063
     1064
     1065
     1066
     1067
     1068
     1069
     1070
     1071
     1072
     1073
     1074
     1075
     1076
     1077
     1078
     1079
     1080
     1081
     1082
     1083
     1084
     1085
     1086
     1087
     1088
     1089
     1090
     1091
     1092
     1093
     1094
     1095
     1096
     1097
     1098
     1099
     1100
     1101
     1102
     1103
     1104
     1105
     1106
     1107
     1108
     1109
     1110
     1111
     1112
     1113
     1114
     1115
     1116
     1117
     1118
     1119
     1120
     1121
     1122
     1123
     1124
     1125
     1126
     1127
     1128
     1129
     1130
     1131
     1132
     1133
     1134
     1135
     1136
     1137
     1138
     1139
     1140
     1141
     1142
     1143
     1144
     1145
     1146
     1147
     1148
     1149
     1150
     1151
     1152
     1153
     1154
     1155
     1156
     1157
     1158
     1159
     1160
     1161
     1162
     1163
     1164
     1165
     1166
     1167
     1168
     1169
     1170
     1171
     1172
     1173
     1174
     1175
     1176
     1177
     1178
     1179
     1180
     1181
     1182
     1183
     1184
     1185
     1186
     1187
     1188
     1189
     1190
     1191
     1192
     1193
     1194
     1195
     1196
     1197
     1198
     1199
     1200
     1201
     1202
     1203
     1204
     1205
     1206
     1207
     1208
     1209
     1210
     1211
     1212
     1213
     1214
     1215
     1216
     1217
     1218
     1219
     1220
     1221
     1222
     1223
     1224
     1225
     1226
     1227
     1228
     1229
     1230
     1231
     1232
     1233
     1234
     1235
     1236
     1237
     1238
     1239
     1240
     1241
     1242
     1243
     1244
     1245
     1246
     1247
     1248
     1249
     1250
     1251
     1252
     1253
     1254
     1255
     1256
     1257
     1258
     1259
     1260
     1261
     1262
     1263
     1264
     1265
     1266
     1267
     1268
     1269
     1270
     1271
     1272
     1273
     1274
     1275
     1276
     1277
     1278
     1279
     1280
     1281
     1282
     1283
     1284
     1285
     1286
     1287
     1288
     1289
     1290
     1291
     1292
     1293
     1294
     1295
     1296
     1297
     1298
     1299
     1300
     1301
     1302
     1303
     1304
     1305
     1306
     1307
     1308
     1309
     1310
     1311
     1312
     1313
     1314
     1315
     1316
     1317
     1318
     1319
     1320
     1321
     1322
     1323
     1324
     1325
     1326
     1327
     1328
     1329
     1330
     1331
     1332
     1333
     1334
     1335
     1336
     1337
     1338
     1339
     1340
     1341
     1342
     1343
     1344
     1345
     1346
     1347
     1348
     1349
     1350
     1351
     1352
     1353
     1354
     1355
     1356
     1357
     1358
     1359
     1360
     1361
     1362
     1363
     1364
     1365
     1366
     1367
     1368
     1369
     1370
     1371
     1372
     1373
     1374
     1375
     1376
     1377
     1378
     1379
     1380
     1381
     1382
     1383
     1384
     1385
     1386
     1387
     1388
     1389
     1390
     1391
     1392
     1393
     1394
     1395
     1396
     1397
     1398
     1399
     1400
     1401
     1402
     1403
     1404
     1405
     1406
     1407
     1408
     1409
     1410
     1411
     1412
     1413
     1414
     1415
     1416
     1417
     1418
     1419
     1420
     1421
     1422
     1423
     1424
     1425
     1426
     1427
     1428
     1429
     1430
     1431
     1432
     1433
     1434
     1435
     1436
     1437
     1438
     1439
     1440
     1441
     1442
     1443
     1444
     1445
     1446
     1447
     1448
     1449
     1450
     1451
     1452
     1453
     1454
     1455
     1456
     1457
     1458
     1459
     1460
     1461
     1462
     1463
     1464
     1465
     1466
     1467
     1468
     1469
     1470
     1471
     1472
     1473
     1474
     1475
     1476
     1477
     1478
     1479
     1480
     1481
     1482
     1483
     1484
     1485
     1486
     1487
     1488
     1489
     1490
     1491
     1492
     1493
     1494
     1495
     1496
     1497
     1498
     1499
     1500
     1501
     1502
     1503
     1504
     1505
     1506
     1507
     1508
     1509
     1510
     1511
     1512
     1513
     1514
     1515
     1516
     1517
     1518
     1519
     1520
     1521
     1522
     1523
     1524
     1525
     1526
     1527
     1528
     1529
     1530
     1531
     1532
     1533
     1534
     1535
     1536
     1537
     1538
     1539
     1540
     1541
     1542
     1543
     1544
     1545
     1546
     1547
     1548
     1549
     1550
     1551
     1552
     1553
     1554
     1555
     1556
     1557
     1558
     1559
     1560
     1561
     1562
     1563
     1564
     1565
     1566
     1567
     1568
     1569
     1570
     1571
     1572
     1573
     1574
     1575
     1576
     1577
     1578
     1579
     1580
     1581
     1582
     1583
     1584
     1585
     1586
     1587
     1588
     1589
     1590
     1591
     1592
     1593
     1594
     1595
     1596
     1597
     1598
     1599
     1600
     1601
     1602
     1603
     1604
     1605
     1606
     1607
     1608
     1609
     1610
     1611
     1612
     1613
     1614
     1615
     1616
     1617
     1618
     1619
     1620
     1621
     1622
     1623
     1624
     1625
     1626
     1627
     1628
     1629
     1630
     1631
     1632
     1633
     1634
     1635
     1636
     1637
     1638
     1639
     1640
     1641
     1642
     1643
     1644
     1645
     1646
     1647
     1648
     1649
     1650
     1651
     1652
     1653
     1654
     1655
     1656
     1657
     1658
     1659
     1660
     1661
     1662
     1663
     1664
     1665
     1666
     1667
     1668

      SUBROUTINE DGEJSV( JOBA, JOBU, JOBV, JOBR, JOBT, JOBP,
     $                   M, N, A, LDA, SVA, U, LDU, V, LDV,
     $                   WORK, LWORK, IWORK, INFO )
*
*  -- LAPACK routine (version 3.3.1)                                    --
*
*  -- Contributed by Zlatko Drmac of the University of Zagreb and     --
*  -- Kresimir Veselic of the Fernuniversitaet Hagen                  --
*  -- April 2011                                                      --
*
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* This routine is also part of SIGMA (version 1.23, October 23. 2008.)
* SIGMA is a library of highly accurate algorithms for the
* computation of SVD, PSVD, QSVD, (H,K)-SVD, and for solution of the
* eigenvalue problems Hx = lambda M x, H M x = lambda x with H, M > 0.
*
*     .. Scalar Arguments ..
      IMPLICIT    NONE
      INTEGER     INFO, LDA, LDU, LDV, LWORK, M, N
*     ..
*     .. Array Arguments ..
      DOUBLE PRECISION A( LDA, * ), SVA( N ), U( LDU, * ), V( LDV, * ),
     $            WORK( LWORK )
      INTEGER     IWORK( * )
      CHARACTER*1 JOBA, JOBP, JOBR, JOBT, JOBU, JOBV
*     ..
*
*  Purpose
*  =======
*
*  DGEJSV computes the singular value decomposition (SVD) of a real M-by-N
*  matrix [A], where M >= N. The SVD of [A] is written as
*
*               [A] = [U] * [SIGMA] * [V]^t,
*
*  where [SIGMA] is an N-by-N (M-by-N) matrix which is zero except for its N
*  diagonal elements, [U] is an M-by-N (or M-by-M) orthonormal matrix, and
*  [V] is an N-by-N orthogonal matrix. The diagonal elements of [SIGMA] are
*  the singular values of [A]. The columns of [U] and [V] are the left and
*  the right singular vectors of [A], respectively. The matrices [U] and [V]
*  are computed and stored in the arrays U and V, respectively. The diagonal
*  of [SIGMA] is computed and stored in the array SVA.
*
*  Arguments
*  =========
*
*  JOBA    (input) CHARACTER*1
*        Specifies the level of accuracy:
*       = 'C': This option works well (high relative accuracy) if A = B * D,
*             with well-conditioned B and arbitrary diagonal matrix D.
*             The accuracy cannot be spoiled by COLUMN scaling. The
*             accuracy of the computed output depends on the condition of
*             B, and the procedure aims at the best theoretical accuracy.
*             The relative error max_{i=1:N}|d sigma_i| / sigma_i is
*             bounded by f(M,N)*epsilon* cond(B), independent of D.
*             The input matrix is preprocessed with the QRF with column
*             pivoting. This initial preprocessing and preconditioning by
*             a rank revealing QR factorization is common for all values of
*             JOBA. Additional actions are specified as follows:
*       = 'E': Computation as with 'C' with an additional estimate of the
*             condition number of B. It provides a realistic error bound.
*       = 'F': If A = D1 * C * D2 with ill-conditioned diagonal scalings
*             D1, D2, and well-conditioned matrix C, this option gives
*             higher accuracy than the 'C' option. If the structure of the
*             input matrix is not known, and relative accuracy is
*             desirable, then this option is advisable. The input matrix A
*             is preprocessed with QR factorization with FULL (row and
*             column) pivoting.
*       = 'G': Computation as with 'F' with an additional estimate of the
*             condition number of B, where A=D*B. If A has heavily weighted
*             rows, then using this condition number gives too pessimistic
*             error bound.
*       = 'A': Small singular values are the noise and the matrix is treated
*             as numerically rank deficient. The error in the computed
*             singular values is bounded by f(m,n)*epsilon*||A||.
*             The computed SVD A = U * S * V^t restores A up to
*             f(m,n)*epsilon*||A||.
*             This gives the procedure the licence to discard (set to zero)
*             all singular values below N*epsilon*||A||.
*       = 'R': Similar to 'A'. The rank revealing property of the initial
*             QR factorization is used to reveal (using the triangular factor)
*             a gap sigma_{r+1} < epsilon * sigma_r in which case the
*             numerical RANK is declared to be r. The SVD is computed with
*             absolute error bounds, but more accurately than with 'A'.
*
*  JOBU    (input) CHARACTER*1
*        Specifies whether to compute the columns of U:
*       = 'U': N columns of U are returned in the array U.
*       = 'F': full set of M left sing. vectors is returned in the array U.
*       = 'W': U may be used as workspace of length M*N. See the description
*             of U.
*       = 'N': U is not computed.
*
*  JOBV    (input) CHARACTER*1
*        Specifies whether to compute the matrix V:
*       = 'V': N columns of V are returned in the array V; Jacobi rotations
*             are not explicitly accumulated.
*       = 'J': N columns of V are returned in the array V, but they are
*             computed as the product of Jacobi rotations. This option is
*             allowed only if JOBU .NE. 'N', i.e. in computing the full SVD.
*       = 'W': V may be used as workspace of length N*N. See the description
*             of V.
*       = 'N': V is not computed.
*
*  JOBR    (input) CHARACTER*1
*        Specifies the RANGE for the singular values. Issues the licence to
*        set to zero small positive singular values if they are outside
*        specified range. If A .NE. 0 is scaled so that the largest singular
*        value of c*A is around DSQRT(BIG), BIG=DLAMCH('O'), then JOBR issues
*        the licence to kill columns of A whose norm in c*A is less than
*        DSQRT(SFMIN) (for JOBR.EQ.'R'), or less than SMALL=SFMIN/EPSLN,
*        where SFMIN=DLAMCH('S'), EPSLN=DLAMCH('E').
*       = 'N': Do not kill small columns of c*A. This option assumes that
*             BLAS and QR factorizations and triangular solvers are
*             implemented to work in that range. If the condition of A
*             is greater than BIG, use DGESVJ.
*       = 'R': RESTRICTED range for sigma(c*A) is [DSQRT(SFMIN), DSQRT(BIG)]
*             (roughly, as described above). This option is recommended.
*                                            ~~~~~~~~~~~~~~~~~~~~~~~~~~~
*        For computing the singular values in the FULL range [SFMIN,BIG]
*        use DGESVJ.
*
*  JOBT    (input) CHARACTER*1
*        If the matrix is square then the procedure may determine to use
*        transposed A if A^t seems to be better with respect to convergence.
*        If the matrix is not square, JOBT is ignored. This is subject to
*        changes in the future.
*        The decision is based on two values of entropy over the adjoint
*        orbit of A^t * A. See the descriptions of WORK(6) and WORK(7).
*       = 'T': transpose if entropy test indicates possibly faster
*        convergence of Jacobi process if A^t is taken as input. If A is
*        replaced with A^t, then the row pivoting is included automatically.
*       = 'N': do not speculate.
*        This option can be used to compute only the singular values, or the
*        full SVD (U, SIGMA and V). For only one set of singular vectors
*        (U or V), the caller should provide both U and V, as one of the
*        matrices is used as workspace if the matrix A is transposed.
*        The implementer can easily remove this constraint and make the
*        code more complicated. See the descriptions of U and V.
*
*  JOBP    (input) CHARACTER*1
*        Issues the licence to introduce structured perturbations to drown
*        denormalized numbers. This licence should be active if the
*        denormals are poorly implemented, causing slow computation,
*        especially in cases of fast convergence (!). For details see [1,2].
*        For the sake of simplicity, these perturbations are included only
*        when the full SVD or only the singular values are requested. The
*        implementer/user can easily add the perturbation for the cases of
*        computing one set of singular vectors.
*       = 'P': introduce perturbation
*       = 'N': do not perturb
*
*  M       (input) INTEGER
*         The number of rows of the input matrix A.  M >= 0.
*
*  N       (input) INTEGER
*         The number of columns of the input matrix A. M >= N >= 0.
*
*  A       (input/workspace) DOUBLE PRECISION array, dimension (LDA,N)
*          On entry, the M-by-N matrix A.
*
*  LDA     (input) INTEGER
*          The leading dimension of the array A.  LDA >= max(1,M).
*
*  SVA     (workspace/output) DOUBLE PRECISION array, dimension (N)
*          On exit,
*          - For WORK(1)/WORK(2) = ONE: The singular values of A. During the
*            computation SVA contains Euclidean column norms of the
*            iterated matrices in the array A.
*          - For WORK(1) .NE. WORK(2): The singular values of A are
*            (WORK(1)/WORK(2)) * SVA(1:N). This factored form is used if
*            sigma_max(A) overflows or if small singular values have been
*            saved from underflow by scaling the input matrix A.
*          - If JOBR='R' then some of the singular values may be returned
*            as exact zeros obtained by "set to zero" because they are
*            below the numerical rank threshold or are denormalized numbers.
*
*  U       (workspace/output) DOUBLE PRECISION array, dimension ( LDU, N )
*          If JOBU = 'U', then U contains on exit the M-by-N matrix of
*                         the left singular vectors.
*          If JOBU = 'F', then U contains on exit the M-by-M matrix of
*                         the left singular vectors, including an ONB
*                         of the orthogonal complement of the Range(A).
*          If JOBU = 'W'  .AND. (JOBV.EQ.'V' .AND. JOBT.EQ.'T' .AND. M.EQ.N),
*                         then U is used as workspace if the procedure
*                         replaces A with A^t. In that case, [V] is computed
*                         in U as left singular vectors of A^t and then
*                         copied back to the V array. This 'W' option is just
*                         a reminder to the caller that in this case U is
*                         reserved as workspace of length N*N.
*          If JOBU = 'N'  U is not referenced.
*
*  LDU     (input) INTEGER
*          The leading dimension of the array U,  LDU >= 1.
*          IF  JOBU = 'U' or 'F' or 'W',  then LDU >= M.
*
*  V       (workspace/output) DOUBLE PRECISION array, dimension ( LDV, N )
*          If JOBV = 'V', 'J' then V contains on exit the N-by-N matrix of
*                         the right singular vectors;
*          If JOBV = 'W', AND (JOBU.EQ.'U' AND JOBT.EQ.'T' AND M.EQ.N),
*                         then V is used as workspace if the procedure
*                         replaces A with A^t. In that case, [U] is computed
*                         in V as right singular vectors of A^t and then
*                         copied back to the U array. This 'W' option is just
*                         a reminder to the caller that in this case V is
*                         reserved as workspace of length N*N.
*          If JOBV = 'N'  V is not referenced.
*
*  LDV     (input) INTEGER
*          The leading dimension of the array V,  LDV >= 1.
*          If JOBV = 'V' or 'J' or 'W', then LDV >= N.
*
*  WORK    (workspace/output) DOUBLE PRECISION array, dimension at least LWORK.
*          On exit, if N.GT.0 .AND. M.GT.0 (else not referenced),
*          WORK(1) = numerator of SCALE = WORK(1) / WORK(2), the scaling
*                    factor such that SCALE*SVA(1:N) are the computed
*                    singular values of A. (See the description of SVA.)
*          WORK(2) = denominator of SCALE. See the description of WORK(1).
*          WORK(3) = SCONDA is an estimate for the condition number of
*                    column equilibrated A. (If JOBA .EQ. 'E' or 'G')
*                    SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
*                    It is computed using DPOCON. It holds
*                    N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
*                    where R is the triangular factor from the QRF of A.
*                    However, if R is truncated and the numerical rank is
*                    determined to be strictly smaller than N, SCONDA is
*                    returned as -1, thus indicating that the smallest
*                    singular values might be lost.
*
*          If full SVD is needed, the following two condition numbers are
*          useful for the analysis of the algorithm. They are provided for
*          a developer/implementer who is familiar with the details of
*          the method.
*
*          WORK(4) = an estimate of the scaled condition number of the
*                    triangular factor in the first QR factorization.
*          WORK(5) = an estimate of the scaled condition number of the
*                    triangular factor in the second QR factorization.
*          The following two parameters are computed if JOBT .EQ. 'T'.
*          They are provided for a developer/implementer who is familiar
*          with the details of the method.
*
*          WORK(6) = the entropy of A^t*A :: this is the Shannon entropy
*                    of diag(A^t*A) / Trace(A^t*A) taken as point in the
*                    probability simplex.
*          WORK(7) = the entropy of A*A^t.
*
*  LWORK   (input) INTEGER
*          Length of WORK to confirm proper allocation of work space.
*          LWORK depends on the job:
*
*          If only SIGMA is needed ( JOBU.EQ.'N', JOBV.EQ.'N' ) and
*            -> .. no scaled condition estimate required (JOBA is neither
*               'E' nor 'G'):
*               LWORK >= max(2*M+N,4*N+1,7). This is the minimal requirement.
*               ->> For optimal performance (blocked code) the optimal value
*               is LWORK >= max(2*M+N,3*N+(N+1)*NB,7). Here NB is the optimal
*               block size for DGEQP3 and DGEQRF.
*               In general, optimal LWORK is computed as
*               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF), 7).
*            -> .. an estimate of the scaled condition number of A is
*               required (JOBA='E', 'G'). In this case, LWORK is the maximum
*               of the above and N*N+4*N, i.e. LWORK >= max(2*M+N,N*N+4*N,7).
*               ->> For optimal performance (blocked code) the optimal value
*               is LWORK >= max(2*M+N,3*N+(N+1)*NB, N*N+4*N, 7).
*               In general, the optimal length LWORK is computed as
*               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DGEQRF),
*                                                     N+N*N+LWORK(DPOCON),7).
*
*          If SIGMA and the right singular vectors are needed (JOBV.EQ.'V'),
*            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
*            -> For optimal performance, LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
*               where NB is the optimal block size for DGEQP3, DGEQRF, DGELQF,
*               DORMLQ. In general, the optimal length LWORK is computed as
*               LWORK >= max(2*M+N,N+LWORK(DGEQP3), N+LWORK(DPOCON),
*                       N+LWORK(DGELQF), 2*N+LWORK(DGEQRF), N+LWORK(DORMLQ)).
*
*          If SIGMA and the left singular vectors are needed
*            -> the minimal requirement is LWORK >= max(2*M+N,4*N+1,7).
*            -> For optimal performance:
*               if JOBU.EQ.'U' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,7),
*               if JOBU.EQ.'F' :: LWORK >= max(2*M+N,3*N+(N+1)*NB,N+M*NB,7),
*               where NB is the optimal block size for DGEQP3, DGEQRF, DORMQR.
*               In general, the optimal length LWORK is computed as
*               LWORK >= max(2*M+N,N+LWORK(DGEQP3),N+LWORK(DPOCON),
*                        2*N+LWORK(DGEQRF), N+LWORK(DORMQR)).
*               Here LWORK(DORMQR) equals N*NB (for JOBU.EQ.'U') or
*               M*NB (for JOBU.EQ.'F').
*
*          If the full SVD is needed: (JOBU.EQ.'U' or JOBU.EQ.'F') and
*            -> if JOBV.EQ.'V'
*               the minimal requirement is LWORK >= max(2*M+N,6*N+2*N*N).
*            -> if JOBV.EQ.'J' the minimal requirement is
*               LWORK >= max(2*M+N, 4*N+N*N,2*N+N*N+6).
*            -> For optimal performance, LWORK should be additionally
*               larger than N+M*NB, where NB is the optimal block size
*               for DORMQR.
*
*  IWORK   (workspace/output) INTEGER array, dimension M+3*N.
*          On exit,
*          IWORK(1) = the numerical rank determined after the initial
*                     QR factorization with pivoting. See the descriptions
*                     of JOBA and JOBR.
*          IWORK(2) = the number of the computed nonzero singular values
*          IWORK(3) = if nonzero, a warning message:
*                     If IWORK(3).EQ.1 then some of the column norms of A
*                     were denormalized floats. The requested high accuracy
*                     is not warranted by the data.
*
*  INFO    (output) INTEGER
*           < 0  : if INFO = -i, then the i-th argument had an illegal value.
*           = 0 :  successful exit;
*           > 0 :  DGEJSV  did not converge in the maximal allowed number
*                  of sweeps. The computed values may be inaccurate.
*
*  Further Details
*  ===============
*
*  DGEJSV implements a preconditioned Jacobi SVD algorithm. It uses DGEQP3,
*  DGEQRF, and DGELQF as preprocessors and preconditioners. Optionally, an
*  additional row pivoting can be used as a preprocessor, which in some
*  cases results in much higher accuracy. An example is matrix A with the
*  structure A = D1 * C * D2, where D1, D2 are arbitrarily ill-conditioned
*  diagonal matrices and C is well-conditioned matrix. In that case, complete
*  pivoting in the first QR factorizations provides accuracy dependent on the
*  condition number of C, and independent of D1, D2. Such higher accuracy is
*  not completely understood theoretically, but it works well in practice.
*  Further, if A can be written as A = B*D, with well-conditioned B and some
*  diagonal D, then the high accuracy is guaranteed, both theoretically and
*  in software, independent of D. For more details see [1], [2].
*     The computational range for the singular values can be the full range
*  ( UNDERFLOW,OVERFLOW ), provided that the machine arithmetic and the BLAS
*  & LAPACK routines called by DGEJSV are implemented to work in that range.
*  If that is not the case, then the restriction for safe computation with
*  the singular values in the range of normalized IEEE numbers is that the
*  spectral condition number kappa(A)=sigma_max(A)/sigma_min(A) does not
*  overflow. This code (DGEJSV) is best used in this restricted range,
*  meaning that singular values of magnitude below ||A||_2 / DLAMCH('O') are
*  returned as zeros. See JOBR for details on this.
*     Further, this implementation is somewhat slower than the one described
*  in [1,2] due to replacement of some non-LAPACK components, and because
*  the choice of some tuning parameters in the iterative part (DGESVJ) is
*  left to the implementer on a particular machine.
*     The rank revealing QR factorization (in this code: DGEQP3) should be
*  implemented as in [3]. We have a new version of DGEQP3 under development
*  that is more robust than the current one in LAPACK, with a cleaner cut in
*  rank deficient cases. It will be available in the SIGMA library [4].
*  If M is much larger than N, it is obvious that the initial QRF with
*  column pivoting can be preprocessed by the QRF without pivoting. That
*  well known trick is not used in DGEJSV because in some cases heavy row
*  weighting can be treated with complete pivoting. The overhead in cases
*  M much larger than N is then only due to pivoting, but the benefits in
*  terms of accuracy have prevailed. The implementer/user can incorporate
*  this extra QRF step easily. The implementer can also improve data movement
*  (matrix transpose, matrix copy, matrix transposed copy) - this
*  implementation of DGEJSV uses only the simplest, naive data movement.
*
*  Contributors
*
*  Zlatko Drmac (Zagreb, Croatia) and Kresimir Veselic (Hagen, Germany)
*
*  References
*
* [1] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm I.
*     SIAM J. Matrix Anal. Appl. Vol. 29, No. 4 (2008), pp. 1322-1342.
*     LAPACK Working note 169.
* [2] Z. Drmac and K. Veselic: New fast and accurate Jacobi SVD algorithm II.
*     SIAM J. Matrix Anal. Appl. Vol. 29, No. 4 (2008), pp. 1343-1362.
*     LAPACK Working note 170.
* [3] Z. Drmac and Z. Bujanovic: On the failure of rank-revealing QR
*     factorization software - a case study.
*     ACM Trans. Math. Softw. Vol. 35, No 2 (2008), pp. 1-28.
*     LAPACK Working note 176.
* [4] Z. Drmac: SIGMA - mathematical software library for accurate SVD, PSV,
*     QSVD, (H,K)-SVD computations.
*     Department of Mathematics, University of Zagreb, 2008.
*
*  Bugs, examples and comments
*
*  Please report all bugs and send interesting examples and/or comments to
*  drmac@math.hr. Thank you.
*
*  ===========================================================================
*
*     .. Local Parameters ..
      DOUBLE PRECISION   ZERO,  ONE
      PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
*     ..
*     .. Local Scalars ..
      DOUBLE PRECISION AAPP, AAQQ, AATMAX, AATMIN, BIG, BIG1, COND_OK,
     $        CONDR1, CONDR2, ENTRA,  ENTRAT, EPSLN,  MAXPRJ, SCALEM,
     $        SCONDA, SFMIN,  SMALL,  TEMP1,  USCAL1, USCAL2, XSC
      INTEGER IERR,   N1,     NR,     NUMRANK,        p, q,   WARNING
      LOGICAL ALMORT, DEFR,   ERREST, GOSCAL, JRACC,  KILL,   LSVEC,
     $        L2ABER, L2KILL, L2PERT, L2RANK, L2TRAN,
     $        NOSCAL, ROWPIV, RSVEC,  TRANSP
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC DABS,  DLOG, DMAX1, DMIN1, DBLE,
     $          MAX0, MIN0, IDNINT,  DSIGN,  DSQRT
*     ..
*     .. External Functions ..
      DOUBLE PRECISION  DLAMCH, DNRM2
      INTEGER   IDAMAX
      LOGICAL   LSAME
      EXTERNAL  IDAMAX, LSAME, DLAMCH, DNRM2
*     ..
*     .. External Subroutines ..
      EXTERNAL  DCOPY,  DGELQF, DGEQP3, DGEQRF, DLACPY, DLASCL,
     $          DLASET, DLASSQ, DLASWP, DORGQR, DORMLQ,
     $          DORMQR, DPOCON, DSCAL,  DSWAP,  DTRSM,  XERBLA
*
      EXTERNAL  DGESVJ
*     ..
*
*     Test the input arguments
*
      LSVEC  = LSAME( JOBU, 'U' ) .OR. LSAME( JOBU, 'F' )
      JRACC  = LSAME( JOBV, 'J' )
      RSVEC  = LSAME( JOBV, 'V' ) .OR. JRACC
      ROWPIV = LSAME( JOBA, 'F' ) .OR. LSAME( JOBA, 'G' )
      L2RANK = LSAME( JOBA, 'R' )
      L2ABER = LSAME( JOBA, 'A' )
      ERREST = LSAME( JOBA, 'E' ) .OR. LSAME( JOBA, 'G' )
      L2TRAN = LSAME( JOBT, 'T' )
      L2KILL = LSAME( JOBR, 'R' )
      DEFR   = LSAME( JOBR, 'N' )
      L2PERT = LSAME( JOBP, 'P' )
*
      IF ( .NOT.(ROWPIV .OR. L2RANK .OR. L2ABER .OR.
     $     ERREST .OR. LSAME( JOBA, 'C' ) )) THEN
         INFO = - 1
      ELSE IF ( .NOT.( LSVEC  .OR. LSAME( JOBU, 'N' ) .OR.
     $                             LSAME( JOBU, 'W' )) ) THEN
         INFO = - 2
      ELSE IF ( .NOT.( RSVEC .OR. LSAME( JOBV, 'N' ) .OR.
     $   LSAME( JOBV, 'W' )) .OR. ( JRACC .AND. (.NOT.LSVEC) ) ) THEN
         INFO = - 3
      ELSE IF ( .NOT. ( L2KILL .OR. DEFR ) )    THEN
         INFO = - 4
      ELSE IF ( .NOT. ( L2TRAN .OR. LSAME( JOBT, 'N' ) ) ) THEN
         INFO = - 5
      ELSE IF ( .NOT. ( L2PERT .OR. LSAME( JOBP, 'N' ) ) ) THEN
         INFO = - 6
      ELSE IF ( M .LT. 0 ) THEN
         INFO = - 7
      ELSE IF ( ( N .LT. 0 ) .OR. ( N .GT. M ) ) THEN
         INFO = - 8
      ELSE IF ( LDA .LT. M ) THEN
         INFO = - 10
      ELSE IF ( LSVEC .AND. ( LDU .LT. M ) ) THEN
         INFO = - 13
      ELSE IF ( RSVEC .AND. ( LDV .LT. N ) ) THEN
         INFO = - 14
      ELSE IF ( (.NOT.(LSVEC .OR. RSVEC .OR. ERREST).AND.
     &                           (LWORK .LT. MAX0(7,4*N+1,2*M+N))) .OR.
     & (.NOT.(LSVEC .OR. RSVEC) .AND. ERREST .AND.
     &                         (LWORK .LT. MAX0(7,4*N+N*N,2*M+N))) .OR.
     & (LSVEC .AND. (.NOT.RSVEC) .AND. (LWORK .LT. MAX0(7,2*M+N,4*N+1)))
     & .OR.
     & (RSVEC .AND. (.NOT.LSVEC) .AND. (LWORK .LT. MAX0(7,2*M+N,4*N+1)))
     & .OR.
     & (LSVEC .AND. RSVEC .AND. (.NOT.JRACC) .AND.
     &                          (LWORK.LT.MAX0(2*M+N,6*N+2*N*N)))
     & .OR. (LSVEC .AND. RSVEC .AND. JRACC .AND.
     &                          LWORK.LT.MAX0(2*M+N,4*N+N*N,2*N+N*N+6)))
     &   THEN
         INFO = - 17
      ELSE
*        #:)
         INFO = 0
      END IF
*
      IF ( INFO .NE. 0 ) THEN
*       #:(
         CALL XERBLA( 'DGEJSV', - INFO )
         RETURN
      END IF
*
*     Quick return for void matrix (Y3K safe)
* #:)
      IF ( ( M .EQ. 0 ) .OR. ( N .EQ. 0 ) ) RETURN
*
*     Determine whether the matrix U should be M x N or M x M
*
      IF ( LSVEC ) THEN
         N1 = N
         IF ( LSAME( JOBU, 'F' ) ) N1 = M
      END IF
*
*     Set numerical parameters
*
*!    NOTE: Make sure DLAMCH() does not fail on the target architecture.
*
      EPSLN = DLAMCH('Epsilon')
      SFMIN = DLAMCH('SafeMinimum')
      SMALL = SFMIN / EPSLN
      BIG   = DLAMCH('O')
*     BIG   = ONE / SFMIN
*
*     Initialize SVA(1:N) = diag( ||A e_i||_2 )_1^N
*
*(!)  If necessary, scale SVA() to protect the largest norm from
*     overflow. It is possible that this scaling pushes the smallest
*     column norm below the underflow threshold (extreme case).
*
      SCALEM  = ONE / DSQRT(DBLE(M)*DBLE(N))
      NOSCAL  = .TRUE.
      GOSCAL  = .TRUE.
      DO 1874 p = 1, N
         AAPP = ZERO
         AAQQ = ONE
         CALL DLASSQ( M, A(1,p), 1, AAPP, AAQQ )
         IF ( AAPP .GT. BIG ) THEN
            INFO = - 9
            CALL XERBLA( 'DGEJSV', -INFO )
            RETURN
         END IF
         AAQQ = DSQRT(AAQQ)
         IF ( ( AAPP .LT. (BIG / AAQQ) ) .AND. NOSCAL  ) THEN
            SVA(p)  = AAPP * AAQQ
         ELSE
            NOSCAL  = .FALSE.
            SVA(p)  = AAPP * ( AAQQ * SCALEM )
            IF ( GOSCAL ) THEN
               GOSCAL = .FALSE.
               CALL DSCAL( p-1, SCALEM, SVA, 1 )
            END IF
         END IF
1874 CONTINUE
*
      IF ( NOSCAL ) SCALEM = ONE
*
      AAPP = ZERO
      AAQQ = BIG
      DO 4781 p = 1, N
         AAPP = DMAX1( AAPP, SVA(p) )
         IF ( SVA(p) .NE. ZERO ) AAQQ = DMIN1( AAQQ, SVA(p) )
4781 CONTINUE
*
*     Quick return for zero M x N matrix
* #:)
      IF ( AAPP .EQ. ZERO ) THEN
         IF ( LSVEC ) CALL DLASET( 'G', M, N1, ZERO, ONE, U, LDU )
         IF ( RSVEC ) CALL DLASET( 'G', N, N,  ZERO, ONE, V, LDV )
         WORK(1) = ONE
         WORK(2) = ONE
         IF ( ERREST ) WORK(3) = ONE
         IF ( LSVEC .AND. RSVEC ) THEN
            WORK(4) = ONE
            WORK(5) = ONE
         END IF
         IF ( L2TRAN ) THEN
            WORK(6) = ZERO
            WORK(7) = ZERO
         END IF
         IWORK(1) = 0
         IWORK(2) = 0
         IWORK(3) = 0
         RETURN
      END IF
*
*     Issue warning if denormalized column norms detected. Override the
*     high relative accuracy request. Issue licence to kill columns
*     (set them to zero) whose norm is less than sigma_max / BIG (roughly).
* #:(
      WARNING = 0
      IF ( AAQQ .LE. SFMIN ) THEN
         L2RANK = .TRUE.
         L2KILL = .TRUE.
         WARNING = 1
      END IF
*
*     Quick return for one-column matrix
* #:)
      IF ( N .EQ. 1 ) THEN
*
         IF ( LSVEC ) THEN
            CALL DLASCL( 'G',0,0,SVA(1),SCALEM, M,1,A(1,1),LDA,IERR )
            CALL DLACPY( 'A', M, 1, A, LDA, U, LDU )
*           computing all M left singular vectors of the M x 1 matrix
            IF ( N1 .NE. N  ) THEN
               CALL DGEQRF( M, N, U,LDU, WORK, WORK(N+1),LWORK-N,IERR )
               CALL DORGQR( M,N1,1, U,LDU,WORK,WORK(N+1),LWORK-N,IERR )
               CALL DCOPY( M, A(1,1), 1, U(1,1), 1 )
            END IF
         END IF
         IF ( RSVEC ) THEN
             V(1,1) = ONE
         END IF
         IF ( SVA(1) .LT. (BIG*SCALEM) ) THEN
            SVA(1)  = SVA(1) / SCALEM
            SCALEM  = ONE
         END IF
         WORK(1) = ONE / SCALEM
         WORK(2) = ONE
         IF ( SVA(1) .NE. ZERO ) THEN
            IWORK(1) = 1
            IF ( ( SVA(1) / SCALEM) .GE. SFMIN ) THEN
               IWORK(2) = 1
            ELSE
               IWORK(2) = 0
            END IF
         ELSE
            IWORK(1) = 0
            IWORK(2) = 0
         END IF
         IF ( ERREST ) WORK(3) = ONE
         IF ( LSVEC .AND. RSVEC ) THEN
            WORK(4) = ONE
            WORK(5) = ONE
         END IF
         IF ( L2TRAN ) THEN
            WORK(6) = ZERO
            WORK(7) = ZERO
         END IF
         RETURN
*
      END IF
*
      TRANSP = .FALSE.
      L2TRAN = L2TRAN .AND. ( M .EQ. N )
*
      AATMAX = -ONE
      AATMIN =  BIG
      IF ( ROWPIV .OR. L2TRAN ) THEN
*
*     Compute the row norms, needed to determine row pivoting sequence
*     (in the case of heavily row weighted A, row pivoting is strongly
*     advised) and to collect information needed to compare the
*     structures of A * A^t and A^t * A (in the case L2TRAN.EQ..TRUE.).
*
         IF ( L2TRAN ) THEN
            DO 1950 p = 1, M
               XSC   = ZERO
               TEMP1 = ONE
               CALL DLASSQ( N, A(p,1), LDA, XSC, TEMP1 )
*              DLASSQ gets both the ell_2 and the ell_infinity norm
*              in one pass through the vector
               WORK(M+N+p)  = XSC * SCALEM
               WORK(N+p)    = XSC * (SCALEM*DSQRT(TEMP1))
               AATMAX = DMAX1( AATMAX, WORK(N+p) )
               IF (WORK(N+p) .NE. ZERO) AATMIN = DMIN1(AATMIN,WORK(N+p))
1950       CONTINUE
         ELSE
            DO 1904 p = 1, M
               WORK(M+N+p) = SCALEM*DABS( A(p,IDAMAX(N,A(p,1),LDA)) )
               AATMAX = DMAX1( AATMAX, WORK(M+N+p) )
               AATMIN = DMIN1( AATMIN, WORK(M+N+p) )
1904       CONTINUE
         END IF
*
      END IF
*
*     For square matrix A try to determine whether A^t  would be  better
*     input for the preconditioned Jacobi SVD, with faster convergence.
*     The decision is based on an O(N) function of the vector of column
*     and row norms of A, based on the Shannon entropy. This should give
*     the right choice in most cases when the difference actually matters.
*     It may fail and pick the slower converging side.
*
      ENTRA  = ZERO
      ENTRAT = ZERO
      IF ( L2TRAN ) THEN
*
         XSC   = ZERO
         TEMP1 = ONE
         CALL DLASSQ( N, SVA, 1, XSC, TEMP1 )
         TEMP1 = ONE / TEMP1
*
         ENTRA = ZERO
         DO 1113 p = 1, N
            BIG1  = ( ( SVA(p) / XSC )**2 ) * TEMP1
            IF ( BIG1 .NE. ZERO ) ENTRA = ENTRA + BIG1 * DLOG(BIG1)
1113    CONTINUE
         ENTRA = - ENTRA / DLOG(DBLE(N))
*
*        Now, SVA().^2/Trace(A^t * A) is a point in the probability simplex.
*        It is derived from the diagonal of  A^t * A.  Do the same with the
*        diagonal of A * A^t, compute the entropy of the corresponding
*        probability distribution. Note that A * A^t and A^t * A have the
*        same trace.
*
         ENTRAT = ZERO
         DO 1114 p = N+1, N+M
            BIG1 = ( ( WORK(p) / XSC )**2 ) * TEMP1
            IF ( BIG1 .NE. ZERO ) ENTRAT = ENTRAT + BIG1 * DLOG(BIG1)
1114    CONTINUE
         ENTRAT = - ENTRAT / DLOG(DBLE(M))
*
*        Analyze the entropies and decide A or A^t. Smaller entropy
*        usually means better input for the algorithm.
*
         TRANSP = ( ENTRAT .LT. ENTRA )
*
*        If A^t is better than A, transpose A.
*
         IF ( TRANSP ) THEN
*           In an optimal implementation, this trivial transpose
*           should be replaced with faster transpose.
            DO 1115 p = 1, N - 1
               DO 1116 q = p + 1, N
                   TEMP1 = A(q,p)
                  A(q,p) = A(p,q)
                  A(p,q) = TEMP1
1116          CONTINUE
1115       CONTINUE
            DO 1117 p = 1, N
               WORK(M+N+p) = SVA(p)
               SVA(p)      = WORK(N+p)
1117       CONTINUE
            TEMP1  = AAPP
            AAPP   = AATMAX
            AATMAX = TEMP1
            TEMP1  = AAQQ
            AAQQ   = AATMIN
            AATMIN = TEMP1
            KILL   = LSVEC
            LSVEC  = RSVEC
            RSVEC  = KILL
            IF ( LSVEC ) N1 = N
*
            ROWPIV = .TRUE.
         END IF
*
      END IF
*     END IF L2TRAN
*
*     Scale the matrix so that its maximal singular value remains less
*     than DSQRT(BIG) -- the matrix is scaled so that its maximal column
*     has Euclidean norm equal to DSQRT(BIG/N). The only reason to keep
*     DSQRT(BIG) instead of BIG is the fact that DGEJSV uses LAPACK and
*     BLAS routines that, in some implementations, are not capable of
*     working in the full interval [SFMIN,BIG] and that they may provoke
*     overflows in the intermediate results. If the singular values spread
*     from SFMIN to BIG, then DGESVJ will compute them. So, in that case,
*     one should use DGESVJ instead of DGEJSV.
*
      BIG1   = DSQRT( BIG )
      TEMP1  = DSQRT( BIG / DBLE(N) )
*
      CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, N, 1, SVA, N, IERR )
      IF ( AAQQ .GT. (AAPP * SFMIN) ) THEN
          AAQQ = ( AAQQ / AAPP ) * TEMP1
      ELSE
          AAQQ = ( AAQQ * TEMP1 ) / AAPP
      END IF
      TEMP1 = TEMP1 * SCALEM
      CALL DLASCL( 'G', 0, 0, AAPP, TEMP1, M, N, A, LDA, IERR )
*
*     To undo scaling at the end of this procedure, multiply the
*     computed singular values with USCAL2 / USCAL1.
*
      USCAL1 = TEMP1
      USCAL2 = AAPP
*
      IF ( L2KILL ) THEN
*        L2KILL enforces computation of nonzero singular values in
*        the restricted range of condition number of the initial A,
*        sigma_max(A) / sigma_min(A) approx. DSQRT(BIG)/DSQRT(SFMIN).
         XSC = DSQRT( SFMIN )
      ELSE
         XSC = SMALL
*
*        Now, if the condition number of A is too big,
*        sigma_max(A) / sigma_min(A) .GT. DSQRT(BIG/N) * EPSLN / SFMIN,
*        as a precaution measure, the full SVD is computed using DGESVJ
*        with accumulated Jacobi rotations. This provides numerically
*        more robust computation, at the cost of slightly increased run
*        time. Depending on the concrete implementation of BLAS and LAPACK
*        (i.e. how they behave in presence of extreme ill-conditioning) the
*        implementor may decide to remove this switch.
         IF ( ( AAQQ.LT.DSQRT(SFMIN) ) .AND. LSVEC .AND. RSVEC ) THEN
            JRACC = .TRUE.
         END IF
*
      END IF
      IF ( AAQQ .LT. XSC ) THEN
         DO 700 p = 1, N
            IF ( SVA(p) .LT. XSC ) THEN
               CALL DLASET( 'A', M, 1, ZERO, ZERO, A(1,p), LDA )
               SVA(p) = ZERO
            END IF
700     CONTINUE
      END IF
*
*     Preconditioning using QR factorization with pivoting
*
      IF ( ROWPIV ) THEN
*        Optional row permutation (Bjoerck row pivoting):
*        A result by Cox and Higham shows that the Bjoerck's
*        row pivoting combined with standard column pivoting
*        has similar effect as Powell-Reid complete pivoting.
*        The ell-infinity norms of A are made nonincreasing.
         DO 1952 p = 1, M - 1
            q = IDAMAX( M-p+1, WORK(M+N+p), 1 ) + p - 1
            IWORK(2*N+p) = q
            IF ( p .NE. q ) THEN
               TEMP1       = WORK(M+N+p)
               WORK(M+N+p) = WORK(M+N+q)
               WORK(M+N+q) = TEMP1
            END IF
1952    CONTINUE
         CALL DLASWP( N, A, LDA, 1, M-1, IWORK(2*N+1), 1 )
      END IF
*
*     End of the preparation phase (scaling, optional sorting and
*     transposing, optional flushing of small columns).
*
*     Preconditioning
*
*     If the full SVD is needed, the right singular vectors are computed
*     from a matrix equation, and for that we need theoretical analysis
*     of the Businger-Golub pivoting. So we use DGEQP3 as the first RR QRF.
*     In all other cases the first RR QRF can be chosen by other criteria
*     (eg speed by replacing global with restricted window pivoting, such
*     as in SGEQPX from TOMS # 782). Good results will be obtained using
*     SGEQPX with properly (!) chosen numerical parameters.
*     Any improvement of DGEQP3 improves overall performance of DGEJSV.
*
*     A * P1 = Q1 * [ R1^t 0]^t:
      DO 1963 p = 1, N
*        .. all columns are free columns
         IWORK(p) = 0
1963 CONTINUE
      CALL DGEQP3( M,N,A,LDA, IWORK,WORK, WORK(N+1),LWORK-N, IERR )
*
*     The upper triangular matrix R1 from the first QRF is inspected for
*     rank deficiency and possibilities for deflation, or possible
*     ill-conditioning. Depending on the user specified flag L2RANK,
*     the procedure explores possibilities to reduce the numerical
*     rank by inspecting the computed upper triangular factor. If
*     L2RANK or L2ABER are up, then DGEJSV will compute the SVD of
*     A + dA, where ||dA|| <= f(M,N)*EPSLN.
*
      NR = 1
      IF ( L2ABER ) THEN
*        Standard absolute error bound suffices. All sigma_i with
*        sigma_i < N*EPSLN*||A|| are flushed to zero. This is an
*        aggressive enforcement of lower numerical rank by introducing a
*        backward error of the order of N*EPSLN*||A||.
         TEMP1 = DSQRT(DBLE(N))*EPSLN
         DO 3001 p = 2, N
            IF ( DABS(A(p,p)) .GE. (TEMP1*DABS(A(1,1))) ) THEN
               NR = NR + 1
            ELSE
               GO TO 3002
            END IF
3001    CONTINUE
3002    CONTINUE
      ELSE IF ( L2RANK ) THEN
*        .. similarly as above, only slightly more gentle (less aggressive).
*        Sudden drop on the diagonal of R1 is used as the criterion for
*        close-to-rank-deficient.
         TEMP1 = DSQRT(SFMIN)
         DO 3401 p = 2, N
            IF ( ( DABS(A(p,p)) .LT. (EPSLN*DABS(A(p-1,p-1))) ) .OR.
     $           ( DABS(A(p,p)) .LT. SMALL ) .OR.
     $           ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3402
            NR = NR + 1
3401    CONTINUE
3402    CONTINUE
*
      ELSE
*        The goal is high relative accuracy. However, if the matrix
*        has high scaled condition number the relative accuracy is in
*        general not feasible. Later on, a condition number estimator
*        will be deployed to estimate the scaled condition number.
*        Here we just remove the underflowed part of the triangular
*        factor. This prevents the situation in which the code is
*        working hard to get the accuracy not warranted by the data.
         TEMP1  = DSQRT(SFMIN)
         DO 3301 p = 2, N
            IF ( ( DABS(A(p,p)) .LT. SMALL ) .OR.
     $          ( L2KILL .AND. (DABS(A(p,p)) .LT. TEMP1) ) ) GO TO 3302
            NR = NR + 1
3301    CONTINUE
3302    CONTINUE
*
      END IF
*
      ALMORT = .FALSE.
      IF ( NR .EQ. N ) THEN
         MAXPRJ = ONE
         DO 3051 p = 2, N
            TEMP1  = DABS(A(p,p)) / SVA(IWORK(p))
            MAXPRJ = DMIN1( MAXPRJ, TEMP1 )
3051    CONTINUE
         IF ( MAXPRJ**2 .GE. ONE - DBLE(N)*EPSLN ) ALMORT = .TRUE.
      END IF
*
*
      SCONDA = - ONE
      CONDR1 = - ONE
      CONDR2 = - ONE
*
      IF ( ERREST ) THEN
         IF ( N .EQ. NR ) THEN
            IF ( RSVEC ) THEN
*              .. V is available as workspace
               CALL DLACPY( 'U', N, N, A, LDA, V, LDV )
               DO 3053 p = 1, N
                  TEMP1 = SVA(IWORK(p))
                  CALL DSCAL( p, ONE/TEMP1, V(1,p), 1 )
3053          CONTINUE
               CALL DPOCON( 'U', N, V, LDV, ONE, TEMP1,
     $              WORK(N+1), IWORK(2*N+M+1), IERR )
            ELSE IF ( LSVEC ) THEN
*              .. U is available as workspace
               CALL DLACPY( 'U', N, N, A, LDA, U, LDU )
               DO 3054 p = 1, N
                  TEMP1 = SVA(IWORK(p))
                  CALL DSCAL( p, ONE/TEMP1, U(1,p), 1 )
3054          CONTINUE
               CALL DPOCON( 'U', N, U, LDU, ONE, TEMP1,
     $              WORK(N+1), IWORK(2*N+M+1), IERR )
            ELSE
               CALL DLACPY( 'U', N, N, A, LDA, WORK(N+1), N )
               DO 3052 p = 1, N
                  TEMP1 = SVA(IWORK(p))
                  CALL DSCAL( p, ONE/TEMP1, WORK(N+(p-1)*N+1), 1 )
3052          CONTINUE
*           .. the columns of R are scaled to have unit Euclidean lengths.
               CALL DPOCON( 'U', N, WORK(N+1), N, ONE, TEMP1,
     $              WORK(N+N*N+1), IWORK(2*N+M+1), IERR )
            END IF
            SCONDA = ONE / DSQRT(TEMP1)
*           SCONDA is an estimate of DSQRT(||(R^t * R)^(-1)||_1).
*           N^(-1/4) * SCONDA <= ||R^(-1)||_2 <= N^(1/4) * SCONDA
         ELSE
            SCONDA = - ONE
         END IF
      END IF
*
      L2PERT = L2PERT .AND. ( DABS( A(1,1)/A(NR,NR) ) .GT. DSQRT(BIG1) )
*     If there is no violent scaling, artificial perturbation is not needed.
*
*     Phase 3:
*
      IF ( .NOT. ( RSVEC .OR. LSVEC ) ) THEN
*
*         Singular Values only
*
*         .. transpose A(1:NR,1:N)
         DO 1946 p = 1, MIN0( N-1, NR )
            CALL DCOPY( N-p, A(p,p+1), LDA, A(p+1,p), 1 )
1946    CONTINUE
*
*        The following two DO-loops introduce small relative perturbation
*        into the strict upper triangle of the lower triangular matrix.
*        Small entries below the main diagonal are also changed.
*        This modification is useful if the computing environment does not
*        provide/allow FLUSH TO ZERO underflow, for it prevents many
*        annoying denormalized numbers in case of strongly scaled matrices.
*        The perturbation is structured so that it does not introduce any
*        new perturbation of the singular values, and it does not destroy
*        the job done by the preconditioner.
*        The licence for this perturbation is in the variable L2PERT, which
*        should be .FALSE. if FLUSH TO ZERO underflow is active.
*
         IF ( .NOT. ALMORT ) THEN
*
            IF ( L2PERT ) THEN
*              XSC = DSQRT(SMALL)
               XSC = EPSLN / DBLE(N)
               DO 4947 q = 1, NR
                  TEMP1 = XSC*DABS(A(q,q))
                  DO 4949 p = 1, N
                     IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) )
     $                    .OR. ( p .LT. q ) )
     $                     A(p,q) = DSIGN( TEMP1, A(p,q) )
4949             CONTINUE
4947          CONTINUE
            ELSE
               CALL DLASET( 'U', NR-1,NR-1, ZERO,ZERO, A(1,2),LDA )
            END IF
*
*            .. second preconditioning using the QR factorization
*
            CALL DGEQRF( N,NR, A,LDA, WORK, WORK(N+1),LWORK-N, IERR )
*
*           .. and transpose upper to lower triangular
            DO 1948 p = 1, NR - 1
               CALL DCOPY( NR-p, A(p,p+1), LDA, A(p+1,p), 1 )
1948       CONTINUE
*
         END IF
*
*           Row-cyclic Jacobi SVD algorithm with column pivoting
*
*           .. again some perturbation (a "background noise") is added
*           to drown denormals
            IF ( L2PERT ) THEN
*              XSC = DSQRT(SMALL)
               XSC = EPSLN / DBLE(N)
               DO 1947 q = 1, NR
                  TEMP1 = XSC*DABS(A(q,q))
                  DO 1949 p = 1, NR
                     IF ( ( (p.GT.q) .AND. (DABS(A(p,q)).LE.TEMP1) )
     $                       .OR. ( p .LT. q ) )
     $                   A(p,q) = DSIGN( TEMP1, A(p,q) )
1949             CONTINUE
1947          CONTINUE
            ELSE
               CALL DLASET( 'U', NR-1, NR-1, ZERO, ZERO, A(1,2), LDA )
            END IF
*
*           .. and one-sided Jacobi rotations are started on a lower
*           triangular matrix (plus perturbation which is ignored in
*           the part which destroys triangular form (confusing?!))
*
            CALL DGESVJ( 'L', 'NoU', 'NoV', NR, NR, A, LDA, SVA,
     $                      N, V, LDV, WORK, LWORK, INFO )
*
            SCALEM  = WORK(1)
            NUMRANK = IDNINT(WORK(2))
*
*
      ELSE IF ( RSVEC .AND. ( .NOT. LSVEC ) ) THEN
*
*        -> Singular Values and Right Singular Vectors <-
*
         IF ( ALMORT ) THEN
*
*           .. in this case NR equals N
            DO 1998 p = 1, NR
               CALL DCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
1998       CONTINUE
            CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
*
            CALL DGESVJ( 'L','U','N', N, NR, V,LDV, SVA, NR, A,LDA,
     $                  WORK, LWORK, INFO )
            SCALEM  = WORK(1)
            NUMRANK = IDNINT(WORK(2))

         ELSE
*
*        .. two more QR factorizations ( one QRF is not enough, two require
*        accumulated product of Jacobi rotations, three are perfect )
*
            CALL DLASET( 'Lower', NR-1, NR-1, ZERO, ZERO, A(2,1), LDA )
            CALL DGELQF( NR, N, A, LDA, WORK, WORK(N+1), LWORK-N, IERR)
            CALL DLACPY( 'Lower', NR, NR, A, LDA, V, LDV )
            CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
            CALL DGEQRF( NR, NR, V, LDV, WORK(N+1), WORK(2*N+1),
     $                   LWORK-2*N, IERR )
            DO 8998 p = 1, NR
               CALL DCOPY( NR-p+1, V(p,p), LDV, V(p,p), 1 )
8998       CONTINUE
            CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
*
            CALL DGESVJ( 'Lower', 'U','N', NR, NR, V,LDV, SVA, NR, U,
     $                  LDU, WORK(N+1), LWORK, INFO )
            SCALEM  = WORK(N+1)
            NUMRANK = IDNINT(WORK(N+2))
            IF ( NR .LT. N ) THEN
               CALL DLASET( 'A',N-NR, NR, ZERO,ZERO, V(NR+1,1),   LDV )
               CALL DLASET( 'A',NR, N-NR, ZERO,ZERO, V(1,NR+1),   LDV )
               CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE, V(NR+1,NR+1), LDV )
            END IF
*
         CALL DORMLQ( 'Left', 'Transpose', N, N, NR, A, LDA, WORK,
     $               V, LDV, WORK(N+1), LWORK-N, IERR )
*
         END IF
*
         DO 8991 p = 1, N
            CALL DCOPY( N, V(p,1), LDV, A(IWORK(p),1), LDA )
8991    CONTINUE
         CALL DLACPY( 'All', N, N, A, LDA, V, LDV )
*
         IF ( TRANSP ) THEN
            CALL DLACPY( 'All', N, N, V, LDV, U, LDU )
         END IF
*
      ELSE IF ( LSVEC .AND. ( .NOT. RSVEC ) ) THEN
*
*        .. Singular Values and Left Singular Vectors                 ..
*
*        .. second preconditioning step to avoid need to accumulate
*        Jacobi rotations in the Jacobi iterations.
         DO 1965 p = 1, NR
            CALL DCOPY( N-p+1, A(p,p), LDA, U(p,p), 1 )
1965    CONTINUE
         CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU )
*
         CALL DGEQRF( N, NR, U, LDU, WORK(N+1), WORK(2*N+1),
     $              LWORK-2*N, IERR )
*
         DO 1967 p = 1, NR - 1
            CALL DCOPY( NR-p, U(p,p+1), LDU, U(p+1,p), 1 )
1967    CONTINUE
         CALL DLASET( 'Upper', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU )
*
         CALL DGESVJ( 'Lower', 'U', 'N', NR,NR, U, LDU, SVA, NR, A,
     $        LDA, WORK(N+1), LWORK-N, INFO )
         SCALEM  = WORK(N+1)
         NUMRANK = IDNINT(WORK(N+2))
*
         IF ( NR .LT. M ) THEN
            CALL DLASET( 'A',  M-NR, NR,ZERO, ZERO, U(NR+1,1), LDU )
            IF ( NR .LT. N1 ) THEN
               CALL DLASET( 'A',NR, N1-NR, ZERO, ZERO, U(1,NR+1), LDU )
               CALL DLASET( 'A',M-NR,N1-NR,ZERO,ONE,U(NR+1,NR+1), LDU )
            END IF
         END IF
*
         CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
     $               LDU, WORK(N+1), LWORK-N, IERR )
*
         IF ( ROWPIV )
     $       CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
         DO 1974 p = 1, N1
            XSC = ONE / DNRM2( M, U(1,p), 1 )
            CALL DSCAL( M, XSC, U(1,p), 1 )
1974    CONTINUE
*
         IF ( TRANSP ) THEN
            CALL DLACPY( 'All', N, N, U, LDU, V, LDV )
         END IF
*
      ELSE
*
*        .. Full SVD ..
*
         IF ( .NOT. JRACC ) THEN
*
         IF ( .NOT. ALMORT ) THEN
*
*           Second Preconditioning Step (QRF [with pivoting])
*           Note that the composition of TRANSPOSE, QRF and TRANSPOSE is
*           equivalent to an LQF CALL. Since in many libraries the QRF
*           seems to be better optimized than the LQF, we do explicit
*           transpose and use the QRF. This is subject to changes in an
*           optimized implementation of DGEJSV.
*
            DO 1968 p = 1, NR
               CALL DCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
1968       CONTINUE
*
*           .. the following two loops perturb small entries to avoid
*           denormals in the second QR factorization, where they are
*           as good as zeros. This is done to avoid painfully slow
*           computation with denormals. The relative size of the perturbation
*           is a parameter that can be changed by the implementer.
*           This perturbation device will be obsolete on machines with
*           properly implemented arithmetic.
*           To switch it off, set L2PERT=.FALSE. To remove it from  the
*           code, remove the action under L2PERT=.TRUE., leave the ELSE part.
*           The following two loops should be blocked and fused with the
*           transposed copy above.
*
            IF ( L2PERT ) THEN
               XSC = DSQRT(SMALL)
               DO 2969 q = 1, NR
                  TEMP1 = XSC*DABS( V(q,q) )
                  DO 2968 p = 1, N
                     IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 )
     $                   .OR. ( p .LT. q ) )
     $                   V(p,q) = DSIGN( TEMP1, V(p,q) )
                     IF ( p .LT. q ) V(p,q) = - V(p,q)
2968             CONTINUE
2969          CONTINUE
            ELSE
               CALL DLASET( 'U', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
            END IF
*
*           Estimate the row scaled condition number of R1
*           (If R1 is rectangular, N > NR, then the condition number
*           of the leading NR x NR submatrix is estimated.)
*
            CALL DLACPY( 'L', NR, NR, V, LDV, WORK(2*N+1), NR )
            DO 3950 p = 1, NR
               TEMP1 = DNRM2(NR-p+1,WORK(2*N+(p-1)*NR+p),1)
               CALL DSCAL(NR-p+1,ONE/TEMP1,WORK(2*N+(p-1)*NR+p),1)
3950       CONTINUE
            CALL DPOCON('Lower',NR,WORK(2*N+1),NR,ONE,TEMP1,
     $                   WORK(2*N+NR*NR+1),IWORK(M+2*N+1),IERR)
            CONDR1 = ONE / DSQRT(TEMP1)
*           .. here need a second opinion on the condition number
*           .. then assume worst case scenario
*           R1 is OK for inverse <=> CONDR1 .LT. DBLE(N)
*           more conservative    <=> CONDR1 .LT. DSQRT(DBLE(N))
*
            COND_OK = DSQRT(DBLE(NR))
*[TP]       COND_OK is a tuning parameter.

            IF ( CONDR1 .LT. COND_OK ) THEN
*              .. the second QRF without pivoting. Note: in an optimized
*              implementation, this QRF should be implemented as the QRF
*              of a lower triangular matrix.
*              R1^t = Q2 * R2
               CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
     $              LWORK-2*N, IERR )
*
               IF ( L2PERT ) THEN
                  XSC = DSQRT(SMALL)/EPSLN
                  DO 3959 p = 2, NR
                     DO 3958 q = 1, p - 1
                        TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
                        IF ( DABS(V(q,p)) .LE. TEMP1 )
     $                     V(q,p) = DSIGN( TEMP1, V(q,p) )
3958                CONTINUE
3959             CONTINUE
               END IF
*
               IF ( NR .NE. N )
     $         CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N )
*              .. save ...
*
*           .. this transposed copy should be better than naive
               DO 1969 p = 1, NR - 1
                  CALL DCOPY( NR-p, V(p,p+1), LDV, V(p+1,p), 1 )
1969          CONTINUE
*
               CONDR2 = CONDR1
*
            ELSE
*
*              .. ill-conditioned case: second QRF with pivoting
*              Note that windowed pivoting would be equally good
*              numerically, and more run-time efficient. So, in
*              an optimal implementation, the next call to DGEQP3
*              should be replaced with eg. CALL SGEQPX (ACM TOMS #782)
*              with properly (carefully) chosen parameters.
*
*              R1^t * P2 = Q2 * R2
               DO 3003 p = 1, NR
                  IWORK(N+p) = 0
3003          CONTINUE
               CALL DGEQP3( N, NR, V, LDV, IWORK(N+1), WORK(N+1),
     $                  WORK(2*N+1), LWORK-2*N, IERR )
**               CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
**     $              LWORK-2*N, IERR )
               IF ( L2PERT ) THEN
                  XSC = DSQRT(SMALL)
                  DO 3969 p = 2, NR
                     DO 3968 q = 1, p - 1
                        TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
                        IF ( DABS(V(q,p)) .LE. TEMP1 )
     $                     V(q,p) = DSIGN( TEMP1, V(q,p) )
3968                CONTINUE
3969             CONTINUE
               END IF
*
               CALL DLACPY( 'A', N, NR, V, LDV, WORK(2*N+1), N )
*
               IF ( L2PERT ) THEN
                  XSC = DSQRT(SMALL)
                  DO 8970 p = 2, NR
                     DO 8971 q = 1, p - 1
                        TEMP1 = XSC * DMIN1(DABS(V(p,p)),DABS(V(q,q)))
                        V(p,q) = - DSIGN( TEMP1, V(q,p) )
8971                CONTINUE
8970             CONTINUE
               ELSE
                  CALL DLASET( 'L',NR-1,NR-1,ZERO,ZERO,V(2,1),LDV )
               END IF
*              Now, compute R2 = L3 * Q3, the LQ factorization.
               CALL DGELQF( NR, NR, V, LDV, WORK(2*N+N*NR+1),
     $               WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, IERR )
*              .. and estimate the condition number
               CALL DLACPY( 'L',NR,NR,V,LDV,WORK(2*N+N*NR+NR+1),NR )
               DO 4950 p = 1, NR
                  TEMP1 = DNRM2( p, WORK(2*N+N*NR+NR+p), NR )
                  CALL DSCAL( p, ONE/TEMP1, WORK(2*N+N*NR+NR+p), NR )
4950          CONTINUE
               CALL DPOCON( 'L',NR,WORK(2*N+N*NR+NR+1),NR,ONE,TEMP1,
     $              WORK(2*N+N*NR+NR+NR*NR+1),IWORK(M+2*N+1),IERR )
               CONDR2 = ONE / DSQRT(TEMP1)
*
               IF ( CONDR2 .GE. COND_OK ) THEN
*                 .. save the Householder vectors used for Q3
*                 (this overwrites the copy of R2, as it will not be
*                 needed in this branch, but it does not overwrite the
*                 Householder vectors of Q2.).
                  CALL DLACPY( 'U', NR, NR, V, LDV, WORK(2*N+1), N )
*                 .. and the rest of the information on Q3 is in
*                 WORK(2*N+N*NR+1:2*N+N*NR+N)
               END IF
*
            END IF
*
            IF ( L2PERT ) THEN
               XSC = DSQRT(SMALL)
               DO 4968 q = 2, NR
                  TEMP1 = XSC * V(q,q)
                  DO 4969 p = 1, q - 1
*                    V(p,q) = - DSIGN( TEMP1, V(q,p) )
                     V(p,q) = - DSIGN( TEMP1, V(p,q) )
4969             CONTINUE
4968          CONTINUE
            ELSE
               CALL DLASET( 'U', NR-1,NR-1, ZERO,ZERO, V(1,2), LDV )
            END IF
*
*        Second preconditioning finished; continue with Jacobi SVD
*        The input matrix is lower triangular.
*
*        Recover the right singular vectors as solution of a well
*        conditioned triangular matrix equation.
*
            IF ( CONDR1 .LT. COND_OK ) THEN
*
               CALL DGESVJ( 'L','U','N',NR,NR,V,LDV,SVA,NR,U,
     $              LDU,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,INFO )
               SCALEM  = WORK(2*N+N*NR+NR+1)
               NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
               DO 3970 p = 1, NR
                  CALL DCOPY( NR, V(1,p), 1, U(1,p), 1 )
                  CALL DSCAL( NR, SVA(p),    V(1,p), 1 )
3970          CONTINUE

*        .. pick the right matrix equation and solve it
*
               IF ( NR .EQ. N ) THEN
* :))             .. best case, R1 is inverted. The solution of this matrix
*                 equation is Q2*V2 = the product of the Jacobi rotations
*                 used in DGESVJ, premultiplied with the orthogonal matrix
*                 from the second QR factorization.
                  CALL DTRSM( 'L','U','N','N', NR,NR,ONE, A,LDA, V,LDV )
               ELSE
*                 .. R1 is well conditioned, but non-square. Transpose(R2)
*                 is inverted to get the product of the Jacobi rotations
*                 used in DGESVJ. The Q-factor from the second QR
*                 factorization is then built in explicitly.
                  CALL DTRSM('L','U','T','N',NR,NR,ONE,WORK(2*N+1),
     $                 N,V,LDV)
                  IF ( NR .LT. N ) THEN
                    CALL DLASET('A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV)
                    CALL DLASET('A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV)
                    CALL DLASET('A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV)
                  END IF
                  CALL DORMQR('L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
     $                 V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR)
               END IF
*
            ELSE IF ( CONDR2 .LT. COND_OK ) THEN
*
* :)           .. the input matrix A is very likely a relative of
*              the Kahan matrix :)
*              The matrix R2 is inverted. The solution of the matrix equation
*              is Q3^T*V3 = the product of the Jacobi rotations (applied to
*              the lower triangular L3 from the LQ factorization of
*              R2=L3*Q3), pre-multiplied with the transposed Q3.
               CALL DGESVJ( 'L', 'U', 'N', NR, NR, V, LDV, SVA, NR, U,
     $              LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
               SCALEM  = WORK(2*N+N*NR+NR+1)
               NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
               DO 3870 p = 1, NR
                  CALL DCOPY( NR, V(1,p), 1, U(1,p), 1 )
                  CALL DSCAL( NR, SVA(p),    U(1,p), 1 )
3870          CONTINUE
               CALL DTRSM('L','U','N','N',NR,NR,ONE,WORK(2*N+1),N,U,LDU)
*              .. apply the permutation from the second QR factorization
               DO 873 q = 1, NR
                  DO 872 p = 1, NR
                     WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
872              CONTINUE
                  DO 874 p = 1, NR
                     U(p,q) = WORK(2*N+N*NR+NR+p)
874              CONTINUE
873           CONTINUE
               IF ( NR .LT. N ) THEN
                  CALL DLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV )
                  CALL DLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV )
                  CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
               END IF
               CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
     $              V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
            ELSE
*              Last line of defense.
* #:(          This is a rather pathological case: no scaled condition
*              improvement after two pivoted QR factorizations. Other
*              possibility is that the rank revealing QR factorization
*              or the condition estimator has failed, or the COND_OK
*              is set very close to ONE (which is unnecessary). Normally,
*              this branch should never be executed, but in rare cases of
*              failure of the RRQR or condition estimator, the last line of
*              defense ensures that DGEJSV completes the task.
*              Compute the full SVD of L3 using DGESVJ with explicit
*              accumulation of Jacobi rotations.
               CALL DGESVJ( 'L', 'U', 'V', NR, NR, V, LDV, SVA, NR, U,
     $              LDU, WORK(2*N+N*NR+NR+1), LWORK-2*N-N*NR-NR, INFO )
               SCALEM  = WORK(2*N+N*NR+NR+1)
               NUMRANK = IDNINT(WORK(2*N+N*NR+NR+2))
               IF ( NR .LT. N ) THEN
                  CALL DLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV )
                  CALL DLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV )
                  CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
               END IF
               CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
     $              V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
*
               CALL DORMLQ( 'L', 'T', NR, NR, NR, WORK(2*N+1), N,
     $              WORK(2*N+N*NR+1), U, LDU, WORK(2*N+N*NR+NR+1),
     $              LWORK-2*N-N*NR-NR, IERR )
               DO 773 q = 1, NR
                  DO 772 p = 1, NR
                     WORK(2*N+N*NR+NR+IWORK(N+p)) = U(p,q)
772              CONTINUE
                  DO 774 p = 1, NR
                     U(p,q) = WORK(2*N+N*NR+NR+p)
774              CONTINUE
773           CONTINUE
*
            END IF
*
*           Permute the rows of V using the (column) permutation from the
*           first QRF. Also, scale the columns to make them unit in
*           Euclidean norm. This applies to all cases.
*
            TEMP1 = DSQRT(DBLE(N)) * EPSLN
            DO 1972 q = 1, N
               DO 972 p = 1, N
                  WORK(2*N+N*NR+NR+IWORK(p)) = V(p,q)
  972          CONTINUE
               DO 973 p = 1, N
                  V(p,q) = WORK(2*N+N*NR+NR+p)
  973          CONTINUE
               XSC = ONE / DNRM2( N, V(1,q), 1 )
               IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
     $           CALL DSCAL( N, XSC, V(1,q), 1 )
1972       CONTINUE
*           At this moment, V contains the right singular vectors of A.
*           Next, assemble the left singular vector matrix U (M x N).
            IF ( NR .LT. M ) THEN
               CALL DLASET( 'A', M-NR, NR, ZERO, ZERO, U(NR+1,1), LDU )
               IF ( NR .LT. N1 ) THEN
                  CALL DLASET('A',NR,N1-NR,ZERO,ZERO,U(1,NR+1),LDU)
                  CALL DLASET('A',M-NR,N1-NR,ZERO,ONE,U(NR+1,NR+1),LDU)
               END IF
            END IF
*
*           The Q matrix from the first QRF is built into the left singular
*           matrix U. This applies to all cases.
*
            CALL DORMQR( 'Left', 'No_Tr', M, N1, N, A, LDA, WORK, U,
     $           LDU, WORK(N+1), LWORK-N, IERR )

*           The columns of U are normalized. The cost is O(M*N) flops.
            TEMP1 = DSQRT(DBLE(M)) * EPSLN
            DO 1973 p = 1, NR
               XSC = ONE / DNRM2( M, U(1,p), 1 )
               IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
     $          CALL DSCAL( M, XSC, U(1,p), 1 )
1973       CONTINUE
*
*           If the initial QRF is computed with row pivoting, the left
*           singular vectors must be adjusted.
*
            IF ( ROWPIV )
     $          CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
         ELSE
*
*        .. the initial matrix A has almost orthogonal columns and
*        the second QRF is not needed
*
            CALL DLACPY( 'Upper', N, N, A, LDA, WORK(N+1), N )
            IF ( L2PERT ) THEN
               XSC = DSQRT(SMALL)
               DO 5970 p = 2, N
                  TEMP1 = XSC * WORK( N + (p-1)*N + p )
                  DO 5971 q = 1, p - 1
                     WORK(N+(q-1)*N+p)=-DSIGN(TEMP1,WORK(N+(p-1)*N+q))
5971             CONTINUE
5970          CONTINUE
            ELSE
               CALL DLASET( 'Lower',N-1,N-1,ZERO,ZERO,WORK(N+2),N )
            END IF
*
            CALL DGESVJ( 'Upper', 'U', 'N', N, N, WORK(N+1), N, SVA,
     $           N, U, LDU, WORK(N+N*N+1), LWORK-N-N*N, INFO )
*
            SCALEM  = WORK(N+N*N+1)
            NUMRANK = IDNINT(WORK(N+N*N+2))
            DO 6970 p = 1, N
               CALL DCOPY( N, WORK(N+(p-1)*N+1), 1, U(1,p), 1 )
               CALL DSCAL( N, SVA(p), WORK(N+(p-1)*N+1), 1 )
6970       CONTINUE
*
            CALL DTRSM( 'Left', 'Upper', 'NoTrans', 'No UD', N, N,
     $           ONE, A, LDA, WORK(N+1), N )
            DO 6972 p = 1, N
               CALL DCOPY( N, WORK(N+p), N, V(IWORK(p),1), LDV )
6972       CONTINUE
            TEMP1 = DSQRT(DBLE(N))*EPSLN
            DO 6971 p = 1, N
               XSC = ONE / DNRM2( N, V(1,p), 1 )
               IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
     $            CALL DSCAL( N, XSC, V(1,p), 1 )
6971       CONTINUE
*
*           Assemble the left singular vector matrix U (M x N).
*
            IF ( N .LT. M ) THEN
               CALL DLASET( 'A',  M-N, N, ZERO, ZERO, U(N+1,1), LDU )
               IF ( N .LT. N1 ) THEN
                  CALL DLASET( 'A',N,  N1-N, ZERO, ZERO,  U(1,N+1),LDU )
                  CALL DLASET( 'A',M-N,N1-N, ZERO, ONE,U(N+1,N+1),LDU )
               END IF
            END IF
            CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
     $           LDU, WORK(N+1), LWORK-N, IERR )
            TEMP1 = DSQRT(DBLE(M))*EPSLN
            DO 6973 p = 1, N1
               XSC = ONE / DNRM2( M, U(1,p), 1 )
               IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
     $            CALL DSCAL( M, XSC, U(1,p), 1 )
6973       CONTINUE
*
            IF ( ROWPIV )
     $         CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
         END IF
*
*        end of the  >> almost orthogonal case <<  in the full SVD
*
         ELSE
*
*        This branch deploys a preconditioned Jacobi SVD with explicitly
*        accumulated rotations. It is included as optional, mainly for
*        experimental purposes. It does perform well, and can also be used.
*        In this implementation, this branch will be automatically activated
*        if the  condition number sigma_max(A) / sigma_min(A) is predicted
*        to be greater than the overflow threshold. This is because the
*        a posteriori computation of the singular vectors assumes robust
*        implementation of BLAS and some LAPACK procedures, capable of working
*        in presence of extreme values. Since that is not always the case, ...
*
         DO 7968 p = 1, NR
            CALL DCOPY( N-p+1, A(p,p), LDA, V(p,p), 1 )
7968    CONTINUE
*
         IF ( L2PERT ) THEN
            XSC = DSQRT(SMALL/EPSLN)
            DO 5969 q = 1, NR
               TEMP1 = XSC*DABS( V(q,q) )
               DO 5968 p = 1, N
                  IF ( ( p .GT. q ) .AND. ( DABS(V(p,q)) .LE. TEMP1 )
     $                .OR. ( p .LT. q ) )
     $                V(p,q) = DSIGN( TEMP1, V(p,q) )
                  IF ( p .LT. q ) V(p,q) = - V(p,q)
5968          CONTINUE
5969       CONTINUE
         ELSE
            CALL DLASET( 'U', NR-1, NR-1, ZERO, ZERO, V(1,2), LDV )
         END IF

         CALL DGEQRF( N, NR, V, LDV, WORK(N+1), WORK(2*N+1),
     $        LWORK-2*N, IERR )
         CALL DLACPY( 'L', N, NR, V, LDV, WORK(2*N+1), N )
*
         DO 7969 p = 1, NR
            CALL DCOPY( NR-p+1, V(p,p), LDV, U(p,p), 1 )
7969    CONTINUE

         IF ( L2PERT ) THEN
            XSC = DSQRT(SMALL/EPSLN)
            DO 9970 q = 2, NR
               DO 9971 p = 1, q - 1
                  TEMP1 = XSC * DMIN1(DABS(U(p,p)),DABS(U(q,q)))
                  U(p,q) = - DSIGN( TEMP1, U(q,p) )
9971          CONTINUE
9970       CONTINUE
         ELSE
            CALL DLASET('U', NR-1, NR-1, ZERO, ZERO, U(1,2), LDU )
         END IF

         CALL DGESVJ( 'G', 'U', 'V', NR, NR, U, LDU, SVA,
     $        N, V, LDV, WORK(2*N+N*NR+1), LWORK-2*N-N*NR, INFO )
         SCALEM  = WORK(2*N+N*NR+1)
         NUMRANK = IDNINT(WORK(2*N+N*NR+2))

         IF ( NR .LT. N ) THEN
            CALL DLASET( 'A',N-NR,NR,ZERO,ZERO,V(NR+1,1),LDV )
            CALL DLASET( 'A',NR,N-NR,ZERO,ZERO,V(1,NR+1),LDV )
            CALL DLASET( 'A',N-NR,N-NR,ZERO,ONE,V(NR+1,NR+1),LDV )
         END IF

         CALL DORMQR( 'L','N',N,N,NR,WORK(2*N+1),N,WORK(N+1),
     $        V,LDV,WORK(2*N+N*NR+NR+1),LWORK-2*N-N*NR-NR,IERR )
*
*           Permute the rows of V using the (column) permutation from the
*           first QRF. Also, scale the columns to make them unit in
*           Euclidean norm. This applies to all cases.
*
            TEMP1 = DSQRT(DBLE(N)) * EPSLN
            DO 7972 q = 1, N
               DO 8972 p = 1, N
                  WORK(2*N+N*NR+NR+IWORK(p)) = V(p,q)
8972          CONTINUE
               DO 8973 p = 1, N
                  V(p,q) = WORK(2*N+N*NR+NR+p)
8973          CONTINUE
               XSC = ONE / DNRM2( N, V(1,q), 1 )
               IF ( (XSC .LT. (ONE-TEMP1)) .OR. (XSC .GT. (ONE+TEMP1)) )
     $           CALL DSCAL( N, XSC, V(1,q), 1 )
7972       CONTINUE
*
*           At this moment, V contains the right singular vectors of A.
*           Next, assemble the left singular vector matrix U (M x N).
*
         IF ( NR .LT. M ) THEN
            CALL DLASET( 'A',  M-NR, NR, ZERO, ZERO, U(NR+1,1), LDU )
            IF ( NR .LT. N1 ) THEN
               CALL DLASET( 'A',NR,  N1-NR, ZERO, ZERO,  U(1,NR+1),LDU )
               CALL DLASET( 'A',M-NR,N1-NR, ZERO, ONE,U(NR+1,NR+1),LDU )
            END IF
         END IF
*
         CALL DORMQR( 'Left', 'No Tr', M, N1, N, A, LDA, WORK, U,
     $        LDU, WORK(N+1), LWORK-N, IERR )
*
            IF ( ROWPIV )
     $         CALL DLASWP( N1, U, LDU, 1, M-1, IWORK(2*N+1), -1 )
*
*
         END IF
         IF ( TRANSP ) THEN
*           .. swap U and V because the procedure worked on A^t
            DO 6974 p = 1, N
               CALL DSWAP( N, U(1,p), 1, V(1,p), 1 )
6974       CONTINUE
         END IF
*
      END IF
*     end of the full SVD
*
*     Undo scaling, if necessary (and possible)
*
      IF ( USCAL2 .LE. (BIG/SVA(1))*USCAL1 ) THEN
         CALL DLASCL( 'G', 0, 0, USCAL1, USCAL2, NR, 1, SVA, N, IERR )
         USCAL1 = ONE
         USCAL2 = ONE
      END IF
*
      IF ( NR .LT. N ) THEN
         DO 3004 p = NR+1, N
            SVA(p) = ZERO
3004    CONTINUE
      END IF
*
      WORK(1) = USCAL2 * SCALEM
      WORK(2) = USCAL1
      IF ( ERREST ) WORK(3) = SCONDA
      IF ( LSVEC .AND. RSVEC ) THEN
         WORK(4) = CONDR1
         WORK(5) = CONDR2
      END IF
      IF ( L2TRAN ) THEN
         WORK(6) = ENTRA
         WORK(7) = ENTRAT
      END IF
*
      IWORK(1) = NR
      IWORK(2) = NUMRANK
      IWORK(3) = WARNING
*
      RETURN
*     ..
*     .. END OF DGEJSV
*     ..
      END
*