Possible Solution

#include <stdio.h>          // for printf()
#include <stdlib.h>         // for malloc(), free(), rand(), srand()
#include <stddef.h>         // for size_t, ptrdiff_t
#include <stdbool.h>        // for typedef bool

//-- print matrix --------------------------------------------------------------

// general matrix
void
printDGeMatrix(size_t m, size_t n,
               const double *A,
               ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            printf("%9.2lf ", A[i*incRowA + j*incColA]);
        }
        printf("\n");
    }
    printf("\n");
}

// trapezoidal matrix
void
printDTrMatrix(size_t m, size_t n, bool lower, bool unit,
               const double *A,
               ptrdiff_t incRowA, ptrdiff_t incColA)
{
    for (size_t i=0; i<m; ++i) {
        for (size_t j=0; j<n; ++j) {
            if (i==j) {
                printf("%9.2lf ", unit ? 1 : A[i*incRowA + j*incColA]);
                continue;
            }
            if ((lower && i>j) || (!lower && i<j)) {
                printf("%9.2lf ", A[i*incRowA + j*incColA]);
            } else {
                printf("%9.2lf ", 0.0);
            }
        }
        printf("\n");
    }
    printf("\n");
}

//-- required BLAS functions: daxpy, ddot, dtrsv -------------------------------

void
daxpy(size_t n, double alpha,
      const double *x, ptrdiff_t incX,
      double *y, ptrdiff_t incY)
{
    for (size_t i=0; i<n; ++i) {
        y[i*incY] += alpha*x[i*incX];
    }
}

double
ddot(size_t n,
     const double *x, ptrdiff_t incX,
     const double *y, ptrdiff_t incY)
{
    double result = 0;

    for (size_t i=0; i<n; ++i) {
        result += x[i*incX]*y[i*incY];
    }

    return result;
}

void
dtrsv(size_t n, bool lower, bool unit,
      const double *A, ptrdiff_t incRowA, ptrdiff_t incColA,
      double *x, ptrdiff_t incX)
{
    if (lower) {
        if (incRowA<incColA) {
            // A is col major
            for (size_t j=0; j<n; ++j) {
                if (!unit) {
                    x[j*incX] /= A[j*(incRowA+incColA)];
                }
                daxpy(n-1-j, -x[j*incX], &A[(j+1)*incRowA+j*incColA], incRowA,
                      &x[(j+1)*incX], incX);
            }
        } else {
            // A is row major
            for (size_t j=0; j<n; ++j) {
                x[j*incX] -= ddot(j, &A[j*incRowA], incColA, x, incX);
                if (!unit) {
                    x[j*incX] /= A[j*(incRowA+incColA)];
                }
            }
        }
    } else {
        if (incRowA<incColA) {
            // A is col major
            for (size_t j=n; j-- >0; ) {
                if (!unit) {
                    x[j*incX] /= A[j*(incRowA+incColA)];
                }
                daxpy(j, -x[j*incX], &A[j*incColA], incRowA,
                      x, incX);
            }
        } else {
            // A is row major
            for (size_t j=n; j-- >0; ) {
                x[j*incX] -= ddot(n-1-j,
                                  &A[j*incRowA+(j+1)*incColA], incColA,
                                  &x[(j+1)*incX], incX);
                if (!unit) {
                    x[j*incX] /= A[j*(incRowA+incColA)];
                }
            }
        }
    }
}

//-- simple test program -------------------------------------------------------

#ifndef COLMAJOR
#define COLMAJOR 1
#endif

int
main()
{
    printf("COLMAJOR = %d\n", COLMAJOR);

    size_t      n       = 3;
    ptrdiff_t   incRowA = COLMAJOR ? 1 : n+1;
    ptrdiff_t   incColA = COLMAJOR ? n : 1;
    double      *A      = malloc(n*(n+1)*sizeof(double));

    if (!A) {
        abort();
    }

    // init A handish
    A[0*incRowA + 0*incColA] =  4;
    A[0*incRowA + 1*incColA] =  5;
    A[0*incRowA + 2*incColA] =  6;
    A[0*incRowA + 3*incColA] = 32;

    A[1*incRowA + 0*incColA] =  1./4;
    A[1*incRowA + 1*incColA] =  3./4;
    A[1*incRowA + 2*incColA] =  3./2;
    A[1*incRowA + 3*incColA] =  14;

    A[2*incRowA + 0*incColA] =  7./4;
    A[2*incRowA + 1*incColA] = -1;
    A[2*incRowA + 2*incColA] = -8;
    A[2*incRowA + 3*incColA] =  26;

    printf("A =\n");
    printDGeMatrix(n, n+1, A, incRowA, incColA);

    printf("b =\n");
    printDGeMatrix(1, n, &A[n*incColA], 0, incRowA);

    printf("print A as lower unit trapezoidal matrix:\n");
    printf("L =\n");
    printDTrMatrix(n, n, true, true, A, incRowA, incColA);

    printf("print A as upper trapezoidal matrix:\n");
    printf("U =\n");
    printDTrMatrix(n, n, false, false, A, incRowA, incColA);

    //-- Solve L*y = b
    dtrsv(n, true, true, A, incRowA, incColA, &A[n*incColA], incRowA);

    printf("y =\n");
    printDGeMatrix(1, n, &A[n*incColA], 0, incRowA);

    //-- Solve U*x = y
    dtrsv(n, false, false, A, incRowA, incColA, &A[n*incColA], incRowA);

    printf("x =\n");
    printDGeMatrix(1, n, &A[n*incColA], 0, incRowA);

    free(A);
}

Test for col major

thales$ gcc -Wall -std=c11 -o simple_dtrsv simple_dtrsv.c
thales$ ./simple_dtrsv
COLMAJOR = 1
A =
     4.00      5.00      6.00     32.00 
     0.25      0.75      1.50     14.00 
     1.75     -1.00     -8.00     26.00 

b =
    32.00     14.00     26.00 

print A as lower unit trapezoidal matrix:
L =
     1.00      0.00      0.00 
     0.25      1.00      0.00 
     1.75     -1.00      1.00 

print A as upper trapezoidal matrix:
U =
     4.00      5.00      6.00 
     0.00      0.75      1.50 
     0.00      0.00     -8.00 

y =
    32.00      6.00    -24.00 

x =
     1.00      2.00      3.00 
thales$ 

Test for row major

thales$ gcc -Wall -std=c11 -DCOLMAJOR=0 -o simple_dtrsv simple_dtrsv.c
thales$ ./simple_dtrsv
COLMAJOR = 0
A =
     4.00      5.00      6.00     32.00 
     0.25      0.75      1.50     14.00 
     1.75     -1.00     -8.00     26.00 

b =
    32.00     14.00     26.00 

print A as lower unit trapezoidal matrix:
L =
     1.00      0.00      0.00 
     0.25      1.00      0.00 
     1.75     -1.00      1.00 

print A as upper trapezoidal matrix:
U =
     4.00      5.00      6.00 
     0.00      0.75      1.50 
     0.00      0.00     -8.00 

y =
    32.00      6.00    -24.00 

x =
     1.00      2.00      3.00 
thales$