Skip to content
Snippets Groups Projects
plot.plt 6.58 KiB
#!/bin/gnuplot

# Work out which field separator the CSV uses. The file must yield at least two
# columns, so a separator guess counts as wrong when `stats` reports fewer.

# First guess: comma.
sep = ","
set datafile separator sep
stats 'data_plot.csv' nooutput

# Second guess: semicolon, tried only if the comma did not split the rows.
if (STATS_columns < 2) {
  sep = ";"
  set datafile separator sep
  stats 'data_plot.csv' nooutput
}

print sprintf("Detected %d columns.", STATS_columns)

# Neither separator produced two or more columns: there is nothing to plot.
if (STATS_columns < 2) {
  print "Detected too few columns. Exiting."
  exit
}

# ---- Axes -------------------------------------------------------------------
# x is a time axis labelled as ISO dates; y starts at 0 with an automatic
# upper bound.
set xdata time
set format x "%Y-%m-%d"
set yrange [0:*]

# Input date/time format. A standard format is assumed by default; a file
# named timefmt.txt in the working directory, if present, is loaded and may
# replace it. The shell snippet prints 1 when the file exists and 0 otherwise.
set timefmt "%Y-%m-%d %H:%M"
if (system("[ ! -f timefmt.txt ]; echo $?")) {
    load "timefmt.txt"
}

# Fixed display window.
set xrange ["2020-01-01":"2022-04-02"]

# ---- Tics and grid ----------------------------------------------------------
# Rotated black tic labels, shifted so they sit below the axis.
set xtics rotate by 45 textcolor rgbcolor "black" offset -4,-3

# Major tics start at 2020-01-01 and repeat every 365 days (in seconds), with
# 12 minor intervals between them; the grid follows major x and y tics only.
# NOTE(review): the step is a fixed 365 days, so after the leap year 2020 the
# following tics land on 12-31 instead of 01-01 - confirm this is intended.
set xtics '2020-01-01 00:00',365*24*3600
set mxtics 12
set grid xtics nomxtics ytics

# ---- Line style and legend --------------------------------------------------
set linetype 2 dashtype 2
set key top left


# ---- Canvas layout ----------------------------------------------------------
# Screen coordinates of the plot area and the gaps between panels. They also
# allow several aligned diagrams on one canvas (multiplot), if wanted.
MP_LEFT   = 0.1
MP_RIGHT  = 0.95
MP_BOTTOM = 0.14
MP_TOP    = 0.93
MP_xGAP   = 0.1
MP_yGAP   = 0.02


# ---- Fit configuration ------------------------------------------------------
# Scale parameter errors and export them as <name>_err variables.
set fit errorscaling errorvariables
FIT_MAXITER=1000
FIT_LIMIT=1e-15     # very tight limit; meant to help fitting very degenerate plots

# Model to fit: a straight line with slope m and offset y0.
susage(x) = m*x + y0

# Process every data column (2..STATS_columns) of the CSV:
#   1. fit the straight line susage(x) to the column over the fit window,
#   2. append one summary row for the column to susage.csv,
#   3. draw the column, the fit and the marker dates to plot_<i>.pdf and
#      plot_<i>.svg.
#
# Inputs taken from outside this section:
#   sep, STATS_columns, susage(x), MP_* : defined earlier in this script
#   servicename                         : not assigned anywhere in this script;
#                                         it must be defined before the script
#                                         runs (for instance with `gnuplot -e`)
#   T_onboard.txt, T_reference_overall.txt : optional override files, loaded
#                                         when present in the working directory

filename='data_plot.csv'
# since replot does not work reliably, we repeat the whole plot sequence
# explicitly for every output format wanted.

# Without this guard an undefined `servicename` aborts the script in the middle
# of the first iteration (at the sprintf that builds the log row). Fall back to
# a placeholder instead. The warning is printed before the `set print`
# redirection below so that it does not end up inside susage.csv.
if (!exists("servicename")) {
    print "Warning: servicename is not defined; using 'unknown'."
    servicename = "unknown"
}

# log to one file per service
set print "susage.csv"

do for [i=2:STATS_columns] {
    # starting parameters for each fit
    y0=100
    m=1

    # fit slope for data points at least from 2020 (to exclude any possible data points from 2019 or before)
    # for annual reporting 2021: AND fit only for data that is <=2021

    # Date format used for all T_* strings below and for reading column 1.
    # NOTE(review): this unconditionally replaces the timefmt chosen earlier in
    # the script, including one loaded from timefmt.txt - confirm that the
    # override file is still meant to have an effect on data parsing.
    fmt = "%Y-%m-%d"
    set xdata time
    set timefmt fmt

    # reporting date (end of measurement period)
    T_report="2021-12-31"   # hard-coded for every report

    # Start of the log: first field of the first line after the header that
    # has more than one field (carriage returns stripped beforehand).
    T_logstart=system("cat ".filename." | tr -d '\r' | awk 'BEGIN{FS=\"".sep."\"}{if (NR>1 && NF>1) {print $1; exit;}}'")


    # Onboarding date can be no earlier than the start of the pilot Helmholtz Cloud, i.e. 2021-03-29.
    # https://hifis.net/news/2021/03/29/helmholtz-cloud-beta

    T_onboard="2021-03-29"
    # If a manually maintained T_onboard.txt exists for this service, load it;
    # it may define another T_onboard. The shell snippet prints 1 when the
    # file exists and 0 otherwise.
    if (system("[ ! -f T_onboard.txt ]; echo $?")) {
        load "T_onboard.txt"
    }

    # The fit window starts at the later of onboarding date and log start.
    if (strptime(fmt, T_onboard) > strptime(fmt, T_logstart)) {
        T_fitstart_overall=T_onboard
    } else {
        T_fitstart_overall=T_logstart
    }

    # Do the fitting (sets m, y0 and, via errorvariables, m_err and y0_err)
    fit [strptime(fmt, T_fitstart_overall):strptime(fmt, T_report)] susage(x) filename u 1:i via m,y0


    # Later: Yearly procedure for yearly reports, not defined for 2021 annual report yet.
    # Will take care of this later for reporting 2023 ff.
    # But we need a placeholder here.
    T_fitstart_yearly="N/A"
    T_reference_yearly="N/A"

    # Now we have a fit and can decide where to put the T_reference_overall.
    # Per default, T_reference_overall should be equal to T_fitstart_overall.
    # Exceptions MAY be that the KPI at this date is very low (close to zero), such that a division by this KPI makes no sense.
    # In that case, a separate date MAY be defined by Cloud Cluster Management in T_reference_overall.txt.
    T_reference_overall=T_fitstart_overall
    if (system("[ ! -f T_reference_overall.txt ]; echo $?")) {
        load "T_reference_overall.txt"
    }

    # Using T_reference_overall and T_report, the corresponding KPIs at these dates are calculated from the fit.
    KPI_e=susage(strptime(fmt, T_report))
    KPI_a=susage(strptime(fmt, T_reference_overall))

    # Raw KPI: relative change of the fitted value from the reference date to
    # the report date (0 means no change, 1 means doubled).
    # NOTE(review): a fitted KPI_a of exactly 0 makes this division undefined;
    # confirm whether that case needs explicit handling.
    kpi_raw=KPI_e/KPI_a-1

    # check if we actually want to consider this value (service was on board long enough, i.e. >=90 days)
    if ((strptime(fmt, T_report)-strptime(fmt, T_fitstart_overall)) >= (90.*3600.*24.)) {
        kpi_raw_title=sprintf("%.3g - subject to weighting.", kpi_raw)
    } else {
        # The computed value is still shown in the plot label, but 0 is logged.
        kpi_raw_title=sprintf("%.3g - NOT CONSIDERED: less than 90 days data available.", kpi_raw)
        kpi_raw=0
    }


    # Extract the KPI name, i.e. field i of the CSV header line. This is done
    # through a shell pipeline rather than inside gnuplot.
    kpi_name=system("head -n 1 ".filename." | tr -d '\r' | awk 'BEGIN{FS=\"".sep."\"}{print $".i."}'")

    # Printing to table (goes to susage.csv because of `set print` above).
    # NOTE(review): the format already ends in "\n" and `print` adds a newline
    # of its own, so each row appears to be followed by an empty line - confirm
    # that consumers of susage.csv expect this before changing it.
    logdata=sprintf("%s,%d,%s,%g,%g,%g,%g,,%s,%s,%s,%s,%s,%s,%s,%g,%g,%g\n",servicename,i,kpi_name,m,m_err,y0,y0_err,T_logstart,T_onboard,T_fitstart_overall,T_reference_overall,T_fitstart_yearly,T_reference_yearly,T_report,KPI_a,KPI_e,kpi_raw)
    print logdata

    # Plotting (once to pdf, once to svg)
    do for [IDX = 0:1] {
        if (IDX==0) {
            set terminal pdf  color noenhanced size 7,5 dashed
            set output 'plot_'.i.'.pdf'
        } else {
            set terminal svg noenhanced size 800,600
            set output 'plot_'.i.'.svg'
        }

        set multiplot layout 1,1 columnsfirst margins screen MP_LEFT, MP_RIGHT, MP_BOTTOM, MP_TOP spacing screen MP_xGAP, MP_yGAP
        set title servicename

        # print linear fitting variables and errors
        set label sprintf("m=%.2g +- %.3g (%.3g %%),\ny0=%.2g +- %.3g (%.3g %%)\nT_reference_overall=%s, T_report=%s\nkpi-raw=%s",m,m_err,m_err/m*100,y0,y0_err,y0_err/y0*100,T_reference_overall,T_report,kpi_raw_title) left at graph 0.1, graph 0.7

        # Data points, the fitted line restricted to the fit window, and one
        # marker per T_* date placed on the fitted line.
        # NOTE(review): '+' yields one point per sample, so each marker is
        # presumably drawn many times at the same spot - harmless but redundant.
        plot filename u 1:i w lp pt 7 ps 0.25 lc black title columnheader ,\
             (x>=strptime(fmt, T_fitstart_overall) && x<= strptime(fmt, T_report)) ? susage(x) : 1/0 w l linewidth 4 lc "#55bb55" notitle ,\
             '+' using (T_onboard):(susage(strptime(fmt, T_onboard)))                     w p pt 8  ps 1.5 lw 3 lc "#000000" title 'T_onboard' ,\
             '+' using (T_logstart):(susage(strptime(fmt, T_logstart)))                   w p pt 2  ps 1.5 lw 3 lc "#888888" title 'T_logstart' ,\
             '+' using (T_fitstart_overall):(susage(strptime(fmt, T_fitstart_overall)))   w p pt 6  ps 1.5 lw 3 lc "#00bb00" title 'T_fitstart_overall' ,\
             '+' using (T_reference_overall):(susage(strptime(fmt, T_reference_overall))) w p pt 4  ps 1.5 lw 3 lc "#0000bb" title 'T_reference_overall' ,\
             '+' using (T_report):(susage(strptime(fmt, T_report)))                       w p pt 10 ps 1.5 lw 3 lc "#ff0000" title 'T_report'

        unset multiplot
        unset label

        # Close the file just written so it is complete on disk now, instead of
        # staying open until the next `set output` or until gnuplot exits.
        set output
    }
}