-
Uwe Jandt (DESY, HIFIS) authoredUwe Jandt (DESY, HIFIS) authored
plot.plt 6.58 KiB
#!/bin/gnuplot
# Auto-detect the CSV field separator of data_plot.csv.
# A comma is tried first; if that yields at most one column, a semicolon is
# tried instead. The script needs at least two columns to continue.
sep = ","
set datafile separator sep
stats 'data_plot.csv' nooutput
if (STATS_columns < 2) {
    # The comma did not split the rows, retry with a semicolon.
    sep = ";"
    set datafile separator sep
    stats 'data_plot.csv' nooutput
}
print sprintf("Detected %d columns.", STATS_columns)
# Still fewer than two columns with either separator: give up.
if (STATS_columns < 2) {
    print "Detected too few columns. Exiting."
    exit
}
# ---- Axis, tic and legend configuration ----

# The y axis starts at zero; its upper end is autoscaled.
set yrange [0:*]

# The x axis carries time values; tic labels are printed as year-month-day.
set xdata time
set format x "%Y-%m-%d"

# Date/time strings come in many flavours. A standard input format is assumed
# here; if a file timefmt.txt exists in the working directory it is loaded
# afterwards and can override the format.
# (The shell test echoes 1 when the file exists and 0 otherwise.)
set timefmt "%Y-%m-%d %H:%M"
if (system("[ ! -f timefmt.txt ]; echo $?")) {
    load "timefmt.txt"
}

# Legend position and the dash pattern of line type 2.
set key top left
set linetype 2 dashtype 2

# Visible time window.
set xrange ["2020-01-01":"2022-04-02"]

# Major x tics: rotated black labels, shifted by the given offset, starting at
# 2020-01-01 with a fixed step of 365 days; 12 minor intervals per major step.
# NOTE(review): a fixed 365-day step moves the tics off 1 January after a leap
# year (2020 has 366 days) - confirm whether that is acceptable.
set xtics rotate by 45
set xtics textcolor rgbcolor "black" offset -4,-3
set xtics '2020-01-01 00:00',365*24*3600
set mxtics 12

# Grid lines at major x tics and y tics only, none at minor x tics.
set grid xtics nomxtics ytics
# ---- Canvas layout ----
# Screen coordinates of the plot margins and the gaps between panels. They are
# passed to "set multiplot" and would also allow several aligned diagrams on
# one canvas if that is ever wanted.
MP_LEFT   = 0.1
MP_RIGHT  = 0.95
MP_BOTTOM = 0.14
MP_TOP    = 0.93
MP_xGAP   = 0.1
MP_yGAP   = 0.02

# ---- Fit configuration ----
# errorvariables makes fit store the parameter errors in <name>_err variables
# (m_err and y0_err for the model below).
set fit errorscaling errorvariables
# Very tight convergence limit; this should help to fit even very degenerate
# plots.
FIT_LIMIT   = 1e-15
FIT_MAXITER = 1000

# Linear model used for the fit: slope m, intercept y0.
susage(x) = y0 + m*x

# ---- Input and log output ----
# Every column of this file is plotted into its own output file.
filename = 'data_plot.csv'

# Since replot does not work reliably, the whole plot sequence is repeated
# explicitly for every output format wanted.

# From here on all print output is written to susage.csv (one log file per
# service).
set print "susage.csv"
# Process every data column (column 1 holds the time stamps). For each column:
#   1. fit the linear model susage(x) over the reporting window,
#   2. derive the raw KPI growth from the fit,
#   3. print one CSV record to the current print target,
#   4. render the data, the fit and the key dates to plot_<column>.pdf and
#      plot_<column>.svg.
do for [i=2:STATS_columns] {
    # Starting parameters for each fit.
    y0 = 100
    m = 1

    # Fit the slope for data points at least from 2020 (to exclude any possible
    # data points from 2019 or before).
    # For annual reporting 2021: AND fit only for data that is <=2021.
    # fmt is the date-only format used to interpret all T_* strings below.
    # NOTE(review): set timefmt here replaces the time format chosen before the
    # loop, including a possible override from timefmt.txt - confirm that this
    # is intended.
    fmt = "%Y-%m-%d"
    set xdata time
    set timefmt fmt

    # Reporting date (end of measurement period), hard-coded for every report.
    T_report = "2021-12-31"

    # Start of logging: first field of the first line after the header that
    # has more than one field.
    T_logstart=system("cat ".filename." | tr -d '\r' | awk 'BEGIN{FS=\"".sep."\"}{if (NR>1 && NF>1) {print $1; exit;}}'")

    # Onboarding date can be no earlier than the start of the pilot Helmholtz
    # Cloud, i.e. 2021-03-29.
    # https://hifis.net/news/2021/03/29/helmholtz-cloud-beta
    T_onboard = "2021-03-29"
    # A manually maintained file T_onboard.txt, if present, is loaded and may
    # define another T_onboard for this service.
    if (system("[ ! -f T_onboard.txt ]; echo $?")) {
        load "T_onboard.txt"
    }

    # The fit starts at the later of onboarding date and start of logging.
    T_fitstart_overall = (strptime(fmt, T_onboard) > strptime(fmt, T_logstart)) ? T_onboard : T_logstart

    # Do the fitting, restricted to the window from fit start to report date.
    fit [strptime(fmt, T_fitstart_overall):strptime(fmt, T_report)] susage(x) filename using 1:i via m,y0

    # Later: yearly procedure for yearly reports, not defined for the 2021
    # annual report yet. Will be taken care of for reporting 2023 ff.
    # Placeholders so that the log record keeps its columns.
    T_fitstart_yearly = "N/A"
    T_reference_yearly = "N/A"

    # Now we have a fit and can decide where to put T_reference_overall.
    # Per default it equals T_fitstart_overall. Exceptions MAY be that the KPI
    # at this date is very low (close to zero), such that a division by this
    # KPI makes no sense. In that case a separate date MAY be defined by Cloud
    # Cluster Management in T_reference_overall.txt.
    T_reference_overall = T_fitstart_overall
    if (system("[ ! -f T_reference_overall.txt ]; echo $?")) {
        load "T_reference_overall.txt"
    }

    # KPI values at the report date (KPI_e) and at the reference date (KPI_a),
    # both taken from the fitted line rather than from raw data points.
    KPI_e = susage(strptime(fmt, T_report))
    KPI_a = susage(strptime(fmt, T_reference_overall))

    # Raw KPI: relative change of the fitted value between reference date and
    # report date. This is a dimensionless ratio (0 means no change).
    kpi_raw = KPI_e/KPI_a - 1

    # Only consider the value if the service was on board long enough, i.e. the
    # fit window covers at least 90 days. Otherwise the title says so and the
    # logged value is forced to zero.
    if ((strptime(fmt, T_report)-strptime(fmt, T_fitstart_overall)) >= (90.*3600.*24.)) {
        kpi_raw_title = sprintf("%.3g - subject to weighting.", kpi_raw)
    } else {
        kpi_raw_title = sprintf("%.3g - NOT CONSIDERED: less than 90 days data available.", kpi_raw)
        kpi_raw = 0
    }

    # Extract the KPI name, i.e. field i of the header line. This is done via
    # shell and pipes instead of inside gnuplot.
    kpi_name=system("head -n 1 ".filename." | tr -d '\r' | awk 'BEGIN{FS=\"".sep."\"}{print $".i."}'")

    # Print one record to the table.
    # NOTE(review): servicename is not assigned anywhere in this script; it is
    # presumably supplied by the caller (for example via gnuplot -e) - confirm.
    # NOTE(review): the format ends in a newline and print adds another one, so
    # every record is followed by an empty line - confirm consumers expect it.
    logdata=sprintf("%s,%d,%s,%g,%g,%g,%g,,%s,%s,%s,%s,%s,%s,%s,%g,%g,%g\n",servicename,i,kpi_name,m,m_err,y0,y0_err,T_logstart,T_onboard,T_fitstart_overall,T_reference_overall,T_fitstart_yearly,T_reference_yearly,T_report,KPI_a,KPI_e,kpi_raw)
    print logdata

    # Plotting (once to pdf, once to svg).
    do for [IDX = 0:1] {
        if (IDX==0) {
            set terminal pdf color noenhanced size 7,5 dashed
            set output 'plot_'.i.'.pdf'
        } else {
            set terminal svg noenhanced size 800,600
            set output 'plot_'.i.'.svg'
        }
        set multiplot layout 1,1 columnsfirst margins screen MP_LEFT, MP_RIGHT, MP_BOTTOM, MP_TOP spacing screen MP_xGAP, MP_yGAP
        set title servicename

        # Annotate the diagram with the fit parameters, their errors, the key
        # dates and the raw KPI.
        set label sprintf("m=%.2g +- %.3g (%.3g %%),\ny0=%.2g +- %.3g (%.3g %%)\nT_reference_overall=%s, T_report=%s\nkpi-raw=%s",m,m_err,m_err/m*100,y0,y0_err,y0_err/y0*100,T_reference_overall,T_report,kpi_raw_title) left at graph 0.1, graph 0.7

        # Data points, the fitted line (drawn only inside the fit window) and
        # one marker on the fitted line for each key date.
        plot filename using 1:i with linespoints pointtype 7 pointsize 0.25 linecolor black title columnheader ,\
             (x>=strptime(fmt, T_fitstart_overall) && x<= strptime(fmt, T_report)) ? susage(x) : 1/0 with lines linewidth 4 linecolor "#55bb55" notitle ,\
             '+' using (T_onboard):(susage(strptime(fmt, T_onboard))) with points pointtype 8 pointsize 1.5 linewidth 3 linecolor "#000000" title 'T_onboard' ,\
             '+' using (T_logstart):(susage(strptime(fmt, T_logstart))) with points pointtype 2 pointsize 1.5 linewidth 3 linecolor "#888888" title 'T_logstart' ,\
             '+' using (T_fitstart_overall):(susage(strptime(fmt, T_fitstart_overall))) with points pointtype 6 pointsize 1.5 linewidth 3 linecolor "#00bb00" title 'T_fitstart_overall' ,\
             '+' using (T_reference_overall):(susage(strptime(fmt, T_reference_overall))) with points pointtype 4 pointsize 1.5 linewidth 3 linecolor "#0000bb" title 'T_reference_overall' ,\
             '+' using (T_report):(susage(strptime(fmt, T_report))) with points pointtype 10 pointsize 1.5 linewidth 3 linecolor "#ff0000" title 'T_report'

        unset multiplot
        unset label
        # Close the file just written so that it is complete on disk before
        # the next terminal is selected or the script ends.
        set output
    }
}