##
##  Copyright 2013-2024, Seqera Labs
##
##  Licensed under the Apache License, Version 2.0 (the "License");
##  you may not use this file except in compliance with the License.
##  You may obtain a copy of the License at
##
##      http://www.apache.org/licenses/LICENSE-2.0
##
##  Unless required by applicable law or agreed to in writing, software
##  distributed under the License is distributed on an "AS IS" BASIS,
##  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
##  See the License for the specific language governing permissions and
##  limitations under the License.
##
nxf_tree() {
    local pid=$1

    declare -a ALL_CHILDREN
    while read P PP;do
        ALL_CHILDREN[$PP]+=" $P"
    done < <(ps -e -o pid= -o ppid=)

    pstat() {
        local x_pid=$1
        local STATUS=$(2> /dev/null < /proc/$1/status grep -E 'Vm|ctxt')

        if [ $? = 0 ]; then
        local  x_vsz=$(echo "$STATUS" | grep VmSize | awk '{print $2}' || echo -n '0')
        local  x_rss=$(echo "$STATUS" | grep VmRSS | awk '{print $2}' || echo -n '0')
        local x_peak=$(echo "$STATUS" | grep -E 'VmPeak|VmHWM' | sed 's/^.*:\s*//' | sed 's/[\sa-zA-Z]*$//' | tr '\n' ' ' || echo -n '0 0')
        local x_pmem=$(awk -v rss=$x_rss -v mem_tot=$mem_tot 'BEGIN {printf "%.0f", rss/mem_tot*100*10}' || echo -n '0')
        local vol_ctxt=$(echo "$STATUS" | grep '\bvoluntary_ctxt_switches' | awk '{print $2}' || echo -n '0')
        local inv_ctxt=$(echo "$STATUS" | grep '\bnonvoluntary_ctxt_switches' | awk '{print $2}' || echo -n '0')
        ## record struct: pid - %mem - vmem - rss - peak_vmem - peak_rss - voluntary_ctxt_switches - nonvoluntary_ctxt_switches
        cpu_stat[x_pid]="$x_pid $x_pmem $x_vsz $x_rss $x_peak $vol_ctxt $inv_ctxt"
        fi
    }

    pwalk() {
        pstat $1
        for i in ${ALL_CHILDREN[$1]:=}; do pwalk $i; done
    }

    pwalk $1
}

nxf_stat() {
    cpu_stat=()
    nxf_tree $1

    declare -a sum=(0 0 0 0 0 0 0 0)
    local pid
    local i
    for pid in "${!cpu_stat[@]}"; do
        local row=(${cpu_stat[pid]})
        [ $NXF_DEBUG = 1 ] && echo "++ stat mem=${row[*]}"
        ## aggregate metrics for all procs
        for i in "${!row[@]}"; do
        if [ $i != 0 ]; then
            sum[i]=$((sum[i]+row[i]))
        fi
        done
    done

    [ $NXF_DEBUG = 1 ] && echo -e "++ stat SUM=${sum[*]}"

    ## calc peaks
    for i in {1..7}; do
        if [ ${sum[i]} -lt ${cpu_peak[i]} ]; then
            sum[i]=${cpu_peak[i]}
        else
            cpu_peak[i]=${sum[i]}
        fi
    done

    [ $NXF_DEBUG = 1 ] && echo -e "++ stat PEAK=${sum[*]}\n"
    nxf_stat_ret=(${sum[*]})
}

nxf_mem_watch() {
    set -o pipefail
    local pid=$1
    local trace_file={{trace_file}}
    local count=0;
    declare -a cpu_stat=(0 0 0 0 0 0 0 0)
    declare -a cpu_peak=(0 0 0 0 0 0 0 0)
    local mem_tot=$(< /proc/meminfo grep MemTotal | awk '{print $2}')
    local timeout
    local DONE
    local STOP=''

    [ $NXF_DEBUG = 1 ] && nxf_sleep 0.2 && ps fx

    while true; do
        nxf_stat $pid
        ## compute time
        if [ $count -lt 10 ]; then timeout=1;
        elif [ $count -lt 120 ]; then timeout=5;
        else timeout=30;
        fi
        ## wait for termination message
        read -t $timeout -r DONE || true
        ## make sure to not enter an infinite loop
        [[ $DONE ]] && break
        ## this likely could be removed since the process is killed
        ## by the trap on error condition introduced by the patch for #1344
        ## keeping as extra check to prevent hanging
        if [ ! -e /proc/$pid ]; then
            [ ! $STOP ] && STOP=$(nxf_date)
            [ $(($(nxf_date)-STOP)) -gt 10000 ] && break
        fi
        count=$((count+1))
    done

    ## result struct: pid %mem vmem rss peak_vmem peak_rss
    printf "%s\n" \
        "%mem=${nxf_stat_ret[1]}" \
        "vmem=${nxf_stat_ret[2]}" \
        "rss=${nxf_stat_ret[3]}" \
        "peak_vmem=${nxf_stat_ret[4]}" \
        "peak_rss=${nxf_stat_ret[5]}" \
        "vol_ctxt=${nxf_stat_ret[6]}" \
        "inv_ctxt=${nxf_stat_ret[7]}" >> "$trace_file" || >&2 echo "Error: Failed to append to file: $trace_file"
}

nxf_write_trace() {
    printf "%s\n" \
        "nextflow.trace/v2" \
        "realtime=$wall_time" \
        "%cpu=$ucpu" \
        "cpu_model=$cpu_model" \
        "rchar=${io_stat1[0]}" \
        "wchar=${io_stat1[1]}" \
        "syscr=${io_stat1[2]}" \
        "syscw=${io_stat1[3]}" \
        "read_bytes=${io_stat1[4]}" \
        "write_bytes=${io_stat1[5]}" >| "$trace_file" || >&2 echo "Error: Failed to write to file: $trace_file"
}

nxf_trace_mac() {
    local start_millis=$(nxf_date)

    ## run task script
    {{trace_cmd}}

    local end_millis=$(nxf_date)
    local wall_time=$((end_millis-start_millis))
    local ucpu=''
    local cpu_model=''
    local io_stat1=('' '' '' '' '' '')
    nxf_write_trace
}

nxf_fd() {
    ## Find first avail file descriptor starting from 11
    local FD=11
    while [ -e /proc/$$/fd/$FD ]; do FD=$((FD+1)); done
    echo $FD
}

nxf_trace_linux() {
    local pid=$$
    command -v ps &>/dev/null || { >&2 echo "Command 'ps' required by nextflow to collect task metrics cannot be found"; exit 1; }
    ## see
    ##  https://github.com/Leo-G/DevopsWiki/wiki/How-Linux-CPU-Usage-Time-and-Percentage-is-calculated
    ##  https://stackoverflow.com/questions/27508531/calculate-cpu-per-process/27514562##27514562
    ##  https://stackoverflow.com/questions/16726779/how-do-i-get-the-total-cpu-usage-of-an-application-from-proc-pid-stat
    local num_cpus=$(< /proc/cpuinfo grep '^processor' -c)
    local cpu_model=$(< /proc/cpuinfo grep '^model name' | head -n 1 | awk 'BEGIN{FS="\t: "} { print $2 }')
    local tot_time0=$(grep '^cpu ' /proc/stat | awk '{sum=$2+$3+$4+$5+$6+$7+$8+$9; printf "%.0f",sum}')
    local cpu_time0=$(2> /dev/null < /proc/$pid/stat awk '{printf "%.0f", ($16+$17)*10 }' || echo -n 'X')
    local io_stat0=($(2> /dev/null < /proc/$pid/io sed 's/^.*:\s*//' | head -n 6 | tr '\n' ' ' || echo -n '0 0 0 0 0 0'))
    local start_millis=$(nxf_date)
    ## capture error and kill mem watcher
    trap 'kill $mem_proc' ERR
    
    ## run task script
    {{trace_cmd}} &
    local task=$!

    ## run mem stat
    mem_fd=$(nxf_fd)
    eval "exec $mem_fd> >(nxf_mem_watch $task)"
    local mem_proc=$!

    wait $task

    ## compute cpu usage time for processes
    local end_millis=$(nxf_date)
    local tot_time1=$(grep '^cpu ' /proc/stat | awk '{sum=$2+$3+$4+$5+$6+$7+$8+$9; printf "%.0f",sum}')
    local cpu_time1=$(2> /dev/null < /proc/$pid/stat awk '{printf "%.0f", ($16+$17)*10 }' || echo -n 'X')
    local ucpu=$(awk -v p1=$cpu_time1 -v p0=$cpu_time0 -v t1=$tot_time1 -v t0=$tot_time0 -v n=$num_cpus 'BEGIN { pct=(p1-p0)/(t1-t0)*100*n; printf("%.0f", pct>0 ? pct : 0) }' )

    local io_stat1=($(2> /dev/null < /proc/$pid/io sed 's/^.*:\s*//' | head -n 6 | tr '\n' ' ' || echo -n '0 0 0 0 0 0'))
    local i
    for i in {0..5}; do
        io_stat1[i]=$((io_stat1[i]-io_stat0[i]))
    done

    local wall_time=$((end_millis-start_millis))
    [ $NXF_DEBUG = 1 ] && echo "+++ STATS %CPU=$ucpu TIME=$wall_time I/O=${io_stat1[*]}"

    printf "%s\n" \
        "nextflow.trace/v2" \
        "realtime=$wall_time" \
        "%cpu=$ucpu" \
        "cpu_model=$cpu_model" \
        "rchar=${io_stat1[0]}" \
        "wchar=${io_stat1[1]}" \
        "syscr=${io_stat1[2]}" \
        "syscw=${io_stat1[3]}" \
        "read_bytes=${io_stat1[4]}" \
        "write_bytes=${io_stat1[5]}" >| "$trace_file" || >&2 echo "Error: Failed to write to file: $trace_file"

    ## join nxf_mem_watch
    [ -e /proc/$mem_proc ] && eval "echo 'DONE' >&$mem_fd" || true
    wait $mem_proc 2>/dev/null || true
    ## Bash prior 4.4 wait on a process substitution returns an error
    ## fallback on a while loop checking the proc pseudo file system
    while [ -e /proc/$mem_proc ]; do nxf_sleep 0.1; done
    {{fix_ownership}}
}

nxf_trace() {
    local trace_file=.command.trace
    touch $trace_file
    if [[ $(uname) = Darwin ]]; then
        nxf_trace_mac
    else
        nxf_trace_linux
    fi
}
