#!/usr/bin/zsh
# You may replace the shell above with bash, but do so at your own risk.
# Copyright (c) 2024 Quantius Benignus
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#--------------------------------------------------------------------------

# NAME: blooper
# PREREQUISITES: 
#  - whisper.cpp installation (see https://github.com/ggerganov/whisper.cpp) or a whisperfile
#  - recent versions of 'sox', 'curl', 'xsel' or 'wl-copy' and xdotool or ydotool CLI tools from your system's repositories.
#--------------------------------------------------------------------------
#Time stamps for timing the performance of the various stages (Un/Comment as needed): 
#echo $(date +%s.%N)
# Session type as exported by the login session; the paste code further down
# matches it against "x11" and "wayland". The commented alternative asks
# loginctl when the variable is not set:
wm="${XDG_SESSION_TYPE}"
#wm="${XDG_SESSION_TYPE:-$(loginctl show-session $(loginctl | grep $(whoami) | awk '{print $1}') -p Type --value)}"

#---USER CONFIGURATION BLOCK----------------------------------------------------------------------
# PLEASE ADJUST THE VARIABLES BELOW TO SUIT YOUR ENVIRONMENT.
# Directory for temporary files. Keeping them in memory is faster and spares the SSD/HDD:
TEMPD="/dev/shm"
# Fixed name of the wav file holding the recorded speech; it is overwritten on every recording:
ramf="${TEMPD}/wfile"
# Number of processing threads passed to whisper.cpp for inference (adjust for your machine):
NTHR=8
# Half of the CPU cores appears to be the optimum; uncomment to compute it automatically:
#NTHR=$(( $(getconf _NPROCESSORS_ONLN) / 2 ))

# Parent directory of all AI models (Whisper, LLMs, TTS etc.), mostly needed for wsiAI:
AI="${HOME}/PATH_TO_YOUR/Models"
# whisper.cpp model file used for local ASR inference (whisper.cpp, possibly whisperfile).
# The environment variable WHISPER_DMODEL, when set and non-empty, takes precedence:
#WMODEL="$TEMPD/ggml-base.en.bin"
WMODEL="${WHISPER_DMODEL:-${AI}/whisper/ggml-base.en.bin}"
# Uncomment to use a whisperfile (downloaded to a folder in the PATH and made EXECUTABLE):
#WHISPERFILE="whisper-tiny.en.llamafile" 
# Host name and port of the whisper.cpp server used with the -n command line option.
# (A command line argument IP:PORT given instead of '-n' overrides these values.)
WHOST="127.0.0.1"
WPORT="58080"
#---END USER CONFIG BLOCK------------------------------------------------------------------------

# desknote TITLE MESSAGE
# Shows a desktop notification with the first tool found, in this order of
# preference: zenity, notify-send (package libnotify or libnotify-bin on most
# distributions), kdialog (KDE fallback).
# When none of them is installed, the message and installation hints are
# written to stderr instead.
#   $1 - title of the notification
#   $2 - message text
desknote() {
    local title="$1"
    local message="$2"

    if command -v zenity &> /dev/null; then
        # Pass the title along as well; it was silently dropped before.
        zenity --info --title="$title" --text="$message"
    elif command -v notify-send &> /dev/null; then
        notify-send "$title" "$message"
    elif command -v kdialog &> /dev/null; then
        # The trailing 5 is the popup timeout argument of --passivepopup.
        kdialog --title "$title" --passivepopup "$message" 5
    else
        echo "Notification Message: $message" >&2
        echo "Please install zenity or notify-send (or kdialog for KDE) to use desktop alerts." >&2
        echo "You can install either using your package manager, e.g.:" >&2
        echo "  sudo apt-get install zenity or sudo apt-get install libnotify-bin" >&2
        echo "  sudo yum install zenity or sudo yum install libnotify" >&2
        echo "  sudo pacman -S zenity, etc." >&2
    fi
}

#---CHECK DEPENDENCIES. This block can be commented out once dependencies confirmed-----------------------------------
# Every failed check reports what is missing and ends the script with exit status 1.
command -v curl &>/dev/null || { echo "curl is required. Please, install curl" >&2 ; exit 1 ; }
command -v sox &>/dev/null || { echo "sox is required. Please, install sox" >&2 ; exit 1 ; }
# Local transcription needs either a configured whisperfile or a 'transcribe' command in the PATH:
if [[ -z $WHISPERFILE ]] && ! command -v transcribe &>/dev/null; then
    echo -e "Please, install whisper.cpp (see https://github.com/ggerganov/whisper.cpp)\
\nand create 'transcribe' in your PATH as a symbolic link to the main executable, e.g.\n \
 'ln -s /full/path/to/whisper.cpp/main \$HOME/.local/bin/transcribe'" >&2
    exit 1
fi
#Now let's check if we are in X11 or Wayland and use the right utility.
#The values are lowercase, the same ones the paste code of this script matches against:
case "$wm" in
    wayland)
        command -v wl-copy &>/dev/null || { echo "wl-copy is needed for the clipboard. Please, install wl-copy" >&2 ; exit 1 ; }
        command -v ydotool &>/dev/null || { echo "ydotool is needed for auto-paste. Please, install it" >&2 ; exit 1 ; }
        ;;
    x11)
        command -v xsel &>/dev/null || { echo "We rely on xsel for the clipboard. Please, install xsel." >&2 ; exit 1 ; }
        command -v xdotool &>/dev/null || { echo "xdotool is needed for auto-paste. Please, install it" >&2 ; exit 1 ; }
        ;;
esac
# The configured model file has to exist:
if [[ ! -f $WMODEL ]]; then
    desknote "File not found: " "$WMODEL was not found. \nPlease, correct in USER CONFIGURATION BLOCK."
    exit 1
fi
# A configured whisperfile has to be an executable that can be found (through the PATH or by its path).
# Only a failed lookup ends the script:
if [[ -n $WHISPERFILE ]] && ! command -v "$WHISPERFILE" &>/dev/null; then
    desknote "Not found: " "Executable $WHISPERFILE not found."
    exit 1
fi
echo -e "\nLooks like you have all dependencies installed.\nTo remove this message, comment out the 'CHECK DEPENDENCIES' block in the blooper script.\n"
desknote "Dependencies installed" "\nLooks like you have all dependencies installed.\n \
To remove this message, comment out the 'CHECK DEPENDENCIES' block in the blooper script.\n \
\nYou can run blooper --help from the Terminal to learn about its options..."
#---END CHECK DEPENDENCIES. The above block can be commented out after successful 1st run----------------------------

# Process command line arguments first:
#   -w | --whisperfile   transcribe with the whisperfile named in WHISPERFILE
#   -n | --netapi        use the whisper.cpp server at WHOST:WPORT from the config block
#   -h | --help          print the usage text and exit
#   anything else        is taken as the <IP:port> of a whisper.cpp server
# Both server variants are probed with curl; the script exits with status 1 when
# the address does not answer with HTTP status 200.
while [ $# -gt 0 ]; do
    case "$1" in
        -w|--whisperfile)
            whf="$WHISPERFILE"
            # With an empty value the transcription step uses local whisper.cpp; say so.
            if [[ -z "$whf" ]]; then
                echo "WHISPERFILE is not set in the USER CONFIGURATION BLOCK, using local whisper.cpp instead." >&2
            fi
            shift
            ;;
        -n|--netapi)
            #This uses the hostname or IP and port specified in the config block (when run from CLI or set in Blurt with '-something')
            #Can be overwritten from Blurt, supplied as command line flag IP:PORT instead of this option
            IPnPORT="$WHOST:$WPORT"
            # Notify and stop only when the server does not respond.
            if [[ "$(curl -s -f -o /dev/null -w '%{http_code}' "$IPnPORT")" != "200" ]]; then
                echo "Can't connect to whisper.cpp server at provided address!" >&2
                desknote "No connection to Whisper.cpp server" "No whisper.cpp server at $IPnPORT."
                exit 1
            fi
            shift
            ;;
        -h|--help)
            echo -e "Usage: $0 [-n|--netapi] [-w|--whisperfile]\n"
            echo -e "  -n, --netapi: Use whisper.cpp server with the host:port in the GONFIGURATION block\n"
            echo -e "  -w, --whisperfile: Use whisperfile instead of standalone whisper.cpp. Note that the whisperfile will use the script-configured (not embedded) model.\n"
            echo -e "Any other non-flag command-line argument is expected to be an <IP:port> pair. Error will occur if diferent."
            exit 0
            ;;
        *)
            #The network address and port, is there a server listening?  (should have already been sanitized)
            IPnPORT=$1
            if [[ "$(curl -s -f -o /dev/null -w '%{http_code}' "$IPnPORT")" != "200" ]]; then
                echo "Can't connect to a whisper.cpp server at provided address!" >&2
                desknote "No connection to Whisper.cpp server" "No whisper.cpp server at $IPnPORT."
                exit 1
            fi
            shift
            ;;
    esac
done

#Hear the complaints of the above tools and do not continue with the sequence:
set -e

# Dictation loop: record one utterance, transcribe it, tidy the text and paste it
# (or print it when no graphical session is detected).
# The loop ends after 4 consecutive results shorter than 4 characters.
#echo "Recording now: "
silecount=0
while [[ $silecount -lt 4 ]]; do

    # Record to the wav file in RAM at 16k sample rate, one channel. The 'silence' effect
    # arguments are sox's start/stop thresholds (see the sox manual for their exact meaning).
    rec -q -t wav "$ramf" rate 16k silence 1 0.1 1% 1 1.0 3% channels 1 2>/dev/null

    #stmp=$(date +%s%N)

    if [ -n "$IPnPORT" ]; then
        # A server address was given on the command line: use the whisper.cpp server.
        echo "Sending to server now: "
        str=$(curl -S -s "$IPnPORT/inference" \
            -H "Content-Type: multipart/form-data" \
            -F file="@$ramf" \
            -F temperature="0.0" \
            -F temperature_inc="0.2" \
            -F response_format="text")
    elif [[ "$whf" == *.llamafile ]]; then
        # echo "Using whisperfile: "
        str="$("$whf" -t "$NTHR" -nt --gpu auto -f "$ramf" 2>/dev/null)"
    else
        # echo "Attempting ASR with local whisper.cpp:"
        str="$(transcribe -t "$NTHR" -nt -fa -m "$WMODEL" -f "$ramf" 2>/dev/null)"
    fi

    #echo "Got text back after: "$((($(date +%s%N) - stmp)/1000000))" ms"
    #stmp=$(date +%s%N)
    # Whisper reports non-speech events such as (wind blowing) or [MUSIC]; drop the
    # first parenthesized and the first bracketed span, then up to two leading newlines:
    str="${str/\(*\)}"
    str="${str/\[*\]}"
    str="${str#$'\n'}"
    str="${str#$'\n'}"

    # Fewer than 4 characters counts as silence; anything longer resets the counter.
    if [[ "${#str}" -lt 4 ]]; then
        silecount=$((silecount + 1))
        continue
    else
        silecount=0
    fi

    # Strip leading whitespace without relying on extended globbing: first isolate
    # the whitespace prefix, then remove exactly that prefix. This works the same in
    # zsh and bash and never touches the first word of the text.
    lead="${str%%[![:space:]]*}"
    str=${str#"$lead"}

    # Capitalize the first character with whatever the running shell offers.
    #Prefer the power of zsh, but loose full POSIX compliance.
    if [ -n "$ZSH_NAME" ]; then
        str="${(C)str:0:1}${str#?}"
    elif [ -n "$BASH" ]; then
        #Running in bash because you changed the shebang on line 1
        str="${str^}"
    else
        #Not testing for AI input if shell is unknown:
        echo "Unknown shell, assuming bash compliance"
        str="${str^}"
    fi

    #We have a result (either recognized text or AI response), now we make a few decisions:
    #If this is somehow run in a text console:
    if [[ -z "${DISPLAY}" ]] || [[ -z "${DESKTOP_SESSION}" ]] || [[ -z "${XDG_CURRENT_DESKTOP}" ]]; then
        #Not running in a known graphics environment. Using standard output.
        # printf prints the text verbatim (echo would interpret backslashes and leading dashes).
        printf '%s\n' "$str" ; exit 0
    else
        #xsel or wl-copy:
        #echo "Pasting now: "
        case "$wm" in
            "x11")
                printf '%s\n' "$str" | xsel -ib
                xdotool key ctrl+v
                ;;
            "wayland")
                printf '%s\n' "$str" | wl-copy
                #The code sequence (ctrl+v) may be different on your system
                # NOTE(review): 37/55 look like the X11 keycodes of Ctrl/V; ydotool documents
                # Linux input-event codes (left Ctrl 29, V 47) - confirm on a Wayland system.
                ydotool key 37:1 55:1 55:0 37:0
                ;;
            *)
                printf '%s\n' "$str"
                ;;
        esac
    fi
done

#echo "Text processed after another: "$((($(date +%s%N) - stmp)/1000000)) "ms"
