#!/usr/bin/zsh
#If you replace the shell above with bash, the script should still work, but it has been tested less rigorously.
# Copyright (c) 2024 Quantius Benignus
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#--------------------------------------------------------------------------

# NAME: wsi 
# PREREQUISITES: 
#  - whisper.cpp installation (see https://github.com/ggerganov/whisper.cpp) or a portable whisperfile
#  - recent versions of the CLI tools 'sox', 'curl' and 'xsel' (X11) or 'wl-copy' (Wayland) from your system's repositories.
#  - optional xdotool (X11) or ydotool (Wayland) for automatic paste after successful transcription
#--------------------------------------------------------------------------

# Session type of the current login; the output stage expects "x11" or "wayland".
# (The commented alternative queries loginctl when XDG_SESSION_TYPE is empty.)
wm="${XDG_SESSION_TYPE}"
#wm="${XDG_SESSION_TYPE:-$(loginctl show-session $(loginctl | grep $(whoami) | awk '{print $1}') -p Type --value)}"

#---USER CONFIGURATION BLOCK----------------------------------------------------------------------
# Edit the values below so that they match your machine.

# Directory for temporary files; kept in memory for speed and to spare the SSD/HDD:
TEMPD="/dev/shm"
# Fixed name of the recorded voice memo (wav); it is overwritten on every run:
ramf="${TEMPD}/wfile"
# Number of threads whisper.cpp may use for inference:
NTHR="8"
# Half of the available CPU cores appears to be the sweet spot; uncomment to compute it:
#NTHR=$(( $(getconf _NPROCESSORS_ONLN) / 2 ))
# false -> copy to the CLIPBOARD; true (or CLI flag -p) -> use the PRIMARY selection (middle mouse button paste):
PRIMESEL="false"

# Parent directory holding the AI models (Whisper, LLMs, TTS etc.); mostly needed for wsiAI:
AI="${HOME}/PATH_TO_YOUR/Models"
# Whisper model used for local inference; the WHISPER_DMODEL environment variable takes precedence when set:
#WMODEL="$TEMPD/ggml-base.en.bin"
WMODEL="${WHISPER_DMODEL:-${AI}/whisper/ggml-base.en.bin}"
# Uncomment to use an available whisperfile (downloaded to a folder in the PATH and set to EXECUTABLE):
# WHISPERFILE=whisper-tiny.en.llamafile 
# Paste automatically once the transcription is done (true/false).
# Needs xdotool (X11) or ydotool (Wayland):
AUTOPASTE="true"
# Address of the whisper.cpp server used with the -n option.
# (An IP:PORT argument given on the command line replaces it.)
WHOST='127.0.0.1'
WPORT='58080'
#---END USER CONFIGURATION BLOCK--------------------------------------------------------------------

# Show a desktop notification with the first available tool:
# zenity, then notify-send (package libnotify or libnotify-bin), then kdialog (KDE).
# Arguments:
#   $1 - title of the notification
#   $2 - message text
# Outputs:
#   When none of the tools is installed, the message and installation hints go to stderr.
desknote() {
    local title="$1"
    local message="$2"

    if command -v zenity &> /dev/null; then
        # Pass the title along; it was previously dropped for zenity.
        zenity --info --title="$title" --text="$message"
    elif command -v notify-send &> /dev/null; then
        notify-send "$title" "$message"
    elif command -v kdialog &> /dev/null; then
        # The trailing 5 is the popup timeout handed to kdialog.
        kdialog --title "$title" --passivepopup "$message" 5
    else
        echo "Notification Message: $message" >&2
        echo "Please install zenity or notify-send (or kdialog for KDE) to use desktop alerts." >&2
        echo "You can install either using your package manager, e.g.:" >&2
        echo "  sudo apt-get install zenity or sudo apt-get install libnotify-bin" >&2
        echo "  sudo yum install zenity or sudo yum install libnotify" >&2
        echo "  sudo pacman -S zenity, etc." >&2
    fi
}

#---CHECK DEPENDENCIES. This block can be commented out once dependencies confirmed-----------------------------------
#The install-wsi script should have taken care of this, left here for the manual install.
command -v curl &>/dev/null || { echo "curl is required. Please, install curl" >&2 ; exit 1 ; }
command -v sox &>/dev/null || { echo "sox is required. Please, install sox" >&2 ; exit 1 ; }
# 'transcribe' is only demanded when no whisperfile has been configured:
[[ -n $WHISPERFILE ]] || command -v transcribe &>/dev/null || { echo -e "Please, install whisper.cpp (see https://github.com/ggerganov/whisper.cpp)\
\nand create 'transcribe' in your PATH as a symbolic link to the main executable, e.g.\n \
 'ln -s /full/path/to/whisper.cpp/main \$HOME/.local/bin/transcribe'" >&2 ; exit 1 ; }
# Desktop notifications go through desknote, which can use zenity, notify-send or kdialog;
# any one of the three is sufficient:
command -v zenity &>/dev/null || command -v notify-send &>/dev/null || command -v kdialog &>/dev/null || { echo "zenity, notify-send or kdialog needed for error reporting. Please, install zenity or libnotify-bin" >&2 ; exit 1 ; }
#Now let's check if we are in X11 or Wayland and use the right utility.
#The value of $wm (not the literal word) is compared, against the lowercase names the output stage uses:
if [[ "$wm" == "wayland" ]]; then
    command -v wl-copy &>/dev/null || { echo "wl-copy is needed for the clipboard. Please, install wl-copy" >&2 ; exit 1 ; } 
    if [ "$AUTOPASTE" = true ]; then 
       command -v ydotool &>/dev/null || { echo "To have the transcribed text pasted automatically at the cursor position, please install ydotool" >&2 ; exit 1 ; }
    fi
elif [[ "$wm" == "x11" ]]; then
    command -v xsel &>/dev/null || { echo "We rely on xsel for the clipboard. Please, install xsel." >&2 ; exit 1 ; }
    if [ "$AUTOPASTE" = true ]; then 
       command -v xdotool &>/dev/null || { echo "To have the transcribed text pasted automatically at the cursor position, please install xdotool" >&2 ; exit 1 ; }
    fi
fi
# Stop when the configured model file is missing:
[[ -f "$WMODEL" ]] || { desknote "File not found: " "$WMODEL was not found. \nPlease, correct in USER CONFIGURATION BLOCK." ; exit 1 ; }
# A configured whisperfile must be runnable. It is looked up with 'command -v' because it is
# expected in a PATH folder; the script stops only when the lookup fails.
if [[ -n "$WHISPERFILE" ]] && ! command -v "$WHISPERFILE" &>/dev/null; then
    desknote "Not found: " "Executable $WHISPERFILE not found."
    exit 1
fi
echo -e "\nLooks like you have all dependencies installed.\nTo remove this message, comment out the 'CHECK DEPENDENCIES' block in the wsi script.\n"
desknote "Dependencies installed" "\nLooks like you have all dependencies installed.\n \
To remove this message, comment out the 'CHECK DEPENDENCIES' block in the wsi script.\n \
\nYou can run wsi --help from the Terminal to learn about its options..."
#---END CHECK DEPENDENCIES. The above block can be commented out after successful 1st run----------------------------

# Process command line arguments first.
# The script name is saved here because inside a zsh function $0 expands to the function name.
wsi_prog="$0"

# Parse the wsi command line.
# Arguments: the positional parameters of the script ("$@").
# Globals written: PRIMESEL (-p), whf (-w), IPnPORT (-n, or a bare IP:PORT / HOSTNAME:PORT argument).
# Globals read:    WHISPERFILE, WHOST, WPORT, wsi_prog.
# Exits: 0 after printing the help text,
#        1 when the selected server does not answer with HTTP status 200,
#        2 on an unrecognised option.
wsi_parse_args() {
    while [ $# -gt 0 ]; do
        case "$1" in
            -p|--primary)
                # To enable pasting with the middle mouse button
                PRIMESEL=true
                ;;
            -c|--clipboard)
                # Transitional warning due to a breaking change (to be removed in the future). Clipboard is now the default.
                # The continuation lines keep their original indentation because it is part of the message string.
                desknote "Breaking Change:" "The '-c' flag is deprecated, since clipboard is now default. \n \
            Remove unnecessary '-c' from your desktop shortcut definitions. \n \
            To use primary selection as the text source, use the '-p' flag in your desktop shortcuts."
                ;;
            -w|--whisperfile)
                whf="$WHISPERFILE"
                ;;
            -n|--netapi)
                # Use the hostname or IP and port specified in the config block.
                # A command-line argument IP:PORT can be supplied instead of this option.
                IPnPORT="$WHOST:$WPORT"
                if [[ "$(curl -s -f -o /dev/null -w '%{http_code}' "$IPnPORT")" != "200" ]]; then
                    echo "Can't connect to whisper.cpp server at provided address!" >&2
                    desknote "No connection to Whisper.cpp server" "No whisper.cpp server at $IPnPORT."
                    exit 1
                fi
                ;;
            -h|--help)
                echo "Usage: $wsi_prog [-p|--primary] [-n|--netapi] [-w|--whisperfile] [IP:PORT]"
                echo "  -p, --primary: Use PRIMARY selection instead of CLIPBOARD (default - the better choice when autopaste is on)"
                echo "  -n, --netapi: Use the preconfigured IP/hostanem and port to call a whisper.cpp server for inference."
                echo "  -w, --whisperfile: Use the preconfigured whisperfile***.llamafile for inference."
                echo "  any command-line argument: Expected to be of the form IP:PORT or HOSTNAME:PORT to call a server other than the preconfigured."
                exit 0
                ;;
            -*)
                # An unknown option must not reach curl, which would read it as one of its own options.
                echo "Unknown option: $1 (run '$wsi_prog --help' for usage)" >&2
                desknote "Unknown option" "wsi does not understand the option '$1'."
                exit 2
                ;;
            *)
                # The network address and port should have already been sanitized in extension
                IPnPORT="$1"
                if [[ "$(curl -s -f -o /dev/null -w '%{http_code}' "$IPnPORT")" != "200" ]]; then
                    echo "Can't connect to a whisper.cpp server at provided address!" >&2
                    desknote "No connection to Whisper.cpp server" "No whisper.cpp server at $IPnPORT."
                    exit 1
                fi
                ;;
        esac
        shift
    done
}

wsi_parse_args "$@"

#Hear the complaints of the above tools and do not continue with the sequence:
#(from here on, any command that fails aborts the script)
set -e

# Record the voice memo from the default audio input into $ramf as a 16 kHz wav file.
# The sox 'silence' effect arguments are presumably: start once 0.1 s of sound above 1% is heard,
# stop after 2.0 s below 5% (confirm against the sox manual before tuning).
# The path is quoted so that a TEMPD containing blanks also works under bash; sox messages are discarded.
rec -q -t wav "$ramf" rate 16k silence 1 0.1 1% 1 2.0 5%  2>/dev/null

#The server inference has precedence (i.e. -w will be ignored if -n is present). 
# All expansions are quoted so that paths or addresses with blanks or glob characters
# are passed as single arguments when the script runs under bash.
if [ -n "$IPnPORT" ]; then
    # Post the recording to the server's /inference endpoint and request a plain-text answer.
    str=$(curl -S -s "$IPnPORT/inference" \
        -H "Content-Type: multipart/form-data" \
        -F file="@$ramf" \
        -F temperature="0.0" \
        -F temperature_inc="0.2" \
        -F response_format="text")
# | jq -r '.text' )
elif [[ "$whf" == *.llamafile ]]; then
    # whf is only set by the -w option; the whisperfile is invoked without a -m model argument.
    #echo "Using whisperfile: "
    str="$("$whf" -t "$NTHR" -nt -f "$ramf" 2>/dev/null)"
else
    # Default: local whisper.cpp through the 'transcribe' link with the configured model.
    str="$(transcribe -t "$NTHR" -nt -m "$WMODEL" -f "$ramf" 2>/dev/null)"
fi
# Clean up a raw transcript and print the result (without a trailing newline).
# Arguments: $1 - text returned by the inference step
# Steps:
#   1. remove every "(...)" and "[...]" annotation (Whisper marks non-speech events such as
#      "(wind blowing)" this way), one at a time, so that speech between two annotations survives;
#   2. strip all leading whitespace, including newlines;
#   3. capitalize the first character.
wsi_tidy_text() {
    local txt="$1"
    local before after

    while [[ "$txt" == *\(*\)* ]]; do
        before="${txt%%\(*}"          # everything in front of the first "("
        after="${txt#*\(}"            # everything behind the first "("
        txt="${before}${after#*\)}"   # continue after the next ")"
    done
    while [[ "$txt" == *\[*\]* ]]; do
        before="${txt%%\[*}"
        after="${txt#*\[}"
        txt="${before}${after#*\]}"
    done

    # Portable leading-whitespace trim: the inner expansion yields the leading run of
    # whitespace, the outer one removes exactly that prefix. This needs neither extglob (bash)
    # nor zsh glob options, and never touches text behind the first non-blank character.
    txt="${txt#"${txt%%[![:space:]]*}"}"

    #Prefer the power of zsh, but lose full POSIX compliance.
    if [ -n "$ZSH_NAME" ]; then
        txt="${(C)txt:0:1}${txt#?}"
    elif [ -n "$BASH" ]; then
        #Running in bash because you changed the shebang on line 1
        txt="${txt^}"
    else
        # Diagnostics belong on stderr; stdout may carry the transcript.
        echo "Unknown shell, assuming bash compliance" >&2
        txt="${txt^}"
    fi

    printf '%s' "$txt"
}

str="$(wsi_tidy_text "$str")"

#We have a result, now we make a few decisions.
# The text is always written with printf '%s\n' "$str": unlike an unquoted echo, this is not
# subject to word splitting or globbing under bash and does not interpret a leading "-n"/"-e"
# or backslash sequences in the transcript.
#If this is somehow run in a text console: 
if [[ -z "${DISPLAY}" ]] || [[ -z "${DESKTOP_SESSION}" ]] || [[ -z "${XDG_CURRENT_DESKTOP}" ]]; then
#"Not running in a known graphics environment. Using standard output:
    printf '%s\n' "$str" ; exit 0
else
#xsel or wl-copy:
 case "$wm" in
    "x11")
        if [ "$PRIMESEL" = true ]; then
          printf '%s\n' "$str" | xsel -ip
          # Simply clicking the middle mouse button. The user must take care of: 1. Window choice. 2. Position within window.
          # Thus, autopaste from the primary selection is more unpredictable and requires extra care of window focus and mouse pointer repositioning. 
          if [ "$AUTOPASTE" = true ]; then xdotool click 2; fi
        else
          printf '%s\n' "$str" | xsel -ib
          # The automatic paste option makes more sense when using the CLIPBOARD, not primary selection.
          if [ "$AUTOPASTE" = true ]; then xdotool key ctrl+v; fi
        fi
        ;;
    "wayland")
        if [ "$PRIMESEL" = true ]; then
          printf '%s\n' "$str" | wl-copy -p
          # Simply clicking the middle mouse button. The user must take care of: 1. Window choice. 2. Position within window.
          # Thus, autopaste from the primary selection is more unpredictable and requires extra care of window focus and mouse pointer repositioning. 
          if [ "$AUTOPASTE" = true ]; then ydotool click 0xC2; fi
        else
          printf '%s\n' "$str" | wl-copy
          # Ctrl+V as press/release pairs. ydotool takes Linux input-event (evdev) key codes:
          # 29 = KEY_LEFTCTRL, 47 = KEY_V. (37 and 55 are the X11 keycodes of these keys; as evdev
          # codes they mean K and keypad '*'.) Older ydotool releases may use a different syntax.
          if [ "$AUTOPASTE" = true ]; then ydotool key 29:1 47:1 47:0 29:0 ; fi
        fi 
        ;;
    *)
        # Unknown session type: fall back to standard output.
        printf '%s\n' "$str"
        ;;
 esac
fi
