#!/bin/bash
# Restart HauhauCS on dual GPU (3090 + 4080 RPC)
# Restart sequence:
#   1. stop whatever is listening on the llama-server and local rpc-server ports
#   2. start a local rpc-server, then llama-server using the local and remote
#      RPC endpoints
#   3. poll the llama-server /health endpoint and report OK / FAILED
#
# Output: "OK", "WAITING..." or "FAILED" on stdout; diagnostics on stderr.
# Exit status: 1 on failure, 0 when the server becomes healthy.

readonly SERVER_PORT=8081   # llama-server HTTP port
readonly RPC_PORT=50053     # local rpc-server port
readonly BIN_DIR="${HOME}/llama.cpp/build-clang/bin"
readonly RPC_LOG=/tmp/rpc-local.log
readonly SERVER_LOG=/tmp/llama-hauhau.log

# Print a diagnostic on stderr, the FAILED marker on stdout, and exit 1.
fail() {
  printf 'restart: %s\n' "$*" >&2
  echo "FAILED"
  exit 1
}

# Print the PIDs (one per line) of processes LISTENING on TCP port $1.
# -sTCP:LISTEN keeps clients that merely have a connection involving this
# port number out of the result, so they are never signalled.
listener_pids() {
  lsof -t -iTCP:"$1" -sTCP:LISTEN 2>/dev/null
}

# Send SIGTERM to the listener(s) on TCP port $1 and wait (up to ~10 s) for
# the port to be released. Returns 0 if the port is free, 1 if it is still held.
stop_listener() {
  local port=$1
  local -a pids=()
  local attempt

  mapfile -t pids < <(listener_pids "$port")
  # Nothing listening: avoid calling kill with no arguments.
  (( ${#pids[@]} > 0 )) || return 0

  kill "${pids[@]}" 2>/dev/null

  for (( attempt = 0; attempt < 10; attempt++ )); do
    [[ -z "$(listener_pids "$port")" ]] && return 0
    sleep 1
  done
  return 1
}

# Succeeds when the /health response of the local llama-server contains "ok".
is_healthy() {
  curl -s --max-time 5 "http://localhost:${SERVER_PORT}/health" | grep -q ok
}

# If an old server kept its port, the new one could not bind and the health
# check below would report the stale instance, so treat that as a failure.
stop_listener "$SERVER_PORT" || fail "port ${SERVER_PORT} is still in use"
stop_listener "$RPC_PORT" || fail "port ${RPC_PORT} is still in use"

cd "$BIN_DIR" || fail "cannot cd to ${BIN_DIR}"

./rpc-server --host 0.0.0.0 --port "$RPC_PORT" --cache &>"$RPC_LOG" &
rpc_pid=$!
sleep 2
# The local RPC endpoint is passed to llama-server below; stop early if the
# worker already died instead of starting the server against it.
kill -0 "$rpc_pid" 2>/dev/null || fail "rpc-server exited during startup; see ${RPC_LOG}"

./llama-server \
  -m /home/serv3090/models/Qwen3.5-35B-A3B-Uncensored-HauhauCS-Aggressive-Q6_K.gguf \
  -ngl 99 \
  --rpc "127.0.0.1:${RPC_PORT},192.168.31.58:50052" \
  --host 0.0.0.0 --port "$SERVER_PORT" \
  --jinja -c 32768 \
  &>"$SERVER_LOG" &
server_pid=$!

sleep 5
if is_healthy; then
  echo "OK"
else
  echo "WAITING..."
  # Up to 30 further probes, 5 s apart.
  for (( attempt = 1; attempt <= 30; attempt++ )); do
    # Fail fast when the server process is gone rather than polling a dead port.
    kill -0 "$server_pid" 2>/dev/null \
      || fail "llama-server exited during startup; see ${SERVER_LOG}"
    sleep 5
    if is_healthy; then
      echo "OK"
      exit 0
    fi
  done
  fail "llama-server did not become healthy in time; see ${SERVER_LOG}"
fi