Skip to main content
This example runs a Session WebSocket demo from the terminal using the Vatel Go SDK: your microphone is streamed to Vatel, and the agent’s audio is played through your speaker. Transcripts and events are printed to the console. Response audio can also be written to a raw PCM file. It uses PortAudio (via gordonklaus/portaudio) for capture and playback.

Session token

REST endpoint used to obtain a short-lived JWT for the WebSocket connection.

Connection

WebSocket channel, message types, and request/response flow.

Prerequisites

  • Go 1.21+ (with CGO enabled for PortAudio)
  • PortAudio dev libraries:
    • macOS: brew install portaudio
    • Ubuntu/Debian: apt-get install portaudio19-dev
  • Organization API key and agent ID (agent UUID)

Setup

1

Clone or copy the example

Use the example from the vatel-nextjs-starters repo, or copy examples/golang-cli/ (main.go, go.mod) into your project.
2

Install dependencies

From the example directory:
go mod tidy
This pulls the Vatel Go SDK and github.com/gordonklaus/portaudio.
3

Build and run

go build -o run_session .
./run_session -api-key YOUR_API_KEY -agent-id AGENT_UUID
Or set VATEL_API_KEY and VATEL_AGENT_ID and run ./run_session. Optional: -base-url for a different API host; -output path to write response audio to a PCM file (default session_out.pcm).

How it works

  1. Client and token — A client is created with vatel.New(baseURL, apiKey). The program gets a session token with client.SessionToken(ctx, agentID).
  2. Connection — client.DialConnection(ctx, token) opens the WebSocket. A goroutine runs runMic(ctx, conn): it opens the default PortAudio input stream (24 kHz, mono, 16-bit), reads blocks, and sends them with conn.SendInputAudioBytes(pcm).
  3. Playback — A PortAudio output stream runs with a callback. When response_audio messages arrive, the base64 payload is decoded and pushed into a buffer; the callback drains the buffer and plays PCM. Unless -output is set to an empty string, the same decoded PCM is also written to that file (default session_out.pcm).
  4. Events — The main loop consumes conn.Messages(). It prints session_started, response_text, and session_ended, and logs tool calls while sending "ok" via conn.SendToolCallOutput. On session_ended or context cancel (SIGINT/SIGTERM), the mic goroutine is stopped and the program exits.

Project structure

Path — Purpose
main.go — CLI: flags, client, connection, mic goroutine, playback buffer + PortAudio callback, message loop
go.mod — Module and dependencies (Vatel SDK, portaudio)

Code

Full-duplex session: mic → server, agent audio → speaker (and optional PCM file). Uses PortAudio for mic and playback; graceful shutdown on SIGINT/SIGTERM.
package main

import (
    "context"
    "encoding/base64"
    "encoding/binary"
    "flag"
    "fmt"
    "log"
    "os"
    "os/signal"
    "sync"
    "syscall"

    "github.com/Devpro-Software/vatel-go-sdk"
    "github.com/gordonklaus/portaudio"
)

// Audio parameters used for both the microphone and the playback stream.
const (
    // sampleRate is the sample rate, in Hz, passed to PortAudio when opening
    // the input and output streams.
    sampleRate = 24000
    // blockSize is the number of frames per PortAudio buffer; at sampleRate
    // this is 480 / 24000 s = 20 ms of audio per block.
    blockSize  = 480
)

func main() {
    apiKey := flag.String("api-key", os.Getenv("VATEL_API_KEY"), "Organization API key")
    agentID := flag.String("agent-id", os.Getenv("VATEL_AGENT_ID"), "Agent UUID")
    baseURL := flag.String("base-url", defaultEnv("VATEL_BASE_URL", "https://api.vatel.ai"), "API base URL")
    outputPCM := flag.String("output", "session_out.pcm", "Path to write response audio (raw PCM 16-bit 24kHz mono)")
    flag.Parse()

    if *apiKey == "" || *agentID == "" {
        log.Fatal("error: set -api-key and -agent-id (or VATEL_API_KEY, VATEL_AGENT_ID)")
    }

    ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
    defer stop()

    if err := run(ctx, runOpts{
        baseURL:    *baseURL,
        apiKey:     *apiKey,
        agentID:    *agentID,
        outputPath: *outputPCM,
    }); err != nil && err != context.Canceled {
        log.Fatal(err)
    }
}

// runOpts carries the settings that main collects from flags and environment
// variables and hands to run.
type runOpts struct {
    // baseURL is the API base URL given to vatel.New.
    baseURL string
    // apiKey is the organization API key given to vatel.New.
    apiKey string
    // agentID identifies the agent a session token is requested for.
    agentID string
    // outputPath is where decoded response audio is written; an empty value
    // disables the file output.
    outputPath string
}

// defaultEnv returns the value of the environment variable key, or fallback
// when the variable is unset or set to the empty string.
func defaultEnv(key, fallback string) string {
    value := os.Getenv(key)
    if value == "" {
        return fallback
    }
    return value
}

// run executes one full-duplex voice session and blocks until it is over.
//
// It requests a session token for opts.agentID, dials the session connection,
// initializes PortAudio, optionally creates the PCM output file, starts
// speaker playback and the microphone goroutine, and then processes incoming
// messages.
//
// It returns nil when a session_ended message arrives or the message channel
// is closed, ctx.Err() when ctx is cancelled, and a wrapped error when one of
// the setup steps fails. On every return path the microphone goroutine is
// stopped and awaited before playback, PortAudio, and the connection are torn
// down.
func run(ctx context.Context, opts runOpts) error {
    client := vatel.New(opts.baseURL, opts.apiKey)

    tokenResp, err := client.SessionToken(ctx, opts.agentID)
    if err != nil {
        return fmt.Errorf("session token: %w", err)
    }

    conn, err := client.DialConnection(ctx, tokenResp.Token)
    if err != nil {
        return fmt.Errorf("dial connection: %w", err)
    }
    defer conn.Close()

    if err := portaudio.Initialize(); err != nil {
        return fmt.Errorf("portaudio init: %w", err)
    }
    defer portaudio.Terminate()

    // outFile stays nil when no output path was requested, or after a write
    // error has disabled further file output.
    var outFile *os.File
    if opts.outputPath != "" {
        f, err := os.Create(opts.outputPath)
        if err != nil {
            return fmt.Errorf("create output file: %w", err)
        }
        defer f.Close()
        outFile = f
    }

    player, err := startPlayback()
    if err != nil {
        return fmt.Errorf("playback: %w", err)
    }
    defer player.Close()

    sendCtx, cancelSend := context.WithCancel(ctx)
    var sendWg sync.WaitGroup
    sendWg.Add(1)
    go func() {
        defer sendWg.Done()
        runMic(sendCtx, conn)
    }()
    // Registered last so it runs first: the mic goroutine must be finished
    // before the deferred playback/PortAudio/connection teardown above runs.
    defer func() {
        cancelSend()
        sendWg.Wait()
    }()

    // Selecting on both ctx and the message channel lets SIGINT/SIGTERM end
    // the session immediately, even while the server is sending nothing.
    // (Assumes conn.Messages() returns a receive channel, as the original
    // range loop over it implies.)
    msgs := conn.Messages()
    for {
        select {
        case <-ctx.Done():
            return ctx.Err()
        case msg, ok := <-msgs:
            if !ok {
                // The connection closed its message channel.
                return nil
            }
            // The second result of ParseData is intentionally dropped: when
            // the payload cannot be parsed, the type assertions below do not
            // match and the message is skipped, while session_ended is still
            // honoured because it does not depend on data.
            data, _ := msg.ParseData()
            switch msg.Type {
            case vatel.TypeSessionStarted:
                if d, ok := data.(vatel.SessionStartedData); ok {
                    log.Println("Session started:", d.ID)
                }
            case vatel.TypeResponseAudio:
                if d, ok := data.(vatel.ResponseAudioData); ok {
                    decoded, err := base64.StdEncoding.DecodeString(d.Audio)
                    if err != nil {
                        log.Printf("decode response audio: %v", err)
                        continue
                    }
                    if outFile != nil {
                        if _, err := outFile.Write(decoded); err != nil {
                            // Report once and stop writing; playback goes on.
                            log.Printf("write output file: %v", err)
                            outFile = nil
                        }
                    }
                    player.Push(decoded)
                }
            case vatel.TypeToolCall:
                if d, ok := data.(vatel.ToolCallData); ok {
                    log.Println("Tool:", d.ToolName)
                    conn.SendToolCallOutput(d.ToolCallID, "ok")
                }
            case vatel.TypeSessionEnded:
                log.Println("Session ended.")
                return nil
            case vatel.TypeResponseText:
                if d, ok := data.(vatel.ResponseTextData); ok {
                    log.Println("Agent:", d.Text)
                }
            default:
                log.Println("Event:", msg.Type)
            }
        }
    }
}

// runMic captures microphone audio and streams it to conn until ctx is
// cancelled or the input stream fails.
//
// It opens the default PortAudio input stream with one input channel, no
// output channels, sampleRate Hz and blockSize frames per read into an int16
// buffer. After every successful read the block is converted to little-endian
// bytes and passed to conn.SendInputAudioBytes.
//
// The function has no return value because it runs in its own goroutine;
// every failure, including a failed read, is logged before it returns.
func runMic(ctx context.Context, conn *vatel.Connection) {
    inBuf := make([]int16, blockSize)
    stream, err := portaudio.OpenDefaultStream(1, 0, float64(sampleRate), blockSize, &inBuf)
    if err != nil {
        log.Printf("open mic stream: %v", err)
        return
    }
    defer stream.Close()
    if err := stream.Start(); err != nil {
        log.Printf("start mic: %v", err)
        return
    }
    // Deferred after Close, so the stream is stopped before it is closed.
    defer stream.Stop()
    for {
        // Cancellation is checked once per block, before the next read.
        select {
        case <-ctx.Done():
            return
        default:
        }
        if err := stream.Read(); err != nil {
            // Without this log the microphone would go silent with no trace.
            log.Printf("read mic: %v", err)
            return
        }
        conn.SendInputAudioBytes(int16SliceToBytes(inBuf))
    }
}

// playbackPlayer buffers decoded response audio and feeds it to a PortAudio
// output stream through its callback method.
type playbackPlayer struct {
    // mu guards buf, which is appended to by Push and drained by callback.
    mu     sync.Mutex
    // buf holds queued PCM bytes that have not been played yet.
    buf    []byte
    // stream is the output stream set by startPlayback and released by Close.
    stream *portaudio.Stream
}

// Push appends pcm to the playback queue. The bytes are copied into the
// internal buffer under the mutex; an empty slice is ignored.
func (p *playbackPlayer) Push(pcm []byte) {
    if len(pcm) > 0 {
        p.mu.Lock()
        defer p.mu.Unlock()
        p.buf = append(p.buf, pcm...)
    }
}

// callback is the function handed to PortAudio for the output stream. It
// fills out with the next queued samples and pads the rest with silence.
//
// Queued bytes are interpreted as little-endian 16-bit samples. Only whole
// samples are consumed, so a trailing odd byte stays in the buffer until the
// byte that completes it has been pushed.
//
// Samples are decoded directly into out, so no temporary slice is allocated
// on each invocation of the audio callback.
func (p *playbackPlayer) callback(out []int16) {
    const bytesPerSample = 2

    p.mu.Lock()
    // Number of complete samples available, capped at what was requested.
    n := min(len(p.buf)/bytesPerSample, len(out))
    for i := 0; i < n; i++ {
        out[i] = int16(binary.LittleEndian.Uint16(p.buf[i*bytesPerSample:]))
    }
    p.buf = p.buf[n*bytesPerSample:]
    p.mu.Unlock()

    // Anything the queue could not fill is played as silence.
    clear(out[n:])
}

// Close stops and closes the output stream, then clears the reference so a
// second call does nothing. It is a no-op when no stream is set.
func (p *playbackPlayer) Close() {
    if p.stream == nil {
        return
    }
    out := p.stream
    out.Stop()
    out.Close()
    p.stream = nil
}

// startPlayback opens the default PortAudio output stream (no input channels,
// one output channel, sampleRate Hz, blockSize frames per buffer) with the
// player's callback, starts it, and returns the player. If the stream cannot
// be started it is closed again and the error is returned.
func startPlayback() (*playbackPlayer, error) {
    player := new(playbackPlayer)

    out, err := portaudio.OpenDefaultStream(0, 1, float64(sampleRate), blockSize, player.callback)
    if err != nil {
        return nil, err
    }
    player.stream = out

    if startErr := out.Start(); startErr != nil {
        out.Close()
        return nil, startErr
    }
    return player, nil
}

// pcm16BytesToInt16LE decodes little-endian 16-bit PCM bytes into samples.
// A trailing odd byte is dropped. The result is always a non-nil slice of
// len(b)/2 samples.
func pcm16BytesToInt16LE(b []byte) []int16 {
    samples := make([]int16, 0, len(b)/2)
    // The i+1 bound stops before an unpaired final byte.
    for i := 0; i+1 < len(b); i += 2 {
        samples = append(samples, int16(binary.LittleEndian.Uint16(b[i:i+2])))
    }
    return samples
}

// int16SliceToBytes encodes samples as little-endian 16-bit PCM. The result
// is always a non-nil slice of 2*len(s) bytes.
func int16SliceToBytes(s []int16) []byte {
    encoded := make([]byte, 0, 2*len(s))
    for _, sample := range s {
        encoded = binary.LittleEndian.AppendUint16(encoded, uint16(sample))
    }
    return encoded
}

Audio format

  • Input: microphone only — PCM 16-bit, 24 kHz, mono.
  • Output: same format; played on the speaker and, unless -output is set to an empty string, written to that file (default session_out.pcm). To play the file:
    ffplay -f s16le -ar 24000 -ac 1 session_out.pcm
    # or
    aplay -f S16_LE -r 24000 -c 1 session_out.pcm