{
  "name": "Voice Agent Index benchmark evidence matrix",
  "url": "https://voiceagentindex.com/benchmarks/results/",
  "updatedAt": "2026-06-28",
  "methodologyUrl": "https://voiceagentindex.com/benchmarks/methodology/",
  "submitEvidenceUrl": "https://voiceagentindex.com/benchmarks/submit-evidence/",
  "columns": [
    {
      "key": "latency",
      "label": "Latency",
      "href": "/benchmarks/latency/"
    },
    {
      "key": "handoff",
      "label": "Handoff",
      "href": "/benchmarks/human-handoff/"
    },
    {
      "key": "booking",
      "label": "Booking",
      "href": "/benchmarks/appointment-booking/"
    },
    {
      "key": "escalation",
      "label": "Escalation",
      "href": "/benchmarks/emergency-escalation/"
    },
    {
      "key": "noisyCaller",
      "label": "Noisy caller",
      "href": "/benchmarks/noisy-caller/"
    },
    {
      "key": "evidenceState",
      "label": "Evidence state",
      "href": "/benchmarks/methodology/"
    }
  ],
  "statusDefinitions": {
    "Scenario ready": "The test scenario exists and can be run during a demo or pilot.",
    "Public evidence check": "Public claims or docs are available, but standardized evidence is not complete yet.",
    "Test pending": "Voice Agent Index has not published a repeatable benchmark result for that vendor and scenario.",
    "Needs evidence packet": "The vendor should provide recordings, transcripts, logs, routing proof, or workflow artifacts.",
    "Implementation dependent": "The result depends heavily on how the buyer or implementation partner configures the workflow."
  },
  "rows": [
    {
      "vendor": "Retell AI",
      "slug": "retell-ai",
      "category": "Developer voice agent platform",
      "profileUrl": "https://voiceagentindex.com/tools/retell-ai/",
      "benchmarkUrl": "https://voiceagentindex.com/benchmarks/vendors/retell-ai/",
      "evidenceState": "Profiled, with public claims to verify and a standardized evidence packet still needed before scored benchmark results.",
      "statuses": {
        "latency": {
          "label": "Public claims to verify",
          "tone": "public"
        },
        "handoff": {
          "label": "Test pending",
          "tone": "pending"
        },
        "booking": {
          "label": "Scenario ready",
          "tone": "ready"
        },
        "escalation": {
          "label": "Needs evidence packet",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Test pending",
          "tone": "pending"
        },
        "evidenceState": {
          "label": "Profiled",
          "tone": "public"
        }
      },
      "missingEvidence": [
        "Timestamped call recordings showing first greeting, first useful response, interruptions, and tool waits.",
        "Transfer artifacts that show destination, transfer trigger, transcript context, and whether the human received the reason.",
        "Calendar or CRM action logs tied to the same call transcript and final summary.",
        "Failed-tool and noisy-audio examples, not only polished successful demo calls."
      ],
      "proofPacket": [
        "Three inbound scheduling recordings with transcripts and timing checkpoints.",
        "One failed calendar-slot call showing safe recovery and no invented booking.",
        "One human handoff call with transfer event, destination, and summary payload.",
        "Tool or webhook logs mapped to the call ID and post-call analysis fields."
      ]
    },
    {
      "vendor": "Vapi",
      "slug": "vapi",
      "category": "Developer voice agent API",
      "profileUrl": "https://voiceagentindex.com/tools/vapi/",
      "benchmarkUrl": "https://voiceagentindex.com/benchmarks/vendors/vapi/",
      "evidenceState": "Profiled, with implementation-dependent outcomes and standardized proof needed before scored benchmark results.",
      "statuses": {
        "latency": {
          "label": "Public claims to verify",
          "tone": "public"
        },
        "handoff": {
          "label": "Implementation dependent",
          "tone": "dependent"
        },
        "booking": {
          "label": "Scenario ready",
          "tone": "ready"
        },
        "escalation": {
          "label": "Needs evidence packet",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Test pending",
          "tone": "pending"
        },
        "evidenceState": {
          "label": "Profiled",
          "tone": "public"
        }
      },
      "missingEvidence": [
        "Reference assistant configuration for the tested workflow, including tools, model choices, and phone route.",
        "Call logs that show tool timeouts, retries, failures, and downstream webhooks.",
        "Transfer configuration and proof that the human receives useful context.",
        "Cost trace for representative calls across model, voice, telephony, and platform layers."
      ],
      "proofPacket": [
        "One simple call and one tool-heavy call using the same published benchmark script.",
        "Assistant, tool, and phone-number configuration screenshots or exports.",
        "Structured call analysis, webhook payloads, and failed-tool logs for the same call IDs.",
        "Human transfer recording and summary packet."
      ]
    },
    {
      "vendor": "Telnyx",
      "slug": "telnyx",
      "category": "Voice infrastructure",
      "profileUrl": "https://voiceagentindex.com/tools/telnyx/",
      "benchmarkUrl": "https://voiceagentindex.com/tools/telnyx/",
      "evidenceState": "Architecture mapped",
      "statuses": {
        "latency": {
          "label": "Infrastructure evidence",
          "tone": "public"
        },
        "handoff": {
          "label": "Implementation dependent",
          "tone": "dependent"
        },
        "booking": {
          "label": "Implementation dependent",
          "tone": "dependent"
        },
        "escalation": {
          "label": "Needs workflow proof",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Test pending",
          "tone": "pending"
        },
        "evidenceState": {
          "label": "Architecture mapped",
          "tone": "public"
        }
      },
      "missingEvidence": [],
      "proofPacket": []
    },
    {
      "vendor": "Bland AI",
      "slug": "bland-ai",
      "category": "Enterprise voice AI",
      "profileUrl": "https://voiceagentindex.com/tools/bland-ai/",
      "benchmarkUrl": "https://voiceagentindex.com/benchmarks/vendors/bland-ai/",
      "evidenceState": "Profiled, with public evidence to check and a standardized evidence packet still needed before scored benchmark results.",
      "statuses": {
        "latency": {
          "label": "Public evidence check",
          "tone": "public"
        },
        "handoff": {
          "label": "Test pending",
          "tone": "pending"
        },
        "booking": {
          "label": "Scenario ready",
          "tone": "ready"
        },
        "escalation": {
          "label": "Needs evidence packet",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Test pending",
          "tone": "pending"
        },
        "evidenceState": {
          "label": "Profiled",
          "tone": "public"
        }
      },
      "missingEvidence": [
        "Pathway or workflow map tied to each benchmark call.",
        "Failed-call examples showing voicemail, no-answer, opt-out, correction, and transfer behavior.",
        "CRM, webhook, or calendar logs for completed and failed actions.",
        "Policy proof for escalation, consent, suppression, retry, and recording rules where relevant."
      ],
      "proofPacket": [
        "One pathway export or workflow screenshot for the tested benchmark scenario.",
        "Recordings and transcripts for successful and failed calls.",
        "Branch usage, webhook logs, transfer packet, and analytics view for the same call IDs.",
        "Outbound governance packet if the benchmark includes outbound calling."
      ]
    },
    {
      "vendor": "Synthflow",
      "slug": "synthflow",
      "category": "No-code enterprise voice AI",
      "profileUrl": "https://voiceagentindex.com/tools/synthflow/",
      "benchmarkUrl": "https://voiceagentindex.com/benchmarks/vendors/synthflow/",
      "evidenceState": "Profiled, with public evidence to check and standardized workflow proof needed before scored benchmark results.",
      "statuses": {
        "latency": {
          "label": "Public evidence check",
          "tone": "public"
        },
        "handoff": {
          "label": "Test pending",
          "tone": "pending"
        },
        "booking": {
          "label": "Scenario ready",
          "tone": "ready"
        },
        "escalation": {
          "label": "Needs evidence packet",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Test pending",
          "tone": "pending"
        },
        "evidenceState": {
          "label": "Profiled",
          "tone": "public"
        }
      },
      "missingEvidence": [
        "Workflow builder evidence for the exact benchmark scenario.",
        "Analytics, review, and QA screenshots connected to benchmark calls.",
        "Transfer setup showing context passed to staff and what requires vendor support.",
        "Role, approval, retention, security, HIPAA, and GDPR documentation for regulated deployments."
      ],
      "proofPacket": [
        "No-code workflow screenshots or exports for the tested call path.",
        "Recordings, transcripts, analytics views, and failed-intent logs.",
        "Calendar, CRM, ticketing, or transfer evidence tied to the same call IDs.",
        "Change-ownership proof showing what the buyer can edit without vendor or developer help."
      ]
    },
    {
      "vendor": "Goodcall",
      "slug": "goodcall",
      "category": "AI receptionist",
      "profileUrl": "https://voiceagentindex.com/tools/goodcall/",
      "benchmarkUrl": "https://voiceagentindex.com/benchmarks/vendors/goodcall/",
      "evidenceState": "Profiled, with booking scenario ready and policy, handoff, latency, and noisy-caller proof still needed before scored benchmark results.",
      "statuses": {
        "latency": {
          "label": "Test pending",
          "tone": "pending"
        },
        "handoff": {
          "label": "Needs evidence packet",
          "tone": "needs"
        },
        "booking": {
          "label": "Scenario ready",
          "tone": "ready"
        },
        "escalation": {
          "label": "Needs policy proof",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Test pending",
          "tone": "pending"
        },
        "evidenceState": {
          "label": "Profiled",
          "tone": "public"
        }
      },
      "missingEvidence": [
        "Local-service call recordings with incomplete information, caller corrections, and questions the agent should not answer.",
        "Staff notification, lead summary, SMS, callback, or booking artifacts tied to the same call.",
        "Policy proof for healthcare, legal, financial, urgent, or high-value calls.",
        "Noisy-caller and after-hours examples showing safe capture rather than over-answering."
      ],
      "proofPacket": [
        "Three inbound SMB call recordings covering quote request, appointment request, and incomplete contact details.",
        "Dashboard or notification evidence showing what staff see after the call.",
        "Booking, SMS, callback, or Zapier artifact for the same test call.",
        "Policy and escalation documentation for calls the agent should route instead of answer."
      ]
    },
    {
      "vendor": "Smith.ai",
      "slug": "smith-ai",
      "category": "Hybrid receptionist",
      "profileUrl": "https://voiceagentindex.com/tools/smith-ai/",
      "benchmarkUrl": "https://voiceagentindex.com/tools/smith-ai/",
      "evidenceState": "Profiled",
      "statuses": {
        "latency": {
          "label": "Service dependent",
          "tone": "dependent"
        },
        "handoff": {
          "label": "Hybrid model noted",
          "tone": "public"
        },
        "booking": {
          "label": "Scenario ready",
          "tone": "ready"
        },
        "escalation": {
          "label": "Needs evidence packet",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Service dependent",
          "tone": "dependent"
        },
        "evidenceState": {
          "label": "Profiled",
          "tone": "public"
        }
      },
      "missingEvidence": [],
      "proofPacket": []
    },
    {
      "vendor": "Slang AI",
      "slug": "slang-ai",
      "category": "Restaurant voice AI",
      "profileUrl": "https://voiceagentindex.com/tools/slang-ai/",
      "benchmarkUrl": "https://voiceagentindex.com/tools/slang-ai/",
      "evidenceState": "Profiled",
      "statuses": {
        "latency": {
          "label": "Test pending",
          "tone": "pending"
        },
        "handoff": {
          "label": "Needs evidence packet",
          "tone": "needs"
        },
        "booking": {
          "label": "Restaurant scenario ready",
          "tone": "ready"
        },
        "escalation": {
          "label": "Needs escalation proof",
          "tone": "needs"
        },
        "noisyCaller": {
          "label": "Restaurant audio test pending",
          "tone": "pending"
        },
        "evidenceState": {
          "label": "Profiled",
          "tone": "public"
        }
      },
      "missingEvidence": [],
      "proofPacket": []
    }
  ],
  "researchSources": [
    {
      "label": "Google helpful content guidance",
      "url": "https://developers.google.com/search/docs/fundamentals/creating-helpful-content",
      "note": "Public pages should add original information, complete analysis, clear sourcing, and visible trust signals."
    },
    {
      "label": "Google link best practices",
      "url": "https://developers.google.com/search/docs/crawling-indexing/links-crawlable",
      "note": "Links should use crawlable anchor elements and descriptive anchor text so readers and crawlers understand the target."
    },
    {
      "label": "VoiceBenchmark.ai public leaderboard",
      "url": "https://voicebenchmark.ai/",
      "note": "Public benchmark example with leaderboard, methodology, JSON feeds, and agent-readable links."
    },
    {
      "label": "Deepgram Voice Agent Quality Index",
      "url": "https://deepgram.com/learn/voice-agent-quality-index",
      "note": "Benchmark framing for timing, interruption behavior, and response coverage in conversational voice agents."
    },
    {
      "label": "LiveKit turn-taking tuning",
      "url": "https://docs.livekit.io/agents/logic/turns/tuning/",
      "note": "Technical reference for endpointing, interruption handling, preemptive generation, and audio pre-processing."
    },
    {
      "label": "Hamming voice agent testing guide",
      "url": "https://hamming.ai/resources/voice-agent-testing-guide",
      "note": "Testing lifecycle reference covering scenario tests, regression, load, compliance, and production monitoring."
    },
    {
      "label": "Daily voice-agent benchmark",
      "url": "https://www.daily.co/blog/benchmarking-llms-for-voice-agent-use-cases/",
      "note": "Open benchmark framing for latency, tool calling, instruction following, and knowledge grounding in long voice-agent conversations."
    },
    {
      "label": "Sierra real-time voice benchmark",
      "url": "https://sierra.ai/blog/tau-voice-benchmarking-real-time-voice-agents-on-real-world-tasks",
      "note": "Benchmark framing that combines task completion, simultaneous speech, interruptions, backchannels, and realistic audio conditions."
    }
  ]
}