1- import VoltAgent , { Agent , VoltAgentObservability , buildScorer } from "@voltagent/core" ;
1+ import VoltAgent , { Agent , VoltAgentObservability , buildScorer , createTool } from "@voltagent/core" ;
22import {
33 createAnswerCorrectnessScorer ,
44 createAnswerRelevancyScorer ,
@@ -10,6 +10,7 @@ import {
1010 createModerationScorer ,
1111 createPossibleScorer ,
1212 createSummaryScorer ,
13+ createToolCallAccuracyScorerCode ,
1314 createTranslationScorer ,
1415 scorers ,
1516} from "@voltagent/scorers" ;
@@ -74,6 +75,115 @@ const customScorer = buildScorer({
7475 } )
7576 . build ( ) ;
7677
/**
 * Small in-memory product list backing the demo tools in this file.
 * `searchProductsTool` matches against `name`; `checkInventoryTool` looks an
 * entry up by `id` and reports its `inStock` value.
 */
const productCatalog = [
  {
    id: "laptop-pro-13",
    name: "Laptop Pro 13",
    price: 1299,
    inStock: 8,
  },
  {
    id: "laptop-air-14",
    name: "Laptop Air 14",
    price: 999,
    inStock: 14,
  },
  {
    id: "office-monitor-27",
    name: "Office Monitor 27",
    price: 299,
    inStock: 0,
  },
];
83+
/**
 * Tool that searches `productCatalog` by name.
 *
 * A product matches when its lower-cased name contains the lower-cased query
 * as a substring. The result echoes the query, reports the match count, and
 * lists each match as `{ id, name, price }` (stock is intentionally left to
 * the inventory tool).
 */
const searchProductsTool = createTool({
  name: "searchProducts",
  description: "Searches a small product catalog by query and returns product candidates.",
  parameters: z.object({
    query: z.string().describe("Product search query"),
  }),
  execute: async ({ query }: { query: string }) => {
    const needle = query.toLowerCase();
    const results: Array<{ id: string; name: string; price: number }> = [];

    for (const product of productCatalog) {
      // Case-insensitive substring match on the display name only.
      if (product.name.toLowerCase().includes(needle)) {
        results.push({ id: product.id, name: product.name, price: product.price });
      }
    }

    return {
      query,
      total: results.length,
      results,
    };
  },
});
103+
/**
 * Tool that reports stock for a single product id from `productCatalog`.
 *
 * Known id: returns `{ productId, available, isError: false }` where
 * `available` is the catalog's `inStock` value.
 * Unknown id: returns an error-shaped result (`isError: true`,
 * `error: "Product not found"`, `available: 0`) instead of throwing.
 */
const checkInventoryTool = createTool({
  name: "checkInventory",
  description: "Checks stock status for a product id.",
  parameters: z.object({
    productId: z.string().describe("Product id from searchProducts result"),
  }),
  execute: async ({ productId }: { productId: string }) => {
    const match = productCatalog.find(({ id }) => id === productId);

    return match
      ? {
          productId,
          available: match.inStock,
          isError: false,
        }
      : {
          productId,
          isError: true,
          error: "Product not found",
          available: 0,
        };
  },
});
128+
/**
 * Loosely typed view of one tool-result entry as read by the tool execution
 * health scorer. Every field is optional and any additional keys are allowed
 * through the `Record<string, unknown>` base.
 */
interface ToolEvalToolResult extends Record<string, unknown> {
  /** Explicit failure flag set on the entry itself. */
  isError?: boolean;
  /** Error details; any truthy value is treated as a failure by the scorer. */
  error?: unknown;
  /** Value returned by the tool; may itself carry `isError` / `error`. */
  result?: unknown;
}
134+
/**
 * Scorer payload shape used by the tool-evaluation scorers in this file.
 * Both lists are optional; other payload keys pass through via the
 * `Record<string, unknown>` base.
 */
interface ToolEvalPayload extends Record<string, unknown> {
  /** Tool invocations made during the run; `toolName` may be missing. */
  toolCalls?: { toolName?: string }[];
  /** Results produced for those invocations. */
  toolResults?: Array<ToolEvalToolResult>;
}
139+
/**
 * Code-based scorer that checks the agent's tool usage against the expected
 * sequence: `searchProducts` followed by `checkInventory`.
 *
 * NOTE(review): `strictMode: false` presumably relaxes the match (e.g. allows
 * extra calls around the expected order) — confirm against the
 * `@voltagent/scorers` documentation.
 */
const toolCallOrderScorer = createToolCallAccuracyScorerCode<ToolEvalPayload>({
  strictMode: false,
  expectedToolOrder: ["searchProducts", "checkInventory"],
});
144+
/**
 * Custom scorer that rates how cleanly the agent's tool calls executed.
 *
 * Score = max(0, completionRatio - 0.25 * failedResultCount), where
 * - completionRatio is results / calls capped at 1 (or 1 when no calls were made);
 * - a result counts as failed when it, or its object-valued `result`, has
 *   `isError === true` or a truthy `error`.
 *
 * The metadata reports the called tool names, the raw counts, and the ratio.
 */
const toolExecutionHealthScorer = buildScorer<ToolEvalPayload, Record<string, unknown>>({
  id: "tool-execution-health",
  label: "Tool Execution Health",
})
  .score(({ payload }) => {
    const calls = payload.toolCalls ?? [];
    const results = payload.toolResults ?? [];

    // Shared failure test for both the result entry and its nested `result`.
    const signalsFailure = (candidate: Record<string, unknown>): boolean =>
      candidate.isError === true || Boolean(candidate.error);

    // Keep only calls that actually carry a non-empty tool name.
    const calledToolNames: string[] = [];
    for (const { toolName } of calls) {
      if (toolName) {
        calledToolNames.push(toolName);
      }
    }

    let failedResultCount = 0;
    for (const entry of results) {
      if (signalsFailure(entry)) {
        failedResultCount += 1;
        continue;
      }

      // Tools may report failures inside their returned value instead.
      const nested = entry.result;
      if (nested && typeof nested === "object" && signalsFailure(nested as Record<string, unknown>)) {
        failedResultCount += 1;
      }
    }

    let completionRatio = 1;
    if (calls.length !== 0) {
      completionRatio = Math.min(results.length / calls.length, 1);
    }

    // Each failed result costs a flat 0.25, floored at zero.
    const score = Math.max(0, completionRatio - failedResultCount * 0.25);

    return {
      score,
      metadata: {
        calledToolNames,
        toolCallCount: calls.length,
        toolResultCount: results.length,
        failedResultCount,
        completionRatio,
      },
    };
  })
  .build();
186+
77187const HELPFULNESS_SCHEMA = z . object ( {
78188 score : z . number ( ) . min ( 0 ) . max ( 1 ) . describe ( "Score from 0 to 1 for helpfulness" ) ,
79189 reason : z . string ( ) . describe ( "Explanation of the score" ) ,
@@ -295,6 +405,26 @@ const supportAgent = new Agent({
295405 } ,
296406} ) ;
297407
/**
 * Demo agent for tool-usage evaluation. It is given the product search and
 * inventory tools and is instructed to call them in that order; its runs are
 * scored by `toolCallOrderScorer` and `toolExecutionHealthScorer`.
 *
 * NOTE(review): sampling `{ type: "ratio", rate: 1 }` presumably evaluates
 * every interaction — confirm against the VoltAgent eval documentation.
 */
const toolEvalAgent = new Agent({
  name: "tool-eval-demo",
  model: "openai/gpt-4o-mini",
  tools: [searchProductsTool, checkInventoryTool],
  instructions: `You are a product assistant.
Always call searchProducts first, then call checkInventory for a selected product before finalizing your answer.
If no products are found, explain that clearly.`,
  eval: {
    sampling: { type: "ratio", rate: 1 },
    scorers: {
      toolCallOrder: { scorer: toolCallOrderScorer },
      toolExecutionHealth: { scorer: toolExecutionHealthScorer },
    },
  },
});
427+
298428const singleEvalAgent = new Agent ( {
299429 name : "single-eval-demo" ,
300430 instructions : "You are a helpful assistant that answers questions about VoltAgent." ,
@@ -340,6 +470,7 @@ const scorerFeedbackAgent = new Agent({
340470new VoltAgent ( {
341471 agents : {
342472 support : supportAgent ,
473+ toolEval : toolEvalAgent ,
343474 singleEval : singleEvalAgent ,
344475 scorerFeedback : scorerFeedbackAgent ,
345476 } ,
@@ -350,7 +481,11 @@ new VoltAgent({
// Demo driver: runs one question through the single-eval agent and one
// through the tool-eval agent (capped at 4 steps), then prints both answers.
// `void` marks the promise as intentionally fire-and-forget, and the trailing
// `.catch` keeps a failed model call from becoming an unhandled rejection.
void (async () => {
  const question = "How can I enable live eval scorers in VoltAgent?";
  const result = await singleEvalAgent.generateText(question);
  const toolQuestion = "Find a laptop and check inventory before recommending one.";
  const toolResult = await toolEvalAgent.generateText(toolQuestion, { maxSteps: 4 });

  console.log("Question:\n", question, "\n");
  console.log("Agent response:\n", result.text, "\n");
  console.log("Tool eval question:\n", toolQuestion, "\n");
  console.log("Tool eval response:\n", toolResult.text, "\n");
})().catch((error: unknown) => {
  // Report the failure with context instead of letting the rejection float.
  console.error("Live eval demo run failed:", error);
});
0 commit comments