@@ -7171,6 +7171,38 @@ paths:
71717171 required : true
71727172 schema :
71737173 type : string
# Status/result lookup for a single sample operation of a training session.
/rl/training-sessions/{session_id}/operations/sample/{operation_id}:
  get:
    summary: Get sample operation
    description: Retrieves the current status and result of a sample operation.
    operationId: GetSample
    tags: [RL]
    responses:
      "200":
        # The Response Object's description is a required, human-facing field;
        # it was previously blank, which renders as an empty row in API docs.
        description: Current status and result of the sample operation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.SampleOperation'
      default:
        description: An unexpected error response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
  # Path-level parameters: shared by every method declared on this path.
  parameters:
    - name: session_id
      description: Training session ID
      in: path
      required: true
      schema:
        type: string
    - name: operation_id
      description: Operation ID
      in: path
      required: true
      schema:
        type: string
71747206 /rl/training-sessions/{session_id}:forward-backward :
71757207 post :
71767208 summary : Forward-backward pass
@@ -7235,6 +7267,38 @@ paths:
72357267 required : true
72367268 schema :
72377269 type : string
# Submission endpoint for sample operations on a training session.
/rl/training-sessions/{session_id}:sample:
  post:
    summary: Sample
    description: Submits a sample operation that will asynchronously generate text completions with logprobs.
    operationId: Sample
    tags: [RL]
    requestBody:
      required: true
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RL.SampleBody'
    responses:
      "200":
        # The Response Object's description is a required, human-facing field;
        # it was previously blank, which renders as an empty row in API docs.
        description: The submitted sample operation.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.SampleOperation'
      default:
        description: An unexpected error response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
  # Path-level parameters: shared by every method declared on this path.
  parameters:
    - name: session_id
      description: Training session ID
      in: path
      required: true
      schema:
        type: string
72387302 /rl/training-sessions/{session_id}:stop :
72397303 post :
72407304 summary : Stop training session
@@ -7280,6 +7344,23 @@ components:
72807344 default : 0.0001
72817345 adamw_params :
72827346 $ref : ' #/components/schemas/RL.AdamWOptimizerParams'
# Request body of the `Sample` operation; only `prompt` is mandatory.
RL.SampleBody:
  type: object
  required: [prompt]
  properties:
    prompt:
      description: Input prompt as tokenized chunks
      $ref: '#/components/schemas/RL.ModelInput'
    sampling_params:
      description: Optional sampling parameters
      $ref: '#/components/schemas/RL.SamplingParams'
    num_samples:
      description: Number of completions to generate for this prompt
      type: integer
      format: int64
      default: 1
      example: 1
72837364 RL.ForwardBackwardBody :
72847365 type : object
72857366 required :
@@ -7306,6 +7387,46 @@ components:
73067387 items :
73077388 type : object
73087389 $ref : ' #/components/schemas/RL.InputChunk'
# Generation controls referenced from RL.SampleBody.sampling_params.
# No property is required; each numeric field declares its own default.
RL.SamplingParams:
  type: object
  properties:
    max_tokens:
      type: integer
      format: int32
      example: 100
      default: 100
      description: Maximum number of tokens to generate per completion
    temperature:
      type: number
      format: float
      example: 1
      default: 1.0
      description: Sampling temperature
    top_p:
      type: number
      format: float
      example: 1
      default: 1.0
      description: Nucleus sampling probability threshold
    top_k:
      type: integer
      format: int32
      # NOTE(review): -1 presumably means "no top-k limit" - confirm with the
      # backend and state it in the description if so.
      example: -1
      default: -1
      description: Top-k sampling limit
    stop:
      type: array
      example:
        - "\n"
        - END
      items:
        type: string
      description: Generation stops when any of these strings is produced
    seed:
      type: string
      format: int64
      # The schema declares a string-encoded int64, so the example must be a
      # string as well; a bare 42 is an integer and contradicts `type: string`.
      # NOTE(review): if the wire format is really a JSON number, change `type`
      # to integer instead - confirm against the server implementation.
      example: '42'
      description: Random seed for reproducibility
73097430 RL.InputChunk :
73107431 type : object
73117432 properties :
@@ -7556,13 +7677,64 @@ components:
75567677 $ref : ' #/components/schemas/RL.OptimStepResult'
75577678 error :
75587679 $ref : ' #/components/schemas/RL.TrainingOperationError'
# Envelope for a sample operation: identifier, status, and the `data` and
# `error` payloads.
RL.SampleOperation:
  type: object
  properties:
    operation_id:
      description: Operation ID
      type: string
      example: 550e8400-e29b-41d4-a716-446655440000
    status:
      description: Operation status
      example: TRAINING_OPERATION_STATUS_PENDING
      $ref: '#/components/schemas/RL.TrainingOperationStatus'
    data:
      $ref: '#/components/schemas/RL.SampleResult'
    error:
      $ref: '#/components/schemas/RL.TrainingOperationError'
# Object schema exposing a single integer `step` counter.
RL.OptimStepResult:
  type: object
  properties:
    step:
      type: integer
      description: Step number
      example: 100
# Result payload of a sample operation: the list of generated sequences.
RL.SampleResult:
  type: object
  properties:
    sequences:
      description: Generated completions
      type: array
      items:
        $ref: '#/components/schemas/RL.SampleSequence'
        type: object
# One generated completion: token IDs, log probabilities, and the stop reason.
RL.SampleSequence:
  type: object
  properties:
    tokens:
      type: array
      # Items are declared as string-encoded int64 values, so the example
      # entries are quoted; bare integers would contradict `type: string`.
      # NOTE(review): if token IDs are really sent as JSON numbers, change the
      # item type to integer instead - confirm against the server.
      example:
        - '123'
        - '456'
        - '789'
      items:
        type: string
        format: int64
      description: Generated token IDs
    logprobs:
      type: array
      example:
        - -0.5
        - -1.2
        - -0.3
      items:
        type: number
        format: double
      description: Log probabilities for each generated token
    stop_reason:
      type: string
      example: length
      description: Reason for stopping generation
75667738 RL.ForwardBackwardOperation :
75677739 type : object
75687740 properties :
0 commit comments