Skip to content

Commit 2c15da5

Browse files
committed
add sampling
1 parent de99e9d commit 2c15da5

File tree

1 file changed

+172
-0
lines changed

1 file changed

+172
-0
lines changed

openapi.yaml

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7171,6 +7171,38 @@ paths:
71717171
required: true
71727172
schema:
71737173
type: string
7174+
# Read endpoint for a single sample operation, addressed by training session
# ID and operation ID. Responds with RL.SampleOperation.
/rl/training-sessions/{session_id}/operations/sample/{operation_id}:
  get:
    summary: Get sample operation
    description: Retrieves the current status and result of a sample operation.
    operationId: GetSample
    tags: [RL]
    responses:
      "200":
        # OpenAPI requires a response description; an empty string shows up
        # as a blank entry in generated documentation.
        description: Sample operation with its current status, plus result data or error details.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.SampleOperation'
      default:
        description: An unexpected error response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
    parameters:
      - name: session_id
        description: Training session ID
        in: path
        required: true
        schema:
          type: string
      - name: operation_id
        description: Operation ID
        in: path
        required: true
        schema:
          type: string
71747206
/rl/training-sessions/{session_id}:forward-backward:
71757207
post:
71767208
summary: Forward-backward pass
@@ -7235,6 +7267,38 @@ paths:
72357267
required: true
72367268
schema:
72377269
type: string
7270+
# Submission endpoint for sample operations on a training session.
# Takes RL.SampleBody and responds with RL.SampleOperation.
/rl/training-sessions/{session_id}:sample:
  post:
    summary: Sample
    description: Submits a sample operation that will asynchronously generate text completions with logprobs.
    operationId: Sample
    tags: [RL]
    requestBody:
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/RL.SampleBody'
      required: true
    responses:
      "200":
        # OpenAPI requires a response description; an empty string shows up
        # as a blank entry in generated documentation.
        description: The submitted sample operation, identified by its operation_id.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/RL.SampleOperation'
      default:
        description: An unexpected error response.
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/ErrorResponse'
    parameters:
      - name: session_id
        description: Training session ID
        in: path
        required: true
        schema:
          type: string
72387302
/rl/training-sessions/{session_id}:stop:
72397303
post:
72407304
summary: Stop training session
@@ -7280,6 +7344,23 @@ components:
72807344
default: 0.0001
72817345
adamw_params:
72827346
$ref: '#/components/schemas/RL.AdamWOptimizerParams'
7347+
# Body of a sample request. Only `prompt` is required; `num_samples`
# defaults to 1.
# NOTE(review): keywords placed next to `$ref` (the descriptions below) are
# ignored by OpenAPI 3.0 tooling - confirm the spec version in use honors them.
RL.SampleBody:
  type: object
  required: [prompt]
  properties:
    prompt:
      description: Input prompt as tokenized chunks
      $ref: '#/components/schemas/RL.ModelInput'
    sampling_params:
      description: Optional sampling parameters
      $ref: '#/components/schemas/RL.SamplingParams'
    num_samples:
      description: Number of completions to generate for this prompt
      type: integer
      format: int64
      default: 1
      example: 1
72837364
RL.ForwardBackwardBody:
72847365
type: object
72857366
required:
@@ -7306,6 +7387,46 @@ components:
73067387
items:
73077388
type: object
73087389
$ref: '#/components/schemas/RL.InputChunk'
7390+
# Knobs controlling text generation for a sample operation. No property is
# listed as required; every field except `stop` and `seed` declares a default.
RL.SamplingParams:
  type: object
  properties:
    max_tokens:
      type: integer
      format: int32
      example: 100
      default: 100
      description: Maximum number of tokens to generate per completion
    temperature:
      type: number
      format: float
      example: 1
      default: 1.0
      description: Sampling temperature
    top_p:
      type: number
      format: float
      example: 1
      default: 1.0
      description: Nucleus sampling probability threshold
    top_k:
      # NOTE(review): -1 presumably means "no top-k limit" - confirm and say
      # so in the description.
      type: integer
      format: int32
      example: -1
      default: -1
      description: Top-k sampling limit
    stop:
      type: array
      example:
        - "\n"
        - END
      items:
        type: string
      description: Generation stops when any of these strings is produced
    seed:
      # Declared as a string carrying an int64, so the example has to be a
      # string too; a bare 42 is a YAML integer and violates this schema.
      type: string
      format: int64
      example: "42"
      description: Random seed for reproducibility
73097430
RL.InputChunk:
73107431
type: object
73117432
properties:
@@ -7556,13 +7677,64 @@ components:
75567677
$ref: '#/components/schemas/RL.OptimStepResult'
75577678
error:
75587679
$ref: '#/components/schemas/RL.TrainingOperationError'
7680+
# Envelope for a sample operation: its ID and status, plus a `data` slot
# (RL.SampleResult) and an `error` slot (RL.TrainingOperationError).
# NOTE(review): `example` and `description` next to `$ref` on `status` are
# ignored by OpenAPI 3.0 tooling - confirm the spec version in use honors them.
RL.SampleOperation:
  type: object
  properties:
    operation_id:
      description: Operation ID
      type: string
      example: 550e8400-e29b-41d4-a716-446655440000
    status:
      description: Operation status
      example: TRAINING_OPERATION_STATUS_PENDING
      $ref: '#/components/schemas/RL.TrainingOperationStatus'
    data:
      $ref: '#/components/schemas/RL.SampleResult'
    error:
      $ref: '#/components/schemas/RL.TrainingOperationError'
75597695
# Result object exposing a single integer field, the step number.
RL.OptimStepResult:
  type: object
  properties:
    step:
      type: integer
      description: Step number
      example: 100
7702+
# Result of a sample operation: a list of RL.SampleSequence entries.
RL.SampleResult:
  type: object
  properties:
    sequences:
      description: Generated completions
      type: array
      items:
        # `type` is kept next to `$ref` to match the pattern used by other
        # array items in this file.
        type: object
        $ref: '#/components/schemas/RL.SampleSequence'
7711+
# One generated completion: token IDs, a log probability per token, and the
# reason generation stopped.
RL.SampleSequence:
  type: object
  properties:
    tokens:
      type: array
      # Items are strings carrying int64 values, so the example entries have
      # to be strings; bare integers would not validate against `items`.
      example:
        - "123"
        - "456"
        - "789"
      items:
        type: string
        format: int64
      description: Generated token IDs
    logprobs:
      type: array
      example:
        - -0.5
        - -1.2
        - -0.3
      items:
        type: number
        format: double
      description: Log probabilities for each generated token
    stop_reason:
      # NOTE(review): the set of possible values is not visible here; consider
      # an enum once the full list is confirmed.
      type: string
      example: length
      description: Reason for stopping generation
75667738
RL.ForwardBackwardOperation:
75677739
type: object
75687740
properties:

0 commit comments

Comments
 (0)