
Commit 86cee4c

Authored by stainless-app[bot], dulaj-me, timofeev1995, and technillogue
release: 2.4.0 (#297)
* fix: fixed autoscaling configs in deployments api
* fix(jig): send {} for deployment autoscaling to unset if unset in config (#294)
* fix: fix autoscaling config usage in jig (#298)
* Fix & enrich tokenization example script (#296)
  * Fix script
  * Add parquet column to the list
* feat(jig): move config.dockerfile to config.image.dockerfile_path and add a config.deploy.image option so that you don't have to always pass it as a flag (#287)
* chore: fix lints (#299)
* release: 2.4.0

Co-authored-by: stainless-app[bot] <142633134+stainless-app[bot]@users.noreply.github.com>
Co-authored-by: Dulaj Disanayaka <[email protected]>
Co-authored-by: Egor Timofeev <[email protected]>
Co-authored-by: technillogue <[email protected]>
1 parent f80a2b0 commit 86cee4c

File tree

13 files changed: +312 −71 lines


.release-please-manifest.json

Lines changed: 1 addition & 1 deletion
@@ -1,3 +1,3 @@
 {
-  ".": "2.3.2"
+  ".": "2.4.0"
 }

.stats.yml

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
 configured_endpoints: 74
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-7a33f9086abc839141dcbfef36fc505b7c2618dc893a24e11a09e5bffe976bcf.yml
-openapi_spec_hash: dba6fe0b4f5f10181628be7e93718d3a
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/togetherai%2Ftogetherai-452048e531c558b879791ccc9788c3f3c23c50c808c909a6d95f47af360566a4.yml
+openapi_spec_hash: 11835b47ff4c2d1b4dbeed74c49908e1
 config_hash: b66198d27b4d5c152688ff6cccfdeab5

CHANGELOG.md

Lines changed: 21 additions & 0 deletions
@@ -1,5 +1,26 @@
 # Changelog
 
+## 2.4.0 (2026-03-11)
+
+Full Changelog: [v2.3.2...v2.4.0](https://github.com/togethercomputer/together-py/compare/v2.3.2...v2.4.0)
+
+### Features
+
+* **jig:** move config.dockerfile to config.image.dockerfile_path and add an config.deploy.image option so that you don't have to always pass it as a flag ([#287](https://github.com/togethercomputer/together-py/issues/287)) ([16f64a5](https://github.com/togethercomputer/together-py/commit/16f64a58d8b83f137474ea3eabb31abdc7d38a5f))
+
+
+### Bug Fixes
+
+* fix autoscaling config usage in jig ([#298](https://github.com/togethercomputer/together-py/issues/298)) ([a3b6657](https://github.com/togethercomputer/together-py/commit/a3b6657efb440bed6cc9f722e54bffd264a95270))
+* fixed autoscaling configs in deployments api ([d31204c](https://github.com/togethercomputer/together-py/commit/d31204c3e78340bb362ec8b44da9ee8b09e8f6ac))
+* **jig:** deployment tracking ([#300](https://github.com/togethercomputer/together-py/issues/300)) ([f80a2b0](https://github.com/togethercomputer/together-py/commit/f80a2b05ec9c5d917a5a327ac002e44dddd96b84))
+* **jig:** send {} for deployment autoscaling to unset if unset in config ([#294](https://github.com/togethercomputer/together-py/issues/294)) ([7657de3](https://github.com/togethercomputer/together-py/commit/7657de3ccc5ef077f8b413f67399d35724af21fd))
+
+
+### Chores
+
+* fix lints ([#299](https://github.com/togethercomputer/together-py/issues/299)) ([2b35ec4](https://github.com/togethercomputer/together-py/commit/2b35ec46fa2414b14b589b16781c42990e697dd4))
+
 ## 2.3.2 (2026-03-09)
 
 Full Changelog: [v2.3.1...v2.3.2](https://github.com/togethercomputer/together-py/compare/v2.3.1...v2.3.2)

examples/tokenize_data.py

Lines changed: 32 additions & 14 deletions
@@ -72,38 +72,56 @@ def pack_sequences(
     Sequence 3:
     ['▁toys', '▁.', '</s>', '<s>', '▁but', '▁just', '▁one', '▁look']
     """
-    packed_sequences = []
-    buffer = []
+    packed_sequences: list[list[int]] = []
+    packed_position_ids: list[list[int]] = []
+    buffer: list[int] = []
+    position_buffer: list[int] = []
 
     for input_ids in batch["input_ids"]:
-        # Add the current sequence to the buffer
-        buffer.extend(input_ids)
-        buffer.append(eos_token_id)  # Add EOS at the end of each sequence
+        # Truncate sequences that individually exceed max_seq_len (including EOS token).
+        seq_with_eos = (input_ids + [eos_token_id])[:max_seq_len]
+        # Position IDs reset to 0 at the start of each sub-sequence.
+        seq_positions = list(range(len(seq_with_eos)))
 
-        # Check if buffer needs to be split into chunks
-        while len(buffer) > max_seq_len:
-            # Take a full chunk from the buffer and append it to packed_sequences
-            packed_sequences.append(buffer[:max_seq_len])
-            # Remove the processed chunk from the buffer
-            buffer = buffer[max_seq_len:]
+        # If adding this sequence would overflow, flush the current buffer first.
+        # This ensures every chunk starts at a sequence boundary (position_ids[0] == 0).
+        if buffer and len(buffer) + len(seq_with_eos) > max_seq_len:
+            padding_length = max_seq_len - len(buffer)
+            packed_sequences.append(buffer + [pad_token_id] * padding_length)
+            packed_position_ids.append(position_buffer + [0] * padding_length)
+            buffer = []
+            position_buffer = []
+
+        buffer.extend(seq_with_eos)
+        position_buffer.extend(seq_positions)
+
+        # Flush immediately if exactly full (no padding needed).
+        if len(buffer) == max_seq_len:
+            packed_sequences.append(buffer)
+            packed_position_ids.append(position_buffer)
+            buffer = []
+            position_buffer = []
 
     # Add the last buffer if it's exactly chunk_size
     if len(buffer) == max_seq_len:
         packed_sequences.append(buffer)
+        packed_position_ids.append(position_buffer)
     elif len(buffer) > cutoff_size:
         # if the buffer is larger than the cutoff size, pad it to the chunk_size
         # if not, we do not include in the packed_sequences
-        buffer.extend([pad_token_id] * (max_seq_len - len(buffer)))
+        padding_length = max_seq_len - len(buffer)
+        buffer.extend([pad_token_id] * padding_length)
+        position_buffer.extend([0] * padding_length)
         packed_sequences.append(buffer)
+        packed_position_ids.append(position_buffer)
 
-    output = {"input_ids": packed_sequences}
+    output = {"input_ids": packed_sequences, "position_ids": packed_position_ids}
     if add_labels:
         output["labels"] = [
             [LOSS_IGNORE_INDEX if token_id == pad_token_id else token_id for token_id in example]
             for example in output["input_ids"]
         ]
 
-    # mask attention for padding tokens, a better version would also mask cross-sequence dependencies
     output["attention_mask"] = [
         [0 if token_id == pad_token_id else 1 for token_id in example] for example in output["input_ids"]
     ]
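The flush-at-boundary logic introduced above can be exercised standalone. Below is a minimal sketch of the same packing scheme (the real pack_sequences takes extra parameters such as add_labels; LOSS_IGNORE_INDEX and the exact signature here are simplifications, not the script's actual API):

```python
def pack(batch, max_seq_len, eos_token_id, pad_token_id, cutoff_size=0):
    """Pack variable-length sequences into fixed-size chunks.

    Every chunk starts at a sequence boundary, so position_ids restart
    at 0 for each packed sub-sequence (as in the diff above).
    """
    packed, positions = [], []
    buffer, pos_buffer = [], []
    for input_ids in batch["input_ids"]:
        # Append EOS, then truncate sequences that alone exceed max_seq_len.
        seq = (input_ids + [eos_token_id])[:max_seq_len]
        seq_pos = list(range(len(seq)))
        # Flush (with padding) before a sequence that would overflow the buffer.
        if buffer and len(buffer) + len(seq) > max_seq_len:
            pad = max_seq_len - len(buffer)
            packed.append(buffer + [pad_token_id] * pad)
            positions.append(pos_buffer + [0] * pad)
            buffer, pos_buffer = [], []
        buffer += seq
        pos_buffer += seq_pos
        if len(buffer) == max_seq_len:  # exactly full: no padding needed
            packed.append(buffer)
            positions.append(pos_buffer)
            buffer, pos_buffer = [], []
    if len(buffer) > cutoff_size:  # keep the tail only if long enough
        pad = max_seq_len - len(buffer)
        packed.append(buffer + [pad_token_id] * pad)
        positions.append(pos_buffer + [0] * pad)
    return {"input_ids": packed, "position_ids": positions}

out = pack({"input_ids": [[1, 2, 3], [4, 5], [6]]},
           max_seq_len=6, eos_token_id=0, pad_token_id=9)
# First chunk flushes early because [4, 5, 0] would overflow:
# input_ids    [[1, 2, 3, 0, 9, 9], [4, 5, 0, 6, 0, 9]]
# position_ids [[0, 1, 2, 3, 0, 0], [0, 1, 2, 0, 1, 0]]
```

Note that, unlike the old while-loop splitter, this version never splits one sequence across two chunks; it trades a little padding for the guarantee that position IDs are meaningful within every chunk.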

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "together"
-version = "2.3.2"
+version = "2.4.0"
 description = "The official Python library for the together API"
 dynamic = ["readme"]
 license = "Apache-2.0"

src/together/_version.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 __title__ = "together"
-__version__ = "2.3.2"  # x-release-please-version
+__version__ = "2.4.0"  # x-release-please-version

src/together/lib/cli/api/beta/jig/jig.py

Lines changed: 13 additions & 11 deletions
@@ -72,6 +72,7 @@ class ImageConfig:
     cmd: str = "python app.py"
     copy: list[str] = field(default_factory=list[str])
     auto_include_git: bool = False
+    dockerfile_path: str = "Dockerfile"
 
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> ImageConfig:
@@ -109,10 +110,11 @@ class DeployConfig:
     port: int = 8000
     environment_variables: dict[str, str] = field(default_factory=dict[str, str])
     command: list[str] | None = None
-    autoscaling: dict[str, str | float | int] | None = None
+    autoscaling: dict[str, Union[str, float, int]] = field(default_factory=dict[str, Union[str, float, int]])
     health_check_path: str = "/health"
     termination_grace_period_seconds: int = 300
     volume_mounts: list[VolumeMount] = field(default_factory=list[VolumeMount])
+    image: str | None = None
 
     @classmethod
     def from_dict(cls, data: dict[str, Any]) -> DeployConfig:
@@ -170,7 +172,6 @@ class Config:
     """Main configuration from jig.toml or pyproject.toml"""
 
     model_name: str = ""
-    dockerfile: str = "Dockerfile"
     image: ImageConfig = field(default_factory=ImageConfig)
     deploy: DeployConfig = field(default_factory=DeployConfig)
     _path: Path = field(default_factory=lambda: Path("pyproject.toml"))
@@ -236,7 +237,6 @@ def load(cls, data: dict[str, Any], path: Path) -> Config:
         return cls(
             image=ImageConfig.from_dict(jig_config.get("image", {})),
             deploy=DeployConfig.from_dict(jig_config.get("deploy", {})),
-            dockerfile=jig_config.get("dockerfile", "Dockerfile"),
             model_name=name,
             _path=path,
             _unique_name_hint=hint,
@@ -394,7 +394,7 @@ def _generate_dockerfile(config: Config) -> str:
 
 def _dockerfile(config: Config) -> bool:
     """Generate or update managed Dockerfile, returns False if user-managed"""
-    dockerfile_path = Path(config.dockerfile)
+    dockerfile_path = Path(config.image.dockerfile_path)
     if not dockerfile_path.exists():
         dockerfile_path.write_text(_generate_dockerfile(config))
         echo("\N{CHECK MARK} Generated Dockerfile")
@@ -570,12 +570,12 @@ def build(self, tag: str = "latest", warmup: bool = False, docker_args: str | No
         image = self.image(tag)
 
         if not _dockerfile(self.config):
-            echo(f"\N{INFORMATION SOURCE} Using existing {self.config.dockerfile} (not managed by jig)")
+            echo(f"\N{INFORMATION SOURCE} Using existing {self.config.image.dockerfile_path} (not managed by jig)")
 
         echo(f"Building {image}")
         cmd = ["docker", "build", "--platform", "linux/amd64", "-t", image, "."]
-        if self.config.dockerfile != "Dockerfile":
-            cmd.extend(["-f", self.config.dockerfile])
+        if self.config.image.dockerfile_path != "Dockerfile":
+            cmd.extend(["-f", self.config.image.dockerfile_path])
 
         extra_args = docker_args or os.getenv("DOCKER_BUILD_EXTRA_ARGS", "")
         if extra_args:
@@ -609,8 +609,10 @@ def deploy(
         docker_args: str | None = None,
         existing_image: str | None = None,
     ) -> None:
-        if existing_image:
-            deployment_image = existing_image
+        if deployment_image := existing_image:
+            echo(f"Deploying provided image {deployment_image}")
+        elif deployment_image := self.config.deploy.image:
+            echo(f"Deploying configured image {deployment_image}")
         else:
             self.build(tag, warmup, docker_args)
             self.push(tag)
@@ -822,7 +824,7 @@ def format_status(self, d: Deployment) -> str:
         ]
 
         if a := d.autoscaling:
-            lines.append(f"  Autoscaling: {a.get('metric', 'N/A')} {a.get('target', 'N/A')} (target)")
+            lines.append(f"  Autoscaling: {a.metric or 'N/A'} {a.target or 'N/A'} (target)")
         lines.append(f"""  Replicas: {d.ready_replicas}/{d.desired_replicas} ready (min {d.min_replicas}, max {d.max_replicas})
 
 Configuration:""")
@@ -971,7 +973,7 @@ def init() -> None:
 def dockerfile(jig: Jig) -> None:
     """Generate Dockerfile"""
     if not _dockerfile(jig.config):
-        msg = f"{jig.config.dockerfile} exists and is not managed by jig. Remove or rename the file to allow jig to manage dockerfile."
+        msg = f"{jig.config.image.dockerfile_path} exists and is not managed by jig. Remove or rename the file to allow jig to manage dockerfile."
         raise JigError(msg)
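The deploy() change above replaces a plain if with walrus-operator fallbacks: an explicitly passed image wins, then the configured config.deploy.image, and only then does jig build and push. A standalone sketch of that resolution pattern (the names resolve_image and the tuple return are illustrative, not the real Jig API):

```python
def resolve_image(existing_image, configured_image, build):
    """Pick the deployment image: explicit flag wins, then config, then build."""
    if image := existing_image:       # e.g. an --image flag passed on the CLI
        source = "provided"
    elif image := configured_image:   # e.g. config.deploy.image from jig.toml
        source = "configured"
    else:                             # nothing given: fall back to a local build
        image = build()
        source = "built"
    return image, source

# The first non-empty candidate wins; the walrus binds it in the same test.
print(resolve_image(None, "ghcr.io/acme/app:v1", lambda: "local:latest"))
# → ('ghcr.io/acme/app:v1', 'configured')
```

The walrus form keeps the bound name and the truthiness test in one place, which is what lets the new elif branch slot in without duplicating the assignment.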

src/together/lib/constants.py

Lines changed: 1 addition & 1 deletion
@@ -45,7 +45,7 @@
 MAX_BASE64_IMAGE_LENGTH = len("data:image/jpeg;base64,") + 4 * MAX_IMAGE_BYTES // 3
 
 # expected columns for Parquet files
-PARQUET_EXPECTED_COLUMNS = ["input_ids", "attention_mask", "labels"]
+PARQUET_EXPECTED_COLUMNS = ["input_ids", "attention_mask", "labels", "position_ids"]
 
 
 class DatasetFormat(enum.Enum):

src/together/resources/beta/jig/jig.py

Lines changed: 15 additions & 17 deletions
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Dict, Iterable
+from typing import Iterable
 from typing_extensions import Literal
 
 import httpx
@@ -122,7 +122,7 @@ def update(
         id: str,
         *,
         args: SequenceNotStr[str] | Omit = omit,
-        autoscaling: Dict[str, str] | Omit = omit,
+        autoscaling: jig_update_params.Autoscaling | Omit = omit,
         command: SequenceNotStr[str] | Omit = omit,
         cpu: float | Omit = omit,
         description: str | Omit = omit,
@@ -155,9 +155,8 @@ def update(
           args: Args overrides the container's CMD. Provide as an array of arguments (e.g.,
               ["python", "app.py"])
 
-          autoscaling:
-              Autoscaling configuration as key-value pairs. Example: {"metric":
-              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog
+          autoscaling: Autoscaling configuration for the deployment. Omit or set to null to disable
+              autoscaling
 
           command: Command overrides the container's ENTRYPOINT. Provide as an array (e.g.,
               ["/bin/sh", "-c"])
@@ -268,7 +267,7 @@ def deploy(
         image: str,
         name: str,
         args: SequenceNotStr[str] | Omit = omit,
-        autoscaling: Dict[str, str] | Omit = omit,
+        autoscaling: jig_deploy_params.Autoscaling | Omit = omit,
         command: SequenceNotStr[str] | Omit = omit,
         cpu: float | Omit = omit,
         description: str | Omit = omit,
@@ -303,9 +302,9 @@ def deploy(
           args: Args overrides the container's CMD. Provide as an array of arguments (e.g.,
               ["python", "app.py"])
 
-          autoscaling:
-              Autoscaling configuration as key-value pairs. Example: {"metric":
-              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog
+          autoscaling: Autoscaling configuration. Example: {"metric": "QueueBacklogPerWorker",
+              "target": 1.01} to scale based on queue backlog. Omit or set to null to disable
+              autoscaling
 
           command: Command overrides the container's ENTRYPOINT. Provide as an array (e.g.,
               ["/bin/sh", "-c"])
@@ -534,7 +533,7 @@ async def update(
         id: str,
         *,
         args: SequenceNotStr[str] | Omit = omit,
-        autoscaling: Dict[str, str] | Omit = omit,
+        autoscaling: jig_update_params.Autoscaling | Omit = omit,
         command: SequenceNotStr[str] | Omit = omit,
         cpu: float | Omit = omit,
         description: str | Omit = omit,
@@ -567,9 +566,8 @@ async def update(
          args: Args overrides the container's CMD. Provide as an array of arguments (e.g.,
               ["python", "app.py"])
 
-          autoscaling:
-              Autoscaling configuration as key-value pairs. Example: {"metric":
-              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog
+          autoscaling: Autoscaling configuration for the deployment. Omit or set to null to disable
+              autoscaling
 
          command: Command overrides the container's ENTRYPOINT. Provide as an array (e.g.,
               ["/bin/sh", "-c"])
@@ -680,7 +678,7 @@ async def deploy(
         image: str,
         name: str,
         args: SequenceNotStr[str] | Omit = omit,
-        autoscaling: Dict[str, str] | Omit = omit,
+        autoscaling: jig_deploy_params.Autoscaling | Omit = omit,
         command: SequenceNotStr[str] | Omit = omit,
         cpu: float | Omit = omit,
         description: str | Omit = omit,
@@ -715,9 +713,9 @@ async def deploy(
          args: Args overrides the container's CMD. Provide as an array of arguments (e.g.,
               ["python", "app.py"])
 
-          autoscaling:
-              Autoscaling configuration as key-value pairs. Example: {"metric":
-              "QueueBacklogPerWorker", "target": "10"} to scale based on queue backlog
+          autoscaling: Autoscaling configuration. Example: {"metric": "QueueBacklogPerWorker",
+              "target": 1.01} to scale based on queue backlog. Omit or set to null to disable
+              autoscaling
 
          command: Command overrides the container's ENTRYPOINT. Provide as an array (e.g.,
               ["/bin/sh", "-c"])
