Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/confcom/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

Release History
===============

2.0.0b3
+++++++
* Fix path traversal when generating policies from untrusted image tar files

2.0.0b2
+++++++
* Fix default working directory for Windows containers being set to C:\\ if the image doesn't specify one.
Expand Down
94 changes: 24 additions & 70 deletions src/confcom/azext_confcom/os_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def clean_up_temp_folder(temp_file_path: str) -> None:
shutil.rmtree(folder_name)


def load_json_from_str(data: str) -> dict:
def load_json_from_str(data: str | bytes | bytearray) -> dict:
if data:
try:
return json.loads(data)
Expand Down Expand Up @@ -159,56 +159,13 @@ def load_tar_mapping_from_config_file(path: str) -> dict:
return output_dict


def map_image_from_tar_backwards_compatibility(image_name: str, tar: TarFile, tar_location: str):
    """Return the config of ``image_name`` from a tarball with a top-level manifest.json.

    The manifest is expected to be a list of entries, each with a ``RepoTags``
    list and a ``Config`` member name pointing at the JSON file that holds the
    image's env vars, startup command, etc.

    Both the manifest and the config file are read straight from the tar
    stream. Nothing is extracted to disk, so member names chosen by an
    untrusted archive cannot cause writes outside the intended directory, and
    there are no temporary files (or stray ``blobs`` directory) to clean up.

    Args:
        image_name: The tag to look for in each manifest entry's ``RepoTags``.
        tar: An open tarball to inspect.
        tar_location: Path of the tarball; only used in the error message.

    Returns:
        The ``config`` dict of the matching image with an added
        ``Architecture`` key, or ``None`` if no entry matches ``image_name``.

    Raises:
        KeyError: ``manifest.json`` is missing or is not a readable file.
        IndexError: The matching entry's ``Config`` does not name a regular
            file in the tarball (callers use this to try another layout).
    """
    members = tar.getmembers()
    if not members:
        eprint(f"Tarball at {tar_location} contains no images")
        return None

    # extractfile() raises KeyError for a missing member and returns None for
    # members that carry no data (e.g. directories); surface both as KeyError.
    manifest_stream = tar.extractfile("manifest.json")
    if manifest_stream is None:
        raise KeyError("manifest.json is not a regular file in the tarball")
    manifest = json.loads(manifest_stream.read())

    info_file = None
    # if we match a RepoTag to the image, stop searching
    for image in manifest:
        # RepoTags may be null for untagged images
        if image_name in (image.get("RepoTags") or []):
            config_name = image.get("Config")
            # [0] deliberately raises IndexError when the config is absent
            info_file = [
                item for item in members
                if item.name == config_name and item.isfile()
            ][0]
            break

    if not info_file:
        return None

    image_info_raw = json.loads(tar.extractfile(info_file).read())
    image_info = image_info_raw.get("config")
    # importing the constant from config.py gives a circular dependency error
    image_info["Architecture"] = image_info_raw.get("architecture")
    return image_info


def get_oci_image_name(image_name: str) -> str:
    """Return ``image_name`` prefixed with ``docker.io/library/`` when it has no ``/``.

    Names that already contain a ``/`` are returned unchanged.
    """
    has_path_separator = "/" in image_name
    return image_name if has_path_separator else f"docker.io/library/{image_name}"


def read_file_from_tar(tar: TarFile, filename: str) -> str:
def read_file_from_tar(tar: TarFile, filename: str) -> bytes:
try:
return tar.extractfile(filename).read()
except KeyError:
Expand Down Expand Up @@ -251,36 +208,33 @@ def map_image_from_tar_oci_layout_v1(image_name: str, tar: TarFile, tar_location
eprint(f"Image '{image_name}' is not found in '{tar_location}'")


def map_image_from_tar(image_name: str, tar: TarFile, tar_location: str):
tar_dir = os.path.dirname(tar_location)
def map_image_from_tar(image_name: str, tar: TarFile, _tar_location: str):
# Inspect the manifest file and see if any of the RepoTags match the
# image_name we're searching for. For each manifest in the JSON, it should
# also have a Config field for what json files they map to to get env vars,
# startup cmd, etc.
#
# NOTE: read manifest.json directly (not via read_file_from_tar) so that a
# missing manifest.json raises KeyError. The caller relies on that to fall
# back to the OCI layout v1 reader.
manifest_bytes = tar.extractfile("manifest.json").read()
Copy link

Copilot AI Apr 30, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tar.extractfile(...) returns None for existing members that are neither regular files nor links (e.g., directories or device nodes); links are followed to their target. Calling .read() on None will raise AttributeError. Consider explicitly handling the None case and raising a clear exception (or KeyError) so the caller’s fallback logic behaves predictably.

Copilot uses AI. Check for mistakes.
manifest = load_json_from_str(manifest_bytes)
Comment thread
micromaomao marked this conversation as resolved.

info_file = None
info_file_name = "manifest.json"

# extract just the manifest file and see if any of the RepoTags match the image_name we're searching for
# the manifest.json should have a list of all the image tags
# and what json files they map to to get env vars, startup cmd, etc.
tar.extract(info_file_name, path=tar_dir)
manifest_path = os.path.join(tar_dir, info_file_name)
manifest = load_json_from_file(manifest_path)
try:
# if we match a RepoTag to the image, stop searching
for image in manifest:
if image_name in image.get("RepoTags"):
info_file = image.get("Config")
break
finally:
# remove the extracted manifest file to clean up
force_delete_silently(manifest_path)
# if we match a RepoTag to the image, stop searching
for image in manifest:
if image_name in image.get("RepoTags"):
info_file = image.get("Config")
break

if not info_file:
return None
tar.extract(info_file, path=tar_dir)

# get the path of the json file and read it in
image_info_file_path = os.path.join(tar_dir, info_file)
image_info_raw = load_json_from_file(image_info_file_path)
# delete the extracted json file to clean up
force_delete_silently(image_info_file_path)
# Read config file directly from the tar stream (without extracting
# anything) so that malicious paths in the manifest cannot cause any actual
# writes.
image_info_raw_bytes = read_file_from_tar(tar, info_file)
image_info_raw = load_json_from_str(image_info_raw_bytes)
Comment thread
micromaomao marked this conversation as resolved.
image_info = image_info_raw.get("config")
Comment thread
micromaomao marked this conversation as resolved.
# importing the constant from config.py gives a circular dependency error
image_info["Architecture"] = image_info_raw.get("architecture")
Expand Down
5 changes: 0 additions & 5 deletions src/confcom/azext_confcom/template_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,6 @@ def get_image_info(progress, message_queue, tar_mapping, image):
with tarfile.open(tar_location) as tar_file:
# get all the info out of the tarfile
try:
logger.info("using backwards compatibility tar file")
image_info = os_util.map_image_from_tar_backwards_compatibility(
image_name, tar_file, tar_location
)
except IndexError:
logger.info("using docker formatted tar file")
image_info = os_util.map_image_from_tar(
image_name, tar_file, tar_location
Expand Down
2 changes: 1 addition & 1 deletion src/confcom/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

logger.warn("Wheel is not available, disabling bdist_wheel hook")

VERSION = "2.0.0b2"
VERSION = "2.0.0b3"

# The full list of classifiers is available at
# https://pypi.python.org/pypi?%3Aaction=list_classifiers
Expand Down
Loading