Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion resources/seccomp/aarch64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,11 @@
},
{
"syscall": "madvise",
"comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms."
"comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. aws-lc sets up a memory page that it marks with MADV_WIPEONFORK to be able to detect forks. It also calls madvise with -1 to see if madvise is supported on certain platforms."
},
{
"syscall": "fallocate",
"comment": "Used to punch holes in guest_memfd (MAP_SHARED) when discarding memory ranges, e.g. during virtio-mem unplug or balloon inflate with secret_free."
},
{
"syscall": "msync",
Expand Down
4 changes: 4 additions & 0 deletions resources/seccomp/x86_64-unknown-linux-musl.json
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@
"syscall": "madvise",
"comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. aws-lc sets up a memory page that it marks with MADV_WIPEONFORK to be able to detect forks. It also calls madvise with -1 to see if madvise is supported on certain platforms."
},
{
"syscall": "fallocate",
"comment": "Used to punch holes in guest_memfd (MAP_SHARED) when discarding memory ranges, e.g. during virtio-mem unplug or balloon inflate with secret_free."
},
{
"syscall": "msync",
"comment": "Used by the VirtIO pmem device to sync the file content with the backing file.",
Expand Down
10 changes: 0 additions & 10 deletions src/vmm/src/resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -268,12 +268,6 @@ impl VmResources {
return Err(MachineConfigError::IncompatibleBalloonSize);
}

if self.balloon.get().is_some() && updated.secret_free {
return Err(MachineConfigError::Incompatible(
"balloon device",
"secret freedom",
));
}
if updated.secret_free {
if self.vhost_user_devices_used() {
return Err(MachineConfigError::Incompatible(
Expand Down Expand Up @@ -347,10 +341,6 @@ impl VmResources {
return Err(BalloonConfigError::TooManyPagesRequested);
}

if self.machine_config.secret_free {
return Err(BalloonConfigError::IncompatibleWith("secret freedom"));
}

self.balloon.set(config)
}

Expand Down
32 changes: 25 additions & 7 deletions src/vmm/src/vstate/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -475,14 +475,32 @@ impl GuestRegionMmapExt {
Ok(())
}
}
// Match either the case of an anonymous mapping, or the case
// of a shared file mapping.
// TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
// (or in general MAP_SHARED of a fd). In those cases we should use
// fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
// We keep falling to the madvise branch to keep the previous behaviour.
// Guest_memfd (MAP_SHARED): use fallocate(PUNCH_HOLE) to free pages.
(Some(fo), flags) if flags & libc::MAP_SHARED != 0 => {
let file_off = fo.start() + caddr.raw_value();
let len_i64 = i64::try_from(len).expect("discard length exceeds i64");
// SAFETY: fd and offset are valid, len is within the mapped region.
let ret = unsafe {
libc::fallocate(
fo.file().as_raw_fd(),
libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
file_off.cast_signed(),
len_i64,
)
};
if ret < 0 {
let os_error = std::io::Error::last_os_error();
error!(
"discard_range: fallocate(PUNCH_HOLE) failed: {:?}",
os_error
);
Err(GuestMemoryError::IOError(os_error))
} else {
Ok(())
}
}
// Anonymous memory: MADV_DONTNEED releases pages back to the kernel.
_ => {
// Madvise the region in order to mark it as not used.
// SAFETY: The address and length are known to be valid.
let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
if ret < 0 {
Expand Down
22 changes: 17 additions & 5 deletions tests/framework/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@
wait_fixed,
)

from framework.guest_stats import MeminfoGuest

FLUSH_CMD = 'screen -S {session} -X colon "logfile flush 0^M"'
CommandReturn = namedtuple("CommandReturn", "returncode stdout stderr")
CMDLOG = logging.getLogger("commands")
Expand Down Expand Up @@ -131,9 +133,14 @@ def track_cpu_utilization(
return cpu_utilization


def get_resident_memory(process: psutil.Process):
def get_resident_memory(uvm):
"""Returns current memory utilization in KiB, including used HugeTLBFS"""

if uvm is not None and uvm.secret_free:
stats = MeminfoGuest(uvm).get()
return stats.mem_total.kib() - stats.mem_available.kib()

process: psutil.Process = uvm.ps
proc_status = Path("/proc", str(process.pid), "status").read_text("utf-8")
for line in proc_status.splitlines():
if line.startswith("HugetlbPages:"): # entry is in KiB
Expand Down Expand Up @@ -257,18 +264,23 @@ def search_output_from_cmd(cmd: str, find_regex: typing.Pattern) -> typing.Match

def get_stable_rss_mem(uvm, percentage_delta=1):
"""
Get the RSS memory that a guest uses, given the pid of the guest.
Get a stable memory usage reading for the VM.

For regular memory: returns host RSS of the FC process (KiB).
For secret_free (guest_memfd): returns guest-side memory usage
(total - available) since host RSS doesn't track page cache pages
freed by fallocate(PUNCH_HOLE).

Wait till the fluctuations in RSS drop below percentage_delta.
Wait till the fluctuations drop below percentage_delta.
Or print a warning if this does not happen.
"""

first_rss = 0
second_rss = 0
for _ in range(5):
first_rss = get_resident_memory(uvm.ps)
first_rss = get_resident_memory(uvm)
time.sleep(1)
second_rss = get_resident_memory(uvm.ps)
second_rss = get_resident_memory(uvm)
abs_diff = abs(first_rss - second_rss)
abs_delta = abs_diff / first_rss * 100
print(
Expand Down
66 changes: 31 additions & 35 deletions tests/integration_tests/functional/test_balloon.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@ def _test_rss_memory_lower(test_microvm):


# pylint: disable=C0103
def test_rss_memory_lower(uvm_plain_any):
def test_rss_memory_lower(uvm_plain_any, secret_free):
"""
Test that inflating the balloon makes guest use less rss memory.
"""
test_microvm = uvm_plain_any
test_microvm.spawn()
test_microvm.basic_config()
test_microvm.basic_config(secret_free=secret_free)
test_microvm.add_net_iface()

# Add a memory balloon.
Expand All @@ -114,13 +114,13 @@ def test_rss_memory_lower(uvm_plain_any):


# pylint: disable=C0103
def test_inflate_reduces_free(uvm_plain_any):
def test_inflate_reduces_free(uvm_plain_any, secret_free):
"""
Check that the output of free in guest changes with inflate.
"""
test_microvm = uvm_plain_any
test_microvm.spawn()
test_microvm.basic_config()
test_microvm.basic_config(secret_free=secret_free)
test_microvm.add_net_iface()

# Install deflated balloon.
Expand Down Expand Up @@ -150,7 +150,7 @@ def test_inflate_reduces_free(uvm_plain_any):

# pylint: disable=C0103
@pytest.mark.parametrize("deflate_on_oom", [True, False])
def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):
def test_deflate_on_oom(uvm_plain_any, secret_free, deflate_on_oom):
"""
Verify that setting the `deflate_on_oom` option works correctly.

Expand All @@ -167,7 +167,7 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):

test_microvm = uvm_plain_any
test_microvm.spawn()
test_microvm.basic_config()
test_microvm.basic_config(secret_free=secret_free)
test_microvm.add_net_iface()

# Add a deflated memory balloon.
Expand Down Expand Up @@ -215,13 +215,13 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):


# pylint: disable=C0103
def test_reinflate_balloon(uvm_plain_any):
def test_reinflate_balloon(uvm_plain_any, secret_free):
"""
Verify that repeatedly inflating and deflating the balloon works.
"""
test_microvm = uvm_plain_any
test_microvm.spawn()
test_microvm.basic_config()
test_microvm.basic_config(secret_free=secret_free)
test_microvm.add_net_iface()

# Add a deflated memory balloon.
Expand Down Expand Up @@ -280,13 +280,13 @@ def test_reinflate_balloon(uvm_plain_any):


# pylint: disable=C0103
def test_stats(uvm_plain_any):
def test_stats(uvm_plain_any, secret_free):
"""
Verify that balloon stats work as expected.
"""
test_microvm = uvm_plain_any
test_microvm.spawn()
test_microvm.basic_config()
test_microvm.basic_config(secret_free=secret_free)
test_microvm.add_net_iface()

# Add a memory balloon with stats enabled.
Expand Down Expand Up @@ -351,13 +351,13 @@ def test_stats(uvm_plain_any):
check_guest_dmesg_for_stalls(test_microvm.ssh)


def test_stats_update(uvm_plain_any):
def test_stats_update(uvm_plain_any, secret_free):
"""
Verify that balloon stats update correctly.
"""
test_microvm = uvm_plain_any
test_microvm.spawn()
test_microvm.basic_config()
test_microvm.basic_config(secret_free=secret_free)
test_microvm.add_net_iface()

# Add a memory balloon with stats enabled.
Expand Down Expand Up @@ -403,7 +403,7 @@ def test_stats_update(uvm_plain_any):
check_guest_dmesg_for_stalls(test_microvm.ssh)


def test_balloon_snapshot(uvm_plain_any, microvm_factory):
def test_balloon_snapshot(uvm_plain_any, secret_free, microvm_factory):
"""
Test that the balloon works after pause/resume.
"""
Expand All @@ -415,6 +415,7 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
vm.basic_config(
vcpu_count=2,
mem_size_mib=256,
secret_free=secret_free,
)
vm.add_net_iface()

Expand All @@ -437,15 +438,14 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
# Now inflate the balloon with 20MB of pages.
vm.api.balloon.patch(amount_mib=20)

# Check memory usage again.
# Check memory usage again — should decrease (balloon reclaimed pages).
second_reading = get_stable_rss_mem(vm)

# There should be a reduction in RSS, but it's inconsistent.
# We only test that the reduction happens.
assert first_reading > second_reading

snapshot = vm.snapshot_full()
microvm = microvm_factory.build_from_snapshot(snapshot)
# secret_free requires UFFD backend (file backend can't mmap as guest_memfd)
uffd = "on_demand" if secret_free else None
microvm = microvm_factory.build_from_snapshot(snapshot, uffd_handler_name=uffd)

# Free page reporting and hinting fragment guest memory VMAs
# making it harder to identify them in the memory monitor.
Expand All @@ -461,18 +461,15 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
# Dirty 60MB of pages.
make_guest_dirty_memory(microvm.ssh, amount_mib=60)

# Check memory usage.
# Check memory usage — should increase (guest used more memory).
fourth_reading = get_stable_rss_mem(microvm)

assert fourth_reading > third_reading

# Inflate the balloon with another 20MB of pages.
microvm.api.balloon.patch(amount_mib=40)

# Should decrease again (balloon reclaimed pages).
fifth_reading = get_stable_rss_mem(microvm)

# There should be a reduction in RSS, but it's inconsistent.
# We only test that the reduction happens.
assert fourth_reading > fifth_reading

# Get the stats after we take a snapshot and dirty some memory,
Expand All @@ -488,7 +485,9 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):


@pytest.mark.parametrize("method", ["reporting", "hinting"])
def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
def test_hinting_reporting_snapshot(
uvm_plain_any, secret_free, microvm_factory, method
):
"""
Test that the balloon hinting and reporting works after pause/resume.
"""
Expand All @@ -500,6 +499,7 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
vm.basic_config(
vcpu_count=2,
mem_size_mib=256,
secret_free=secret_free,
)
vm.add_net_iface()

Expand Down Expand Up @@ -534,15 +534,14 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
if free_page_hinting:
vm.api.balloon_hinting_start.patch()

# Check memory usage again.
# Check memory usage again — should decrease (pages freed + hinted/reported).
second_reading = get_stable_rss_mem(vm)

# There should be a reduction in RSS, but it's inconsistent.
# We only test that the reduction happens.
assert first_reading > second_reading

snapshot = vm.snapshot_full()
microvm = microvm_factory.build_from_snapshot(snapshot)
# secret_free requires UFFD backend (file backend can't mmap as guest_memfd)
uffd = "on_demand" if secret_free else None
microvm = microvm_factory.build_from_snapshot(snapshot, uffd_handler_name=uffd)

# Free page reporting and hinting fragment guest memory VMAs
# making it harder to identify them in the memory monitor.
Expand All @@ -565,23 +564,20 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
if free_page_hinting:
microvm.api.balloon_hinting_start.patch()

# Check memory usage again.
# Check memory usage again — should decrease.
fourth_reading = get_stable_rss_mem(microvm)

# There should be a reduction in RSS, but it's inconsistent.
# We only test that the reduction happens.
assert third_reading > fourth_reading
check_guest_dmesg_for_stalls(microvm.ssh)


@pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"])
def test_memory_scrub(uvm_plain_any, method):
def test_memory_scrub(uvm_plain_any, secret_free, method):
"""
Test that the memory is zeroed after deflate.
"""
microvm = uvm_plain_any
microvm.spawn()
microvm.basic_config(vcpu_count=2, mem_size_mib=256)
microvm.basic_config(vcpu_count=2, mem_size_mib=256, secret_free=secret_free)
microvm.add_net_iface()

free_page_reporting = method == "reporting"
Expand Down
10 changes: 6 additions & 4 deletions tests/integration_tests/performance/test_hotplug_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,19 +278,21 @@ def check_hotplug(uvm, requested_size_mib):
def check_hotunplug(uvm, requested_size_mib):
"""Verifies memory can be hotunplugged and gets released"""

rss_before = get_resident_memory(uvm.ps)
rss_before = get_resident_memory(uvm)

check_hotplug(uvm, requested_size_mib)

rss_after = get_resident_memory(uvm.ps)
rss_after = get_resident_memory(uvm)

print(f"RSS before: {rss_before}, after: {rss_after}")

machine_config = uvm.api.machine_config.get().json()
huge_pages = HugePagesConfig(machine_config["huge_pages"])
secret_free = machine_config.get("secret_free", False)
if not secret_free and (
huge_pages == HugePagesConfig.NONE or supports_hugetlbfs_discard()
if (
secret_free
or huge_pages == HugePagesConfig.NONE
or supports_hugetlbfs_discard()
):
assert rss_after < rss_before, "RSS didn't decrease"

Expand Down
Loading