firecracker-microvm · JackThomson2 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/resources/seccomp/aarch64-unknown-linux-musl.json b/resources/seccomp/aarch64-unknown-linux-musl.json
@@ -217,7 +217,11 @@
             },
             {
                 "syscall": "madvise",
-                "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms." 
+                "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms."
+            },
+            {
+                "syscall": "fallocate",
+                "comment": "Used to punch holes in guest_memfd (MAP_SHARED) when discarding memory ranges, e.g. during virtio-mem unplug or balloon inflate with secret_free."
             },
             {
                 "syscall": "msync",

diff --git a/resources/seccomp/x86_64-unknown-linux-musl.json b/resources/seccomp/x86_64-unknown-linux-musl.json
@@ -219,6 +219,10 @@
                 "syscall": "madvise",
                 "comment": "Used by the VirtIO balloon device and by musl for some customer workloads. It is also used by aws-lc during random number generation. They setup a memory page that mark with MADV_WIPEONFORK to be able to detect forks. They also call it with -1 to see if madvise is supported in certain platforms."
             },
+            {
+                "syscall": "fallocate",
+                "comment": "Used to punch holes in guest_memfd (MAP_SHARED) when discarding memory ranges, e.g. during virtio-mem unplug or balloon inflate with secret_free."
+            },
             {
                 "syscall": "msync",
                 "comment": "Used by the VirtIO pmem device to sync the file content with the backing file.",

diff --git a/src/vmm/src/resources.rs b/src/vmm/src/resources.rs
@@ -268,12 +268,6 @@ impl VmResources {
             return Err(MachineConfigError::IncompatibleBalloonSize);
         }
 
-        if self.balloon.get().is_some() && updated.secret_free {
-            return Err(MachineConfigError::Incompatible(
-                "balloon device",
-                "secret freedom",
-            ));
-        }
         if updated.secret_free {
             if self.vhost_user_devices_used() {
                 return Err(MachineConfigError::Incompatible(
@@ -347,10 +341,6 @@ impl VmResources {
             return Err(BalloonConfigError::TooManyPagesRequested);
         }
 
-        if self.machine_config.secret_free {
-            return Err(BalloonConfigError::IncompatibleWith("secret freedom"));
-        }
-
         self.balloon.set(config)
     }
 

diff --git a/src/vmm/src/vstate/memory.rs b/src/vmm/src/vstate/memory.rs
@@ -475,14 +475,32 @@ impl GuestRegionMmapExt {
                     Ok(())
                 }
             }
-            // Match either the case of an anonymous mapping, or the case
-            // of a shared file mapping.
-            // TODO: madvise(MADV_DONTNEED) doesn't actually work with memfd
-            // (or in general MAP_SHARED of a fd). In those cases we should use
-            // fallocate64(FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE).
-            // We keep falling to the madvise branch to keep the previous behaviour.
+            // Shared file mapping (e.g. guest_memfd): fallocate(PUNCH_HOLE) frees the pages.
+            (Some(fo), flags) if flags & libc::MAP_SHARED != 0 => {
+                let file_off = fo.start() + caddr.raw_value();
+                let len_i64 = i64::try_from(len).expect("discard length exceeds i64");
+                // SAFETY: fd and offset are valid, len is within the mapped region.
+                let ret = unsafe {
+                    libc::fallocate(
+                        fo.file().as_raw_fd(),
+                        libc::FALLOC_FL_PUNCH_HOLE | libc::FALLOC_FL_KEEP_SIZE,
+                        file_off.cast_signed(),
+                        len_i64,
+                    )
+                };
+                if ret < 0 {
+                    let os_error = std::io::Error::last_os_error();
+                    error!(
+                        "discard_range: fallocate(PUNCH_HOLE) failed: {:?}",
+                        os_error
+                    );
+                    Err(GuestMemoryError::IOError(os_error))
+                } else {
+                    Ok(())
+                }
+            }
+            // Anonymous memory: MADV_DONTNEED releases pages back to the kernel.
             _ => {
-                // Madvise the region in order to mark it as not used.
                 // SAFETY: The address and length are known to be valid.
                 let ret = unsafe { libc::madvise(phys_address.cast(), len, libc::MADV_DONTNEED) };
                 if ret < 0 {

diff --git a/tests/framework/utils.py b/tests/framework/utils.py
@@ -28,6 +28,8 @@
     wait_fixed,
 )
 
+from framework.guest_stats import MeminfoGuest
+
 FLUSH_CMD = 'screen -S {session} -X colon "logfile flush 0^M"'
 CommandReturn = namedtuple("CommandReturn", "returncode stdout stderr")
 CMDLOG = logging.getLogger("commands")
@@ -131,9 +133,14 @@ def track_cpu_utilization(
     return cpu_utilization
 
 
-def get_resident_memory(process: psutil.Process):
+def get_resident_memory(uvm):
     """Returns current memory utilization in KiB, including used HugeTLBFS"""
 
+    if uvm is not None and uvm.secret_free:
+        stats = MeminfoGuest(uvm).get()
+        return stats.mem_total.kib() - stats.mem_available.kib()
+
+    process: psutil.Process = uvm.ps
     proc_status = Path("/proc", str(process.pid), "status").read_text("utf-8")
     for line in proc_status.splitlines():
         if line.startswith("HugetlbPages:"):  # entry is in KiB
@@ -257,18 +264,23 @@ def search_output_from_cmd(cmd: str, find_regex: typing.Pattern) -> typing.Match
 
 def get_stable_rss_mem(uvm, percentage_delta=1):
     """
-    Get the RSS memory that a guest uses, given the pid of the guest.
+    Get a stable memory usage reading for the VM.
+
+    For regular memory: returns host RSS of the FC process (KiB).
+    For secret_free (guest_memfd): returns guest-side memory usage
+    (total - available) since host RSS doesn't track page cache pages
+    freed by fallocate(PUNCH_HOLE).
 
-    Wait till the fluctuations in RSS drop below percentage_delta.
+    Wait till the fluctuations drop below percentage_delta.
     Or print a warning if this does not happen.
     """
 
     first_rss = 0
     second_rss = 0
     for _ in range(5):
-        first_rss = get_resident_memory(uvm.ps)
+        first_rss = get_resident_memory(uvm)
         time.sleep(1)
-        second_rss = get_resident_memory(uvm.ps)
+        second_rss = get_resident_memory(uvm)
         abs_diff = abs(first_rss - second_rss)
         abs_delta = abs_diff / first_rss * 100
         print(

diff --git a/tests/integration_tests/functional/test_balloon.py b/tests/integration_tests/functional/test_balloon.py
@@ -93,13 +93,13 @@ def _test_rss_memory_lower(test_microvm):
 
 
 # pylint: disable=C0103
-def test_rss_memory_lower(uvm_plain_any):
+def test_rss_memory_lower(uvm_plain_any, secret_free):
     """
     Test that inflating the balloon makes guest use less rss memory.
     """
     test_microvm = uvm_plain_any
     test_microvm.spawn()
-    test_microvm.basic_config()
+    test_microvm.basic_config(secret_free=secret_free)
     test_microvm.add_net_iface()
 
     # Add a memory balloon.
@@ -114,13 +114,13 @@ def test_rss_memory_lower(uvm_plain_any):
 
 
 # pylint: disable=C0103
-def test_inflate_reduces_free(uvm_plain_any):
+def test_inflate_reduces_free(uvm_plain_any, secret_free):
     """
     Check that the output of free in guest changes with inflate.
     """
     test_microvm = uvm_plain_any
     test_microvm.spawn()
-    test_microvm.basic_config()
+    test_microvm.basic_config(secret_free=secret_free)
     test_microvm.add_net_iface()
 
     # Install deflated balloon.
@@ -150,7 +150,7 @@ def test_inflate_reduces_free(uvm_plain_any):
 
 # pylint: disable=C0103
 @pytest.mark.parametrize("deflate_on_oom", [True, False])
-def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):
+def test_deflate_on_oom(uvm_plain_any, secret_free, deflate_on_oom):
     """
     Verify that setting the `deflate_on_oom` option works correctly.
 
@@ -167,7 +167,7 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):
 
     test_microvm = uvm_plain_any
     test_microvm.spawn()
-    test_microvm.basic_config()
+    test_microvm.basic_config(secret_free=secret_free)
     test_microvm.add_net_iface()
 
     # Add a deflated memory balloon.
@@ -215,13 +215,13 @@ def test_deflate_on_oom(uvm_plain_any, deflate_on_oom):
 
 
 # pylint: disable=C0103
-def test_reinflate_balloon(uvm_plain_any):
+def test_reinflate_balloon(uvm_plain_any, secret_free):
     """
     Verify that repeatedly inflating and deflating the balloon works.
     """
     test_microvm = uvm_plain_any
     test_microvm.spawn()
-    test_microvm.basic_config()
+    test_microvm.basic_config(secret_free=secret_free)
     test_microvm.add_net_iface()
 
     # Add a deflated memory balloon.
@@ -280,13 +280,13 @@ def test_reinflate_balloon(uvm_plain_any):
 
 
 # pylint: disable=C0103
-def test_stats(uvm_plain_any):
+def test_stats(uvm_plain_any, secret_free):
     """
     Verify that balloon stats work as expected.
     """
     test_microvm = uvm_plain_any
     test_microvm.spawn()
-    test_microvm.basic_config()
+    test_microvm.basic_config(secret_free=secret_free)
     test_microvm.add_net_iface()
 
     # Add a memory balloon with stats enabled.
@@ -351,13 +351,13 @@ def test_stats(uvm_plain_any):
     check_guest_dmesg_for_stalls(test_microvm.ssh)
 
 
-def test_stats_update(uvm_plain_any):
+def test_stats_update(uvm_plain_any, secret_free):
     """
     Verify that balloon stats update correctly.
     """
     test_microvm = uvm_plain_any
     test_microvm.spawn()
-    test_microvm.basic_config()
+    test_microvm.basic_config(secret_free=secret_free)
     test_microvm.add_net_iface()
 
     # Add a memory balloon with stats enabled.
@@ -403,7 +403,7 @@ def test_stats_update(uvm_plain_any):
     check_guest_dmesg_for_stalls(test_microvm.ssh)
 
 
-def test_balloon_snapshot(uvm_plain_any, microvm_factory):
+def test_balloon_snapshot(uvm_plain_any, secret_free, microvm_factory):
     """
     Test that the balloon works after pause/resume.
     """
@@ -415,6 +415,7 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
     vm.basic_config(
         vcpu_count=2,
         mem_size_mib=256,
+        secret_free=secret_free,
     )
     vm.add_net_iface()
 
@@ -437,15 +438,14 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
     # Now inflate the balloon with 20MB of pages.
     vm.api.balloon.patch(amount_mib=20)
 
-    # Check memory usage again.
+    # Check memory usage again — should decrease (balloon reclaimed pages).
     second_reading = get_stable_rss_mem(vm)
-
-    # There should be a reduction in RSS, but it's inconsistent.
-    # We only test that the reduction happens.
     assert first_reading > second_reading
 
     snapshot = vm.snapshot_full()
-    microvm = microvm_factory.build_from_snapshot(snapshot)
+    # secret_free requires UFFD backend (file backend can't mmap as guest_memfd)
+    uffd = "on_demand" if secret_free else None
+    microvm = microvm_factory.build_from_snapshot(snapshot, uffd_handler_name=uffd)
 
     # Free page reporting and hinting fragment guest memory VMAs
     # making it harder to identify them in the memory monitor.
@@ -461,18 +461,15 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
     # Dirty 60MB of pages.
     make_guest_dirty_memory(microvm.ssh, amount_mib=60)
 
-    # Check memory usage.
+    # Check memory usage — should increase (guest used more memory).
     fourth_reading = get_stable_rss_mem(microvm)
-
     assert fourth_reading > third_reading
 
     # Inflate the balloon with another 20MB of pages.
     microvm.api.balloon.patch(amount_mib=40)
 
+    # Should decrease again (balloon reclaimed pages).
     fifth_reading = get_stable_rss_mem(microvm)
-
-    # There should be a reduction in RSS, but it's inconsistent.
-    # We only test that the reduction happens.
     assert fourth_reading > fifth_reading
 
     # Get the stats after we take a snapshot and dirty some memory,
@@ -488,7 +485,9 @@ def test_balloon_snapshot(uvm_plain_any, microvm_factory):
 
 
 @pytest.mark.parametrize("method", ["reporting", "hinting"])
-def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
+def test_hinting_reporting_snapshot(
+    uvm_plain_any, secret_free, microvm_factory, method
+):
     """
     Test that the balloon hinting and reporting works after pause/resume.
     """
@@ -500,6 +499,7 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
     vm.basic_config(
         vcpu_count=2,
         mem_size_mib=256,
+        secret_free=secret_free,
     )
     vm.add_net_iface()
 
@@ -534,15 +534,14 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
     if free_page_hinting:
         vm.api.balloon_hinting_start.patch()
 
-    # Check memory usage again.
+    # Check memory usage again — should decrease (pages freed + hinted/reported).
     second_reading = get_stable_rss_mem(vm)
-
-    # There should be a reduction in RSS, but it's inconsistent.
-    # We only test that the reduction happens.
     assert first_reading > second_reading
 
     snapshot = vm.snapshot_full()
-    microvm = microvm_factory.build_from_snapshot(snapshot)
+    # secret_free requires UFFD backend (file backend can't mmap as guest_memfd)
+    uffd = "on_demand" if secret_free else None
+    microvm = microvm_factory.build_from_snapshot(snapshot, uffd_handler_name=uffd)
 
     # Free page reporting and hinting fragment guest memory VMAs
     # making it harder to identify them in the memory monitor.
@@ -565,23 +564,20 @@ def test_hinting_reporting_snapshot(uvm_plain_any, microvm_factory, method):
     if free_page_hinting:
         microvm.api.balloon_hinting_start.patch()
 
-    # Check memory usage again.
+    # Check memory usage again — should decrease.
     fourth_reading = get_stable_rss_mem(microvm)
-
-    # There should be a reduction in RSS, but it's inconsistent.
-    # We only test that the reduction happens.
     assert third_reading > fourth_reading
     check_guest_dmesg_for_stalls(microvm.ssh)
 
 
 @pytest.mark.parametrize("method", ["traditional", "hinting", "reporting"])
-def test_memory_scrub(uvm_plain_any, method):
+def test_memory_scrub(uvm_plain_any, secret_free, method):
     """
     Test that the memory is zeroed after deflate.
     """
     microvm = uvm_plain_any
     microvm.spawn()
-    microvm.basic_config(vcpu_count=2, mem_size_mib=256)
+    microvm.basic_config(vcpu_count=2, mem_size_mib=256, secret_free=secret_free)
     microvm.add_net_iface()
 
     free_page_reporting = method == "reporting"

diff --git a/tests/integration_tests/performance/test_hotplug_memory.py b/tests/integration_tests/performance/test_hotplug_memory.py
@@ -278,19 +278,21 @@ def check_hotplug(uvm, requested_size_mib):
 def check_hotunplug(uvm, requested_size_mib):
     """Verifies memory can be hotunplugged and gets released"""
 
-    rss_before = get_resident_memory(uvm.ps)
+    rss_before = get_resident_memory(uvm)
 
     check_hotplug(uvm, requested_size_mib)
 
-    rss_after = get_resident_memory(uvm.ps)
+    rss_after = get_resident_memory(uvm)
 
     print(f"RSS before: {rss_before}, after: {rss_after}")
 
     machine_config = uvm.api.machine_config.get().json()
     huge_pages = HugePagesConfig(machine_config["huge_pages"])
     secret_free = machine_config.get("secret_free", False)
-    if not secret_free and (
-        huge_pages == HugePagesConfig.NONE or supports_hugetlbfs_discard()
+    if (
+        secret_free
+        or huge_pages == HugePagesConfig.NONE
+        or supports_hugetlbfs_discard()
     ):
         assert rss_after < rss_before, "RSS didn't decrease"