Skip to content
Open
5 changes: 3 additions & 2 deletions src/vmm/src/device_manager/pci_mngr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,11 +93,12 @@ impl PciDevices {

debug!(
"Inserting MMIO BAR region: {:#x}:{:#x}",
virtio_device_locked.bar_address, CAPABILITY_BAR_SIZE
virtio_device_locked.config_bar_addr(),
CAPABILITY_BAR_SIZE
);
vm.common.mmio_bus.insert(
virtio_device.clone(),
virtio_device_locked.bar_address,
virtio_device_locked.config_bar_addr(),
CAPABILITY_BAR_SIZE,
)?;

Expand Down
126 changes: 79 additions & 47 deletions src/vmm/src/devices/virtio/transport/pci/device.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use vm_allocator::{AddressAllocator, AllocPolicy, RangeInclusive};
use vm_memory::{Address, ByteValued, GuestAddress, Le32};
use vmm_sys_util::errno;
use vmm_sys_util::eventfd::EventFd;
use zerocopy::IntoBytes;

use crate::Vm;
use crate::devices::virtio::device::{VirtioDevice, VirtioDeviceType};
Expand All @@ -32,7 +33,9 @@ use crate::devices::virtio::transport::pci::common_config::{
};
use crate::devices::virtio::transport::{VirtioInterrupt, VirtioInterruptType};
use crate::logger::{debug, error};
use crate::pci::configuration::{PciCapability, PciConfiguration, PciConfigurationState};
use crate::pci::configuration::{
BAR0_REG_IDX, Bars, NUM_BAR_REGS, PciCapability, PciConfiguration, PciConfigurationState,
};
use crate::pci::msix::{MsixCap, MsixConfig, MsixConfigState};
use crate::pci::{
BarReprogrammingParams, DeviceRelocationError, PciCapabilityId, PciClassCode, PciDevice,
Expand Down Expand Up @@ -220,8 +223,6 @@ const MSIX_PBA_BAR_OFFSET: u32 = 0x48000;
const MSIX_PBA_SIZE: u32 = 0x800;
/// The BAR size must be a power of 2.
pub const CAPABILITY_BAR_SIZE: u64 = 0x80000;
const VIRTIO_COMMON_BAR_INDEX: u8 = 0;
const VIRTIO_SHM_BAR_INDEX: usize = 2;

const NOTIFY_OFF_MULTIPLIER: u32 = 4; // A dword per notification address.

Expand All @@ -237,7 +238,8 @@ pub struct VirtioPciDeviceState {
pub pci_configuration_state: PciConfigurationState,
pub pci_dev_state: VirtioPciCommonConfigState,
pub msix_state: MsixConfigState,
pub bar_address: u64,
pub bars: Bars,
pub msix_config_cap_offset: u16,
}

#[derive(Debug, thiserror::Error, displaydoc::Display)]
Expand Down Expand Up @@ -281,8 +283,10 @@ pub struct VirtioPciDevice {
// a device.
cap_pci_cfg_info: VirtioPciCfgCapInfo,

// Allocated address for the BAR
pub bar_address: u64,
// BARs region for the device
pub bars: Bars,
pub msix_config_cap_offset: u16,
pub msix_config: Arc<Mutex<MsixConfig>>,
}

impl Debug for VirtioPciDevice {
Expand Down Expand Up @@ -322,7 +326,6 @@ impl VirtioPciDevice {
subclass,
VIRTIO_PCI_VENDOR_ID,
pci_device_id,
Some(msix_config.clone()),
)
}

Expand All @@ -345,16 +348,14 @@ impl VirtioPciDevice {
)
.unwrap()
.start();

self.configuration.add_pci_bar(
VIRTIO_COMMON_BAR_INDEX,
// Virtio BAR is 64bit BAR without prefetchable bit since it is an MMIO region.
self.bars.set_bar_64(
VIRTIO_BAR_INDEX,
virtio_pci_bar_addr,
CAPABILITY_BAR_SIZE,
false,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Perhaps an enum type Prefetchable / Non-prefetchable would be more descriptive and avoid the comment above?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would use enum if there were more than 2 options, but here it does not seem like it would change much.

);

// Once the BARs are allocated, the capabilities can be added to the PCI configuration.
self.add_pci_capabilities();
self.bar_address = virtio_pci_bar_addr;
}

/// Constructs a new PCI transport for the given virtio device.
Expand Down Expand Up @@ -400,7 +401,9 @@ impl VirtioPciDevice {
virtio_interrupt: Some(interrupt),
memory,
cap_pci_cfg_info: VirtioPciCfgCapInfo::default(),
bar_address: 0,
bars: Bars::default(),
msix_config,
msix_config_cap_offset: 0,
};

Ok(virtio_pci_device)
Expand All @@ -416,10 +419,7 @@ impl VirtioPciDevice {
let vectors = msix_config.vectors.clone();
let msix_config = Arc::new(Mutex::new(msix_config));

let pci_config = PciConfiguration::type0_from_state(
state.pci_configuration_state,
Some(msix_config.clone()),
);
let pci_config = PciConfiguration::type0_from_state(state.pci_configuration_state);
let virtio_common_config = VirtioPciCommonConfig::new(state.pci_dev_state);
let cap_pci_cfg_info = VirtioPciCfgCapInfo {
offset: state.cap_pci_cfg_offset,
Expand All @@ -444,7 +444,9 @@ impl VirtioPciDevice {
virtio_interrupt: Some(interrupt),
memory: vm.guest_memory().clone(),
cap_pci_cfg_info,
bar_address: state.bar_address,
bars: state.bars,
msix_config,
msix_config_cap_offset: state.msix_config_cap_offset,
};

if state.device_activated {
Expand Down Expand Up @@ -474,7 +476,7 @@ impl VirtioPciDevice {
}

pub fn config_bar_addr(&self) -> u64 {
self.configuration.get_bar_addr(VIRTIO_BAR_INDEX)
self.bars.get_bar_addr_64(VIRTIO_BAR_INDEX)
}

fn add_pci_capabilities(&mut self) {
Expand Down Expand Up @@ -528,7 +530,10 @@ impl VirtioPciDevice {
VIRTIO_BAR_INDEX,
MSIX_PBA_BAR_OFFSET,
);
self.configuration.add_capability(&msix_cap);
// The whole Configuration region is 4K, so u16 can address it all
#[allow(clippy::cast_possible_truncation)]
let offset = self.configuration.add_capability(&msix_cap) as u16;
self.msix_config_cap_offset = offset;
}
}

Expand Down Expand Up @@ -630,7 +635,8 @@ impl VirtioPciDevice {
.lock()
.expect("Poisoned lock")
.state(),
bar_address: self.bar_address,
bars: self.bars,
msix_config_cap_offset: self.msix_config_cap_offset,
}
}
}
Expand Down Expand Up @@ -734,15 +740,33 @@ impl PciDevice for VirtioPciDevice {
offset: u8,
data: &[u8],
) -> Option<Arc<Barrier>> {
// Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
// is accessed. This capability has a special meaning as it allows the
// guest to access other capabilities without mapping the PCI BAR.
let base = reg_idx as usize * 4;
if base + offset as usize >= self.cap_pci_cfg_info.offset as usize
&& base + offset as usize + data.len()
<= self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len()
{
let offset = base + offset as usize - self.cap_pci_cfg_info.offset as usize;
let in_bars = BAR0_REG_IDX <= reg_idx && reg_idx < BAR0_REG_IDX + u16::from(NUM_BAR_REGS);
let in_msix_config = reg_idx * 4 == self.msix_config_cap_offset;
let in_pci_cfg = {
let base = reg_idx * 4;
let cap_start = self.cap_pci_cfg_info.offset;
let cap_end =
self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len();
let write_start = base + u16::from(offset);
let write_end = (base + u16::from(offset)) as usize + data.len();
cap_start < write_start && write_end <= cap_end
};
if in_bars {
// reg_idx is in [BAR0_REG_IDX, BAR0_REG_IDX+NUM_BAR_REGS), so the difference is 0..5.
#[allow(clippy::cast_possible_truncation)]
let bar_idx = (reg_idx - BAR0_REG_IDX) as u8;
self.bars.write(bar_idx, offset, data);
None
} else if in_msix_config {
self.msix_config
.lock()
.unwrap()
.write_msg_ctl_register(offset, data);
self.configuration
.write_config_register(reg_idx, offset, data);
None
} else if in_pci_cfg {
let offset = (reg_idx * 4 + u16::from(offset) - self.cap_pci_cfg_info.offset) as usize;
self.write_cap_pci_cfg(offset, data)
} else {
self.configuration
Expand All @@ -752,15 +776,29 @@ impl PciDevice for VirtioPciDevice {
}

fn read_config_register(&mut self, reg_idx: u16) -> u32 {
// Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
// is accessed. This capability has a special meaning as it allows the
// guest to access other capabilities without mapping the PCI BAR.
let base = reg_idx as usize * 4;
if base >= self.cap_pci_cfg_info.offset as usize
&& base + 4
<= self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len()
{
let offset = base - self.cap_pci_cfg_info.offset as usize;
let in_bars = BAR0_REG_IDX <= reg_idx && reg_idx < BAR0_REG_IDX + u16::from(NUM_BAR_REGS);
let in_pci_cfg = {
let base = reg_idx * 4;
let cap_start = self.cap_pci_cfg_info.offset;
let cap_end =
self.cap_pci_cfg_info.offset as usize + self.cap_pci_cfg_info.cap.bytes().len();
let write_start = base;
let write_end = (base + 4) as usize;
cap_start < write_start && write_end <= cap_end
};

if in_bars {
// reg_idx is in [BAR0_REG_IDX, BAR0_REG_IDX+NUM_BAR_REGS), so the difference is 0..5.
#[allow(clippy::cast_possible_truncation)]
let bar_idx = (reg_idx - BAR0_REG_IDX) as u8;
let mut value: u32 = 0;
self.bars.read(bar_idx, 0, value.as_mut_bytes());
value
} else if in_pci_cfg {
// Handle the special case where the capability VIRTIO_PCI_CAP_PCI_CFG
// is accessed. This capability has a special meaning as it allows the
// guest to access other capabilities without mapping the PCI BAR.
let offset = (reg_idx * 4 - self.cap_pci_cfg_info.offset) as usize;
let mut data = [0u8; 4];
let len = u32::from(self.cap_pci_cfg_info.cap.cap.length) as usize;
if len <= 4 {
Expand All @@ -779,16 +817,10 @@ impl PciDevice for VirtioPciDevice {
reg_idx: u16,
data: &[u8],
) -> Option<BarReprogrammingParams> {
self.configuration.detect_bar_reprogramming(reg_idx, data)
None
}

fn move_bar(&mut self, old_base: u64, new_base: u64) -> Result<(), DeviceRelocationError> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We did not get a dead code warning?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And can this be merged with "pci: virtio: refactor: remove BAR handling from PciConfiguration"?

Copy link
Copy Markdown
Contributor Author

@ShadowCurse ShadowCurse Apr 2, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed this commit in general as move_bar is actually a trait function

// We only update our idea of the bar in order to support free_bars() above.
// The majority of the reallocation is done inside DeviceManager.
if self.bar_address == old_base {
self.bar_address = new_base;
}

Ok(())
}

Expand Down
71 changes: 4 additions & 67 deletions src/vmm/src/pci/bus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ impl PciRoot {
PciBridgeSubclass::HostBridge as u8,
0,
0,
None,
),
}
}
Expand Down Expand Up @@ -492,12 +491,6 @@ mod tests {
reloc_cnt: AtomicUsize,
}

impl RelocationMock {
fn cnt(&self) -> usize {
self.reloc_cnt.load(std::sync::atomic::Ordering::SeqCst)
}
}

impl DeviceRelocation for RelocationMock {
fn move_bar(
&self,
Expand All @@ -516,19 +509,15 @@ mod tests {

impl PciDevMock {
fn new() -> Self {
let mut config = PciConfiguration::new_type0(
let config = PciConfiguration::new_type0(
0x42,
0x0,
0x0,
PciClassCode::MassStorageController,
PciMassStorageSubclass::SerialScsiController as u8,
0x13,
0x12,
None,
);

config.add_pci_bar(0, 0x1000, 0x1000);

PciDevMock(config)
}
}
Expand All @@ -550,10 +539,10 @@ mod tests {

fn detect_bar_reprogramming(
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we remove this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not really about VFIO preparation. I can remove this in another PR if you want.

&mut self,
reg_idx: u16,
data: &[u8],
_reg_idx: u16,
_data: &[u8],
) -> Option<BarReprogrammingParams> {
self.0.detect_bar_reprogramming(reg_idx, data)
None
}
}

Expand Down Expand Up @@ -936,56 +925,4 @@ mod tests {
read_mmio_config(&mut mmio_config, 0, 0, 0, 15, 0, &mut buffer);
assert_eq!(buffer[0], 0x42);
}

#[test]
fn test_bar_reprogramming() {
let (mut mmio_config, _, mock) = initialize_bus();
let mut buffer = [0u8; 4];
assert_eq!(mock.cnt(), 0);

read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer);
let old_addr = u32::from_le_bytes(buffer) & 0xffff_fff0;
assert_eq!(old_addr, 0x1000);

// Writing the lower 32bits first should not trigger any reprogramming
write_mmio_config(
&mut mmio_config,
0,
1,
0,
0x4,
0,
&u32::to_le_bytes(0x1312_0000),
);

read_mmio_config(&mut mmio_config, 0, 1, 0, 0x4, 0, &mut buffer);
let new_addr = u32::from_le_bytes(buffer) & 0xffff_fff0;
assert_eq!(new_addr, 0x1312_0000);
assert_eq!(mock.cnt(), 0);

// Writing the upper 32bits first should now trigger the reprogramming logic
write_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &u32::to_le_bytes(0x1110));
read_mmio_config(&mut mmio_config, 0, 1, 0, 0x5, 0, &mut buffer);
let new_addr = u32::from_le_bytes(buffer);
assert_eq!(new_addr, 0x1110);
assert_eq!(mock.cnt(), 1);

// BAR2 should not be used, so reading its address should return all 0s
read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer);
assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]);

// and reprogramming shouldn't have any effect
write_mmio_config(
&mut mmio_config,
0,
1,
0,
0x5,
0,
&u32::to_le_bytes(0x1312_1110),
);

read_mmio_config(&mut mmio_config, 0, 1, 0, 0x6, 0, &mut buffer);
assert_eq!(buffer, [0x0, 0x0, 0x0, 0x0]);
}
}
Loading
Loading