2025年3月29日 星期六

2025-03-29 Debian 12 Proxmox 8.3.5 IOMMU

 


# reference

https://cloud.tencent.com/developer/article/2414840


# before

root@box02:~# dmesg | grep -E "DMAR|IOMMU"

(no output: nothing is printed before IOMMU is enabled)

# after reboot: IOMMU

root@box02:~# dmesg | grep -E "DMAR|IOMMU"

[    0.000000] Warning: PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA

[    0.004754] ACPI: DMAR 0x0000000072312000 000088 (v01 INTEL  EDK2     00000002      01000013)

[    0.004783] ACPI: Reserving DMAR table memory at [mem 0x72312000-0x72312087]

[    0.068253] DMAR: IOMMU enabled

[    0.160656] DMAR: Host address width 39

[    0.160656] DMAR: DRHD base: 0x000000fed90000 flags: 0x0

[    0.160661] DMAR: dmar0: reg_base_addr fed90000 ver 4:0 cap 1c0000c40660462 ecap 29a00f0505e

[    0.160662] DMAR: DRHD base: 0x000000fed91000 flags: 0x1

[    0.160665] DMAR: dmar1: reg_base_addr fed91000 ver 5:0 cap d2008c40660462 ecap f050da

[    0.160666] DMAR: RMRR base: 0x0000007c000000 end: 0x000000807fffff

[    0.160668] DMAR-IR: IOAPIC id 2 under DRHD base  0xfed91000 IOMMU 1

[    0.160669] DMAR-IR: HPET id 0 under DRHD base 0xfed91000

[    0.160669] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping.

[    0.162202] DMAR-IR: Enabled IRQ remapping in x2apic mode

[    0.358227] pci 0000:00:02.0: DMAR: Skip IOMMU disabling for graphics

[    0.417403] DMAR: No ATSR found

[    0.417404] DMAR: No SATC found

[    0.417405] DMAR: IOMMU feature fl1gp_support inconsistent

[    0.417406] DMAR: IOMMU feature pgsel_inv inconsistent

[    0.417406] DMAR: IOMMU feature nwfs inconsistent

[    0.417407] DMAR: IOMMU feature dit inconsistent

[    0.417408] DMAR: IOMMU feature sc_support inconsistent

[    0.417408] DMAR: IOMMU feature dev_iotlb_support inconsistent

[    0.417409] DMAR: dmar0: Using Queued invalidation

[    0.417411] DMAR: dmar1: Using Queued invalidation

[    0.418166] DMAR: Intel(R) Virtualization Technology for Directed I/O


# verify vfio

box02:~# dmesg | grep -i vfio

[    1.783340] VFIO - User Level meta-driver version: 0.3

[    7.532731] vfio-pci 0000:01:00.0: vgaarb: VGA decodes changed: olddecodes=io+mem,decodes=io+mem:owns=none

[    7.533818] vfio-pci 0000:01:00.0: vgaarb: VGA decodes changed: olddecodes=io+mem,decodes=io+mem:owns=none

[    7.534253] vfio-pci 0000:01:00.0: vgaarb: VGA decodes changed: olddecodes=io+mem,decodes=io+mem:owns=none


# verify interrupt remapping

box02:~# dmesg | grep 'remapping'

[    0.160669] DMAR-IR: Queued invalidation will be enabled to support x2apic and Intr-remapping.

[    0.162202] DMAR-IR: Enabled IRQ remapping in x2apic mode


# grub

# update-initramfs -k all -u

update-initramfs: Generating /boot/initrd.img-6.8.12-8-pve

Running hook script 'zz-proxmox-boot'..

Re-executing '/etc/kernel/postinst.d/zz-proxmox-boot' in new private mount namespace..

No /etc/kernel/proxmox-boot-uuids found, skipping ESP sync.

Removable bootloader found at '/boot/efi/EFI/BOOT/BOOTX64.efi', but GRUB packages not set up to update it!

Run the following command:

echo 'grub-efi-amd64 grub2/force_efi_extra_removable boolean true' | debconf-set-selections -v -u

Then reinstall GRUB with 'apt install --reinstall grub-efi-amd64'


# before

$ sudo lspci

00:02.0 VGA compatible controller: Device 1234:1111 (rev 02)

# after

$ sudo lspci | grep VGA

00:02.0 VGA compatible controller: Device 1234:1111 (rev 02)

00:10.0 VGA compatible controller: NVIDIA Corporation TU106 [GeForce GTX 1650] (rev a1)


# driver

sudo apt-get remove --purge '^nvidia-.*'

sudo apt install nvidia-driver-550 nvidia-dkms-550

sudo reboot

nvidia-smi

$ nvidia-smi

Sat Mar 29 17:30:35 2025       

+-----------------------------------------------------------------------------------------+

| NVIDIA-SMI 550.120                Driver Version: 550.120        CUDA Version: 12.4     |

|-----------------------------------------+------------------------+----------------------+

| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |

| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |

|                                         |                        |               MIG M. |

|=========================================+========================+======================|

|   0  NVIDIA GeForce GTX 1650        Off |   00000000:00:10.0 Off |                  N/A |

| 26%   37C    P8              9W /  100W |       6MiB /   4096MiB |      0%      Default |

|                                         |                        |                  N/A |

+-----------------------------------------+------------------------+----------------------+

                                                                                         

+-----------------------------------------------------------------------------------------+

| Processes:                                                                              |

|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |

|        ID   ID                                                               Usage      |

|=========================================================================================|

|    0   N/A  N/A      1638      G   /usr/lib/xorg/Xorg                              4MiB |

+-----------------------------------------------------------------------------------------+


# more
:~$ nvidia-smi --query-gpu=driver_version --format=csv
driver_version
550.120

# desktop manager
sudo nvidia-settings

# also
nano /etc/modprobe.d/pve-blacklist.conf
blacklist radeon
blacklist nouveau
blacklist nvidia

# blacklist host GPU drivers and bind the GPU to vfio-pci
nano /etc/modprobe.d/pve-blacklist.conf
blacklist radeon
blacklist nouveau
blacklist nvidia

touch /etc/modprobe.d/vfio.conf
nano /etc/modprobe.d/vfio.conf
options vfio-pci ids=10de:1f0a,10de:10f9

update-initramfs -u

lsmod | grep vfio
# lsmod | grep vfio
vfio_pci               16384  2
vfio_pci_core          86016  1 vfio_pci
irqbypass              12288  3 vfio_pci_core,kvm
vfio_iommu_type1       49152  1
vfio                   65536  9 vfio_pci_core,vfio_iommu_type1,vfio_pci
iommufd                94208  1 vfio


# also 2
https://www.mereith.com/post/116#%E8%99%9A%E6%8B%9F%E6%9C%BA%E8%AE%BE%E7%BD%AE

# also 3
https://upsangel.com/htpc-nas/iommu-vfio-gpo-proxmox-pve-pcie%E7%9B%B4%E9%80%9A%E5%92%8C%E6%A0%B8%E9%A1%AFhdmi%E7%9B%B4%E9%80%9A%E8%A8%AD%E7%BD%AE%E5%8F%83%E6%95%B8%E8%A7%A3%E9%87%8B/
vfio.conf 没有 disable_vga=1,有的删掉!

# also 4
https://blog.csdn.net/qq_34419607/article/details/139872906
Set vga=none! After that the Win10 VM works.





# revisit

# 01 file
vi /etc/default/grub
# original
# GRUB_CMDLINE_LINUX_DEFAULT="quiet"
# original to updated
# GRUB_CMDLINE_LINUX_DEFAULT="quiet intel_iommu=on iommu=pt initcall_blacklist=sysfb_init pcie_acs_override=downstream video=efifb:off,vesafb:off vfio-pci.ids=10de:1f0a,10de:10f9"
GRUB_CMDLINE_LINUX_DEFAULT="quiet intel_iommu=on iommu=pt initcall_blacklist=sysfb_init pcie_acs_override=downstream"

# 02 file
cat /etc/modprobe.d/blacklist.conf 
# amd
blacklist radeon
blacklist amdgpu
# nvidia
blacklist nouveau
blacklist nvidia
blacklist nvidiafb
blacklist nvidia_drm
# options
options vfio_iommu_type1 allow_unsafe_interrupts=1

# 03 file
# cat /etc/modprobe.d/iommu_unsafe_interrupts.conf 
options vfio_iommu_type1 allow_unsafe_interrupts=1

# 04 execute
update-initramfs -u -k all

# 05 virtual machine
32GB
16 core
BIOS UEFI
Display none
Machine i440fx
scsi2 /dev/disk/by-id/nvme-Samsung_SSD_990
hostpci0 01:00
hostpci1 00:14

沒有留言:

張貼留言

2007 to 2023 HP and Dell Servers Comparison

  HP Gen5 to Gen11  using ChatGPT HP ProLiant Gen Active Years CPU Socket Popular HP CPUs Cores Base Clock Max RAM Capacity Comparable Dell ...