Skip to content

Commit 381ceda

Browse files
LeoBras and mpe
authored and committed
powerpc/pseries/iommu: Make use of DDW for indirect mapping
So far it's assumed possible to map the guest RAM 1:1 to the bus, which works with a small number of devices. SRIOV changes this, as the user can configure hundreds of VFs, and since phyp preallocates TCEs and does not allow IOMMU pages bigger than 64K, it has to limit the number of TCEs per PE to limit waste of physical pages. As of today, if the assumed direct mapping is not possible, DDW creation is skipped and the default DMA window "ibm,dma-window" is used instead. By using DDW, indirect mapping can get more TCEs than are available for the default DMA window, and also get access to much larger page sizes (16MB as implemented in qemu vs 4k from the default DMA window), causing a significant increase in the maximum amount of memory that can be IOMMU mapped at the same time. Indirect mapping will only be used if direct mapping is not a possibility. For indirect mapping, it's necessary to re-create the iommu_table with the new DMA window parameters, so iommu_alloc() can use it. Removing the default DMA window in order to use DDW with indirect mapping is only allowed if there is no current IOMMU memory allocated in the iommu_table. enable_ddw() is aborted otherwise. Even though there won't be both direct and indirect mappings at the same time, we can't reuse the DIRECT64_PROPNAME property name, or else an older kexec()ed kernel can assume direct mapping, and skip iommu_alloc(), causing undesirable behavior. So a new property name DMA64_PROPNAME "linux,dma64-ddr-window-info" was created to represent a DDW that does not allow direct mapping. Signed-off-by: Leonardo Bras <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 8599395 commit 381ceda

File tree

1 file changed

+74
-15
lines changed
  • arch/powerpc/platforms/pseries

1 file changed

+74
-15
lines changed

arch/powerpc/platforms/pseries/iommu.c

Lines changed: 74 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ static DEFINE_SPINLOCK(direct_window_list_lock);
375375
/* protects initializing window twice for same device */
376376
static DEFINE_MUTEX(direct_window_init_mutex);
377377
#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info"
378+
#define DMA64_PROPNAME "linux,dma64-ddr-window-info"
378379

379380
static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn,
380381
unsigned long num_pfn, const void *arg)
@@ -940,6 +941,7 @@ static int find_existing_ddw_windows(void)
940941
return 0;
941942

942943
find_existing_ddw_windows_named(DIRECT64_PROPNAME);
944+
find_existing_ddw_windows_named(DMA64_PROPNAME);
943945

944946
return 0;
945947
}
@@ -1226,14 +1228,17 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
12261228
struct ddw_create_response create;
12271229
int page_shift;
12281230
u64 win_addr;
1231+
const char *win_name;
12291232
struct device_node *dn;
12301233
u32 ddw_avail[DDW_APPLICABLE_SIZE];
12311234
struct direct_window *window;
12321235
struct property *win64;
12331236
bool ddw_enabled = false;
12341237
struct failed_ddw_pdn *fpdn;
1235-
bool default_win_removed = false;
1238+
bool default_win_removed = false, direct_mapping = false;
12361239
bool pmem_present;
1240+
struct pci_dn *pci = PCI_DN(pdn);
1241+
struct iommu_table *tbl = pci->table_group->tables[0];
12371242

12381243
dn = of_find_node_by_type(NULL, "ibm,pmemory");
12391244
pmem_present = dn != NULL;
@@ -1242,6 +1247,7 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
12421247
mutex_lock(&direct_window_init_mutex);
12431248

12441249
if (find_existing_ddw(pdn, &dev->dev.archdata.dma_offset, &len)) {
1250+
direct_mapping = (len >= max_ram_len);
12451251
ddw_enabled = true;
12461252
goto out_unlock;
12471253
}
@@ -1322,8 +1328,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
13221328
query.page_size);
13231329
goto out_failed;
13241330
}
1325-
/* verify the window * number of ptes will map the partition */
1326-
/* check largest block * page size > max memory hotplug addr */
1331+
1332+
13271333
/*
13281334
* The "ibm,pmemory" can appear anywhere in the address space.
13291335
* Assuming it is still backed by page structs, try MAX_PHYSMEM_BITS
@@ -1339,13 +1345,25 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
13391345
dev_info(&dev->dev, "Skipping ibm,pmemory");
13401346
}
13411347

1348+
/* check if the available block * number of ptes will map everything */
13421349
if (query.largest_available_block < (1ULL << (len - page_shift))) {
13431350
dev_dbg(&dev->dev,
13441351
"can't map partition max 0x%llx with %llu %llu-sized pages\n",
13451352
1ULL << len,
13461353
query.largest_available_block,
13471354
1ULL << page_shift);
1348-
goto out_failed;
1355+
1356+
/* DDW + IOMMU on single window may fail if there is any allocation */
1357+
if (default_win_removed && iommu_table_in_use(tbl)) {
1358+
dev_dbg(&dev->dev, "current IOMMU table in use, can't be replaced.\n");
1359+
goto out_failed;
1360+
}
1361+
1362+
len = order_base_2(query.largest_available_block << page_shift);
1363+
win_name = DMA64_PROPNAME;
1364+
} else {
1365+
direct_mapping = true;
1366+
win_name = DIRECT64_PROPNAME;
13491367
}
13501368

13511369
ret = create_ddw(dev, ddw_avail, &create, page_shift, len);
@@ -1356,8 +1374,8 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
13561374
create.liobn, dn);
13571375

13581376
win_addr = ((u64)create.addr_hi << 32) | create.addr_lo;
1359-
win64 = ddw_property_create(DIRECT64_PROPNAME, create.liobn, win_addr,
1360-
page_shift, len);
1377+
win64 = ddw_property_create(win_name, create.liobn, win_addr, page_shift, len);
1378+
13611379
if (!win64) {
13621380
dev_info(&dev->dev,
13631381
"couldn't allocate property, property name, or value\n");
@@ -1375,15 +1393,54 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
13751393
if (!window)
13761394
goto out_del_prop;
13771395

1378-
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
1379-
win64->value, tce_setrange_multi_pSeriesLP_walk);
1380-
if (ret) {
1381-
dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
1382-
dn, ret);
1396+
if (direct_mapping) {
1397+
/* DDW maps the whole partition, so enable direct DMA mapping */
1398+
ret = walk_system_ram_range(0, memblock_end_of_DRAM() >> PAGE_SHIFT,
1399+
win64->value, tce_setrange_multi_pSeriesLP_walk);
1400+
if (ret) {
1401+
dev_info(&dev->dev, "failed to map direct window for %pOF: %d\n",
1402+
dn, ret);
13831403

13841404
/* Make sure to clean DDW if any TCE was set*/
13851405
clean_dma_window(pdn, win64->value);
1386-
goto out_del_list;
1406+
goto out_del_list;
1407+
}
1408+
} else {
1409+
struct iommu_table *newtbl;
1410+
int i;
1411+
1412+
for (i = 0; i < ARRAY_SIZE(pci->phb->mem_resources); i++) {
1413+
const unsigned long mask = IORESOURCE_MEM_64 | IORESOURCE_MEM;
1414+
1415+
/* Look for MMIO32 */
1416+
if ((pci->phb->mem_resources[i].flags & mask) == IORESOURCE_MEM)
1417+
break;
1418+
}
1419+
1420+
if (i == ARRAY_SIZE(pci->phb->mem_resources))
1421+
goto out_del_list;
1422+
1423+
/* New table for using DDW instead of the default DMA window */
1424+
newtbl = iommu_pseries_alloc_table(pci->phb->node);
1425+
if (!newtbl) {
1426+
dev_dbg(&dev->dev, "couldn't create new IOMMU table\n");
1427+
goto out_del_list;
1428+
}
1429+
1430+
iommu_table_setparms_common(newtbl, pci->phb->bus->number, create.liobn, win_addr,
1431+
1UL << len, page_shift, NULL, &iommu_table_lpar_multi_ops);
1432+
iommu_init_table(newtbl, pci->phb->node, pci->phb->mem_resources[i].start,
1433+
pci->phb->mem_resources[i].end);
1434+
1435+
pci->table_group->tables[1] = newtbl;
1436+
1437+
/* Keep default DMA window struct if removed */
1438+
if (default_win_removed) {
1439+
tbl->it_size = 0;
1440+
kfree(tbl->it_map);
1441+
}
1442+
1443+
set_iommu_table_base(&dev->dev, newtbl);
13871444
}
13881445

13891446
spin_lock(&direct_window_list_lock);
@@ -1427,10 +1484,10 @@ static bool enable_ddw(struct pci_dev *dev, struct device_node *pdn)
14271484
* as RAM, then we failed to create a window to cover persistent
14281485
* memory and need to set the DMA limit.
14291486
*/
1430-
if (pmem_present && ddw_enabled && (len == max_ram_len))
1487+
if (pmem_present && ddw_enabled && direct_mapping && len == max_ram_len)
14311488
dev->dev.bus_dma_limit = dev->dev.archdata.dma_offset + (1ULL << len);
14321489

1433-
return ddw_enabled;
1490+
return ddw_enabled && direct_mapping;
14341491
}
14351492

14361493
static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev)
@@ -1572,7 +1629,9 @@ static int iommu_reconfig_notifier(struct notifier_block *nb, unsigned long acti
15721629
* we have to remove the property when releasing
15731630
* the device node.
15741631
*/
1575-
remove_ddw(np, false, DIRECT64_PROPNAME);
1632+
if (remove_ddw(np, false, DIRECT64_PROPNAME))
1633+
remove_ddw(np, false, DMA64_PROPNAME);
1634+
15761635
if (pci && pci->table_group)
15771636
iommu_pseries_free_group(pci->table_group,
15781637
np->full_name);

0 commit comments

Comments
 (0)