I looked at what would be the clean fix for the MTT SEG handling in mthca,
and I came up with the following (applies on top of the series I posted
earlier). I think this gives us an important optimization.
Roland, could you please give me a hint whether something
like this is too big a change to get into 2.6.20?


Arbel does not actually have a concept of an MTT segment.
So we should set the MTT segment size to 64 bits (1 entry) for memfree;
otherwise we might be wasting as much as 87% of MTT entries (with 64-byte
segments, a single-entry allocation leaves 7 of the 8 entries unused).

Signed-off-by: Michael S. Tsirkin <[EMAIL PROTECTED]>

---

diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c 
b/drivers/infiniband/hw/mthca/mthca_cmd.c
index 7131446..968d151 100644
--- a/drivers/infiniband/hw/mthca/mthca_cmd.c
+++ b/drivers/infiniband/hw/mthca/mthca_cmd.c
@@ -1051,11 +1051,7 @@ int mthca_QUERY_DEV_LIM(struct mthca_dev *dev,
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_EQ_OFFSET);
        dev_lim->max_eqs = 1 << (field & 0x7);
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MTT_OFFSET);
-       if (mthca_is_memfree(dev))
-               dev_lim->reserved_mtts = ALIGN((1 << (field >> 4)) * 
sizeof(u64),
-                                              MTHCA_MTT_SEG_SIZE) / 
MTHCA_MTT_SEG_SIZE;
-       else
-               dev_lim->reserved_mtts = 1 << (field >> 4);
+       dev_lim->reserved_mtts = 1 << (field >> 4);
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_MAX_MRW_SZ_OFFSET);
        dev_lim->max_mrw_sz = 1 << field;
        MTHCA_GET(field, outbox, QUERY_DEV_LIM_RSVD_MRW_OFFSET);
diff --git a/drivers/infiniband/hw/mthca/mthca_dev.h 
b/drivers/infiniband/hw/mthca/mthca_dev.h
index b7e42ef..0973359 100644
--- a/drivers/infiniband/hw/mthca/mthca_dev.h
+++ b/drivers/infiniband/hw/mthca/mthca_dev.h
@@ -78,16 +78,17 @@ enum {
 };
 
 enum {
-       MTHCA_EQ_CONTEXT_SIZE =  0x40,
-       MTHCA_CQ_CONTEXT_SIZE =  0x40,
-       MTHCA_QP_CONTEXT_SIZE = 0x200,
-       MTHCA_RDB_ENTRY_SIZE  =  0x20,
-       MTHCA_AV_SIZE         =  0x20,
-       MTHCA_MGM_ENTRY_SIZE  =  0x40,
+       MTHCA_EQ_CONTEXT_SIZE    =  0x40,
+       MTHCA_CQ_CONTEXT_SIZE    =  0x40,
+       MTHCA_QP_CONTEXT_SIZE    = 0x200,
+       MTHCA_RDB_ENTRY_SIZE     =  0x20,
+       MTHCA_AV_SIZE            =  0x20,
+       MTHCA_MGM_ENTRY_SIZE     =  0x40,
+
+       MTHCA_TAVOR_MTT_SEG_SIZE =  0x40,
 
        /* Arbel FW gives us these, but we need them for Tavor */
        MTHCA_MPT_ENTRY_SIZE  =  0x40,
-       MTHCA_MTT_SEG_SIZE    =  0x40,
 
        MTHCA_QP_PER_MGM      = 4 * (MTHCA_MGM_ENTRY_SIZE / 16 - 2)
 };
@@ -595,4 +596,8 @@ static inline int mthca_is_memfree(struct mthca_dev *dev)
        return dev->mthca_flags & MTHCA_FLAG_MEMFREE;
 }
 
+static inline unsigned mthca_mtt_seg_size(struct mthca_dev *dev)
+{
+       return mthca_is_memfree(dev) ? sizeof(u64) : MTHCA_TAVOR_MTT_SEG_SIZE;
+}
 #endif /* MTHCA_DEV_H */
diff --git a/drivers/infiniband/hw/mthca/mthca_main.c 
b/drivers/infiniband/hw/mthca/mthca_main.c
index bbe9143..d9d5b89 100644
--- a/drivers/infiniband/hw/mthca/mthca_main.c
+++ b/drivers/infiniband/hw/mthca/mthca_main.c
@@ -465,11 +465,11 @@ static int mthca_init_icm(struct mthca_dev *mdev,
        }
 
        /* CPU writes to non-reserved MTTs, while HCA might DMA to reserved 
mtts */
-       mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * 
MTHCA_MTT_SEG_SIZE,
-                                          dma_get_cache_alignment()) / 
MTHCA_MTT_SEG_SIZE;
+       mdev->limits.reserved_mtts = ALIGN(mdev->limits.reserved_mtts * 
sizeof(u64),
+                                          dma_get_cache_alignment()) / 
sizeof(u64);
 
        mdev->mr_table.mtt_table = mthca_alloc_icm_table(mdev, 
init_hca->mtt_base,
-                                                        MTHCA_MTT_SEG_SIZE,
+                                                        sizeof(u64),
                                                         
mdev->limits.num_mtt_segs,
                                                         
mdev->limits.reserved_mtts,
                                                         1, 0);
diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c 
b/drivers/infiniband/hw/mthca/mthca_mr.c
index 88f9dc2..0357dbe 100644
--- a/drivers/infiniband/hw/mthca/mthca_mr.c
+++ b/drivers/infiniband/hw/mthca/mthca_mr.c
@@ -212,7 +212,7 @@ static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev 
*dev, int size,
 
        mtt->buddy = buddy;
        mtt->order = 0;
-       for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
+       for (i = mthca_mtt_seg_size(dev) / sizeof(u64); i < size; i <<= 1)
                ++mtt->order;
 
        mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
@@ -259,7 +259,7 @@ static int __mthca_write_mtt(struct mthca_dev *dev, struct 
mthca_mtt *mtt,
 
        while (list_len > 0) {
                mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
-                                          mtt->first_seg * MTHCA_MTT_SEG_SIZE +
+                                          mtt->first_seg * 
mthca_mtt_seg_size(dev) +
                                           start_index * 8);
                mtt_entry[1] = 0;
                for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
@@ -302,7 +302,7 @@ void mthca_tavor_write_mtt_seg(struct mthca_dev *dev, 
struct mthca_mtt *mtt,
        u32 mtt_seg;
        int i;
 
-       mtt_seg = mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+       mtt_seg = mtt->first_seg * MTHCA_TAVOR_MTT_SEG_SIZE;
                mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg + start_index 
* sizeof (u64);
        for (i = 0; i < list_len; ++i) {
                __be64 mtt_entry = cpu_to_be64(buffer_list[i] |
@@ -321,11 +321,9 @@ void mthca_arbel_write_mtt_seg(struct mthca_dev *dev, 
struct mthca_mtt *mtt,
 
        /* For Arbel, all MTTs must fit in the same page. */
        BUG_ON(s / PAGE_SIZE != (s + list_len * sizeof(u64) - 1) / PAGE_SIZE);
-       /* Require full segments */
-       BUG_ON(s % MTHCA_MTT_SEG_SIZE);
 
        mtts = mthca_table_find(dev->mr_table.mtt_table, mtt->first_seg +
-                               s / MTHCA_MTT_SEG_SIZE, &dma_handle);
+                               s / sizeof(u64), &dma_handle);
 
        BUG_ON(!mtts);
 
@@ -470,7 +468,7 @@ int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int 
buffer_size_shift,
        if (mr->mtt)
                mpt_entry->mtt_seg =
                        cpu_to_be64(dev->mr_table.mtt_base +
-                                   mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
+                                   mr->mtt->first_seg * 
mthca_mtt_seg_size(dev));
 
        if (0) {
                mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
@@ -615,7 +613,7 @@ int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
        if (IS_ERR(mr->mtt))
                goto err_out_table;
 
-       mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
+       mtt_seg = mr->mtt->first_seg * mthca_mtt_seg_size(dev);
 
        if (mthca_is_memfree(dev)) {
                mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
@@ -907,7 +905,7 @@ int mthca_init_mr_table(struct mthca_dev *dev)
                         dev->mr_table.mtt_base);
 
                dev->mr_table.tavor_fmr.mtt_base =
-                       ioremap(addr, mtts * MTHCA_MTT_SEG_SIZE);
+                       ioremap(addr, mtts * MTHCA_TAVOR_MTT_SEG_SIZE);
                if (!dev->mr_table.tavor_fmr.mtt_base) {
                        mthca_warn(dev, "MTT ioremap for FMR failed.\n");
                        err = -ENOMEM;
diff --git a/drivers/infiniband/hw/mthca/mthca_profile.c 
b/drivers/infiniband/hw/mthca/mthca_profile.c
index 26bf86d..7367150 100644
--- a/drivers/infiniband/hw/mthca/mthca_profile.c
+++ b/drivers/infiniband/hw/mthca/mthca_profile.c
@@ -96,7 +96,7 @@ u64 mthca_make_profile(struct mthca_dev *dev,
        profile[MTHCA_RES_RDB].size  = MTHCA_RDB_ENTRY_SIZE;
        profile[MTHCA_RES_MCG].size  = MTHCA_MGM_ENTRY_SIZE;
        profile[MTHCA_RES_MPT].size  = dev_lim->mpt_entry_sz;
-       profile[MTHCA_RES_MTT].size  = MTHCA_MTT_SEG_SIZE;
+       profile[MTHCA_RES_MTT].size  = mthca_mtt_seg_size(dev);
        profile[MTHCA_RES_UAR].size  = dev_lim->uar_scratch_entry_sz;
        profile[MTHCA_RES_UDAV].size = MTHCA_AV_SIZE;
        profile[MTHCA_RES_UARC].size = request->uarc_size;
@@ -234,7 +234,8 @@ u64 mthca_make_profile(struct mthca_dev *dev,
                        dev->limits.num_mtt_segs = profile[i].num;
                        dev->mr_table.mtt_base   = profile[i].start;
                        init_hca->mtt_base       = profile[i].start;
-                       init_hca->mtt_seg_sz     = ffs(MTHCA_MTT_SEG_SIZE) - 7;
+                       if (!mthca_is_memfree(dev))
+                               init_hca->mtt_seg_sz = 
ffs(MTHCA_TAVOR_MTT_SEG_SIZE) - 7;
                        break;
                case MTHCA_RES_UAR:
                        dev->limits.num_uars       = profile[i].num;

-- 
MST

_______________________________________________
openib-general mailing list
[email protected]
http://openib.org/mailman/listinfo/openib-general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to