This is an automated email from the ASF dual-hosted git repository.
ruihangl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new b80d23f81c [Docs] Add tvm.s_tir.tensor_intrin API reference and remove
empty legacy tvm/tir directory (#19386)
b80d23f81c is described below
commit b80d23f81cc20d1cd3143c2044e3d7e4e5cda9dc
Author: Shushi Hong <[email protected]>
AuthorDate: Sat Apr 11 14:29:34 2026 -0400
[Docs] Add tvm.s_tir.tensor_intrin API reference and remove empty legacy
tvm/tir directory (#19386)
as per title
---
docs/reference/api/python/index.rst | 1 +
docs/reference/api/python/s_tir/tensor_intrin.rst | 73 +++++++++++++++++++++++
python/tvm/arith/bound.py | 6 +-
python/tvm/arith/int_set.py | 10 ++--
python/tvm/arith/iter_affine_map.py | 12 ++--
python/tvm/s_tir/tensor_intrin/arm_cpu.py | 61 ++++++++++---------
python/tvm/s_tir/tensor_intrin/riscv_cpu.py | 2 +
7 files changed, 123 insertions(+), 42 deletions(-)
diff --git a/docs/reference/api/python/index.rst
b/docs/reference/api/python/index.rst
index 21d2d8bb32..48ba883c12 100644
--- a/docs/reference/api/python/index.rst
+++ b/docs/reference/api/python/index.rst
@@ -76,6 +76,7 @@ Python API
s_tir/transform
s_tir/dlight
s_tir/backend
+ s_tir/tensor_intrin
.. toctree::
:maxdepth: 1
diff --git a/docs/reference/api/python/s_tir/tensor_intrin.rst
b/docs/reference/api/python/s_tir/tensor_intrin.rst
new file mode 100644
index 0000000000..dfdfed5645
--- /dev/null
+++ b/docs/reference/api/python/s_tir/tensor_intrin.rst
@@ -0,0 +1,73 @@
+.. Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+.. http://www.apache.org/licenses/LICENSE-2.0
+
+.. Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+tvm.s_tir.tensor_intrin
+-----------------------
+
+tvm.s_tir.tensor_intrin
+***********************
+.. automodule:: tvm.s_tir.tensor_intrin
+ :members:
+ :imported-members:
+
+tvm.s_tir.tensor_intrin.cuda
+****************************
+.. automodule:: tvm.s_tir.tensor_intrin.cuda
+ :members:
+ :noindex:
+
+tvm.s_tir.tensor_intrin.arm_cpu
+*******************************
+.. automodule:: tvm.s_tir.tensor_intrin.arm_cpu
+ :members:
+ :noindex:
+
+tvm.s_tir.tensor_intrin.x86
+****************************
+.. automodule:: tvm.s_tir.tensor_intrin.x86
+ :members:
+ :noindex:
+
+tvm.s_tir.tensor_intrin.rocm
+*****************************
+.. automodule:: tvm.s_tir.tensor_intrin.rocm
+ :members:
+ :noindex:
+
+tvm.s_tir.tensor_intrin.metal
+*****************************
+.. automodule:: tvm.s_tir.tensor_intrin.metal
+ :members:
+ :noindex:
+
+tvm.s_tir.tensor_intrin.hexagon
+*******************************
+.. automodule:: tvm.s_tir.tensor_intrin.hexagon
+ :members:
+ :noindex:
+
+tvm.s_tir.tensor_intrin.riscv_cpu
+*********************************
+.. automodule:: tvm.s_tir.tensor_intrin.riscv_cpu
+ :members:
+ :noindex:
+
+tvm.s_tir.tensor_intrin.dot_product_common
+******************************************
+.. automodule:: tvm.s_tir.tensor_intrin.dot_product_common
+ :members:
+ :noindex:
diff --git a/python/tvm/arith/bound.py b/python/tvm/arith/bound.py
index 2abe386e1d..bf8c0edc67 100644
--- a/python/tvm/arith/bound.py
+++ b/python/tvm/arith/bound.py
@@ -24,16 +24,16 @@ def deduce_bound(var, cond, hint_map, relax_map):
Parameters
----------
- var : tvm.tir.Var
+ var : tvm.tirx.Var
The target variable to be deduced.
cond : PrimExpr
The condition
- hint_map : Map[tvm.tir.Var, IntSet]
+ hint_map : Map[tvm.tirx.Var, IntSet]
Domain of variables used to help deduction.
- relax_map : Map[tvm.tir.Var, IntSet]
+ relax_map : Map[tvm.tirx.Var, IntSet]
The fomain of the variables to be relaxed
using the provided domain.
"""
diff --git a/python/tvm/arith/int_set.py b/python/tvm/arith/int_set.py
index 746390187b..9aad8ccfa5 100644
--- a/python/tvm/arith/int_set.py
+++ b/python/tvm/arith/int_set.py
@@ -102,7 +102,7 @@ def estimate_region_lower_bound(region, var_dom, predicate):
region : List[Range]
The region to be analyzed.
- var_dom : Dict[tvm.tir.Var, Range]
+ var_dom : Dict[tvm.tirx.Var, Range]
The ranges of the variables
predicate : PrimExpr
@@ -125,7 +125,7 @@ def estimate_region_strict_bound(region, var_dom,
predicate):
region : List[Range]
The region to be analyzed.
- var_dom : Dict[tvm.tir.Var, Range]
+ var_dom : Dict[tvm.tirx.Var, Range]
The ranges of the variables
predicate : PrimExpr
@@ -149,7 +149,7 @@ def estimate_region_upper_bound(region, var_dom, predicate):
region : List[Range]
The region to be analyzed.
- var_dom : Dict[tvm.tir.Var, Range]
+ var_dom : Dict[tvm.tirx.Var, Range]
The ranges of the variables
predicate : PrimExpr
@@ -168,7 +168,7 @@ def pos_inf():
Returns
----------
- pos_inf : tvm.tir.Var
+ pos_inf : tvm.tirx.Var
A symbolic var that indicates positive infinity
"""
return _ffi_api.PosInf()
@@ -179,7 +179,7 @@ def neg_inf():
Returns
----------
- neg_inf : tvm.tir.Var
+ neg_inf : tvm.tirx.Var
A symbolic var that indicates positive infinity
"""
return _ffi_api.NegInf()
diff --git a/python/tvm/arith/iter_affine_map.py
b/python/tvm/arith/iter_affine_map.py
index 8371d368ee..0dae45c1a5 100644
--- a/python/tvm/arith/iter_affine_map.py
+++ b/python/tvm/arith/iter_affine_map.py
@@ -137,7 +137,7 @@ def detect_iter_map(
indices : List[PrimExpr]
The input indices
- input_iters : Map[tvm.tir.Var, Range]
+ input_iters : Map[tvm.tirx.Var, Range]
The domain of each input iterators.
predicate : PrimExpr
@@ -178,7 +178,7 @@ def normalize_to_iter_sum(index, input_iters):
index : PrimExpr
The input index
- input_iters : Map[tvm.tir.Var, Range]
+ input_iters : Map[tvm.tirx.Var, Range]
The domain of each input iterators.
Returns
@@ -211,7 +211,7 @@ def iter_map_simplify(
indices : List[PrimExpr]
The input indices
- input_iters : Map[tvm.tir.Var, Range]
+ input_iters : Map[tvm.tirx.Var, Range]
The domain of each input iterators.
predicate : PrimExpr
@@ -289,10 +289,10 @@ def subspace_divide(
bindings : List[PrimExpr]
The input bindings
- input_iters : Map[tvm.tir.Var, Range]
+ input_iters : Map[tvm.tirx.Var, Range]
The domain of input iterator, which is the basis of the whole space
- sub_iters : Array[tvm.tir.Var]
+ sub_iters : Array[tvm.tirx.Var]
The subset of input_iters, which is the basis of the subspace
predicate : PrimExpr
@@ -344,7 +344,7 @@ def inverse_affine_iter_map(iter_map, outputs):
Returns
-------
- results : Map[tvm.tir.Var, PrimExpr]
+ results : Map[tvm.tirx.Var, PrimExpr]
The map from the input to the transformed result.
"""
return _ffi_api.InverseAffineIterMap(iter_map, outputs)
diff --git a/python/tvm/s_tir/tensor_intrin/arm_cpu.py
b/python/tvm/s_tir/tensor_intrin/arm_cpu.py
index 259b75d87f..9849755c68 100644
--- a/python/tvm/s_tir/tensor_intrin/arm_cpu.py
+++ b/python/tvm/s_tir/tensor_intrin/arm_cpu.py
@@ -234,16 +234,18 @@ def
get_sme_transpose_interleave_2svlx2svl_fp32_intrin(cols, rows):
the contents of sub-tile 1 and 2 are stored in opposite locations - see
the diagram
below.
- A: Accumulator tile:
A_t:
- 2SVL 2SVL
2SVL
- +----------------+ +-----------------+
+-------------------+
- | --0a-- --1a-- | | |
| | | | | |
- | --0b-- --1b-- | | 0 1 |
| 0a 0b .. 2a 2b .. |
- | ... ... | ld1w.horiz | | st1w.vert
| | | | | |
- 2SVL | --2a-- --3a-- | ====> 2SVL | | ====> 2SVL
| | | | | |
- | --2a-- --3b-- | | 2 3 |
| 1a 1b .. 3a 3b .. |
- | ... ... | | |
| | | | | |
- +----------------+ +-----------------+
+-------------------+
+ ::
+
+ A: Accumulator tile:
A_t:
+ 2SVL 2SVL
2SVL
+ +----------------+ +-----------------+
+-------------------+
+ | --0a-- --1a-- | | |
| | | | | |
+ | --0b-- --1b-- | | 0 1 |
| 0a 0b .. 2a 2b .. |
+ | ... ... | ld1w.horiz | | st1w.vert
| | | | | |
+ 2SVL | --2a-- --3a-- | ====> 2SVL | | ====>
2SVL | | | | | |
+ | --2a-- --3b-- | | 2 3 |
| 1a 1b .. 3a 3b .. |
+ | ... ... | | |
| | | | | |
+ +----------------+ +-----------------+
+-------------------+
Returns
-------
@@ -521,24 +523,26 @@ def get_sme_gemm_interleaved_mopa_2svlx2svl_intrin(M, K,
in_dtype):
Diagram showing outer-product performed on each of the accumulator
sub-tiles
for the fp32 datatype:
- SVL SVL
- +----------------------------+
- | l | h | K
- K +----------------------------+
- +---+ +----------------------------+
- | | | 0: 1: |-+
- | | | mopa(l, l) mopa(l, h) | |-+
- l | | | | | |
- | | | | | |
- |---| | | | |
- | | | 2: 3: | | |
- h | | | mopa(h, l) mopa(h, h) | | |
- | | | | | |
- | | | | | |
- +---+ +----------------------------+ | |
- +----------------------------+ |
- +---------------------------+
- (accumulate K times)
+ ::
+
+ SVL SVL
+ +----------------------------+
+ | l | h | K
+ K +----------------------------+
+ +---+ +----------------------------+
+ | | | 0: 1: |-+
+ | | | mopa(l, l) mopa(l, h) | |-+
+ l | | | | | |
+ | | | | | |
+ |---| | | | |
+ | | | 2: 3: | | |
+ h | | | mopa(h, l) mopa(h, h) | | |
+ | | | | | |
+ | | | | | |
+ +---+ +----------------------------+ | |
+ +----------------------------+ |
+ +---------------------------+
+ (accumulate K times)
Pseudo code computing 2SVL x 2SVL GEMM for fp32 inputs:
@@ -572,6 +576,7 @@ def get_sme_gemm_interleaved_mopa_2svlx2svl_intrin(M, K,
in_dtype):
}
Notes:
+
- Recall that A has been transposed beforehand such that each column is
now accessed
by row.
- 'sme.zero' resets the accumulator tile to contain all zero's.
diff --git a/python/tvm/s_tir/tensor_intrin/riscv_cpu.py
b/python/tvm/s_tir/tensor_intrin/riscv_cpu.py
index e5590fd630..f1ce1c04b4 100644
--- a/python/tvm/s_tir/tensor_intrin/riscv_cpu.py
+++ b/python/tvm/s_tir/tensor_intrin/riscv_cpu.py
@@ -61,7 +61,9 @@ def rvv_vec_dot_product_kernels(
with C[LANES].
The pseudo code is as follows:
+
.. code-block:: c
+
void vec_dot_prod(A[ELEMS], B[LANES][ELEMS], C[LANES]){
for (j = 0; j < LANES; j++) {
for (k = 0; k < ELEMS; k++) {