mshr-h commented on code in PR #18524:
URL: https://github.com/apache/tvm/pull/18524#discussion_r2572980532
##########
tests/python/relax/test_frontend_from_exported_program.py:
##########
@@ -4255,6 +4257,40 @@ def main(
run_ep_decomposition=True,
)
+    # Test 2D input (seq_len, head_dim) - bug fix for #18441
+    class Attention2D(Module):
+        def forward(self, x):
+            return torch.nn.functional.scaled_dot_product_attention(x, x, x, is_causal=False)
+
+    @I.ir_module
+    class Expected2D:
+        @R.function
+        def main(
+            x: R.Tensor((8, 32), dtype="float32"),
+        ) -> R.Tensor((8, 32), dtype="float32"):
+            with R.dataflow():
+                # Expand to add batch dimension: (8, 32) -> (1, 8, 32)
+                lv: R.Tensor((1, 8, 32), dtype="float32") = R.expand_dims(x, axis=0)
+                # Expand to add num_heads dimension: (1, 8, 32) -> (1, 1, 8, 32)
+                lv1: R.Tensor((1, 1, 8, 32), dtype="float32") = R.expand_dims(lv, axis=1)
+                lv2: R.Tensor((1, 1, 8, 32), dtype="float32") = R.expand_dims(lv, axis=1)
+                lv3: R.Tensor((1, 1, 8, 32), dtype="float32") = R.expand_dims(lv, axis=1)
+                # Attention operation: (1, 1, 8, 32) -> (1, 1, 8, 32)
+                lv4: R.Tensor((1, 1, 8, 32), dtype="float32") = R.nn.attention(
+                    lv1, lv2, lv3, scale=None, causal_mask=None
+                )
+                # Squeeze batch and num_heads dimensions: (1, 1, 8, 32) -> (8, 32)
+                gv: R.Tensor((8, 32), dtype="float32") = R.squeeze(lv4, axis=[0, 1])
+                R.output(gv)
+            return gv
+
+    verify_model(
+        Attention2D(),
+        (torch.randn(8, 32, dtype=torch.float32),),
+        {},
+        Expected2D,
+    )
Review Comment:
Pass `run_ep_decomposition=False` so that sdpa isn't decomposed.
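For example, the call could be updated along these lines (a sketch of the suggested change; `verify_model` already accepts `run_ep_decomposition` in the earlier test above):

```python
verify_model(
    Attention2D(),
    (torch.randn(8, 32, dtype=torch.float32),),
    {},
    Expected2D,
    # Keep scaled_dot_product_attention as a single op instead of letting the
    # exported program decompose it, so the converter can emit R.nn.attention.
    run_ep_decomposition=False,
)
```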