Hello,
so there is actually bug. I was able to reproduce it with very simple
rules on my router:
set skip on em1
block return all
pass out on em0 from 192.168.2.0/24 to any nat-to(em0)
em1 is interface, facing to LAN
em0 is interface to internet where NAT happens.
I did use a scapy to generate ICMP host unreachable:
d = '77.75.77.222'
s = '192.168.2.5'
sp = 1234
dp = 12345
pkt = Ether()/IP(dst=d)/ICMP(type=3, code=1, )/\
IP(src=d', dst=s)/UDP(sport=sp, dport=dp)
sendp(pkt, iface='iwn0')
the packet has been sent from my notebook via LAN towards router. to my
surprise router forwards packet untranslated without creating a state.
the packet indeed matches the 'pass out on em0 rule...'
however 'keep state' and 'nat-to' actions are ignored. To understand
why we must look here at pf_test_rule():
4475 if (pd->virtual_proto != PF_VPROTO_FRAGMENT
4476 && !ctx.state_icmp && r->keep_state) {
4477
....
4491 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
4492 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns);
4493
4494 if (action != PF_PASS)
4495 goto cleanup;
4496 if (sks != skw) {
4497 struct pf_state_key *sk;
4498
4499 if (pd->dir == PF_IN)
4500 sk = sks;
4501 else
4502 sk = skw;
4503 rewrite += pf_translate(pd,
4504 &sk->addr[pd->af == pd->naf ? pd->sidx :
pd->didx],
4505 sk->port[pd->af == pd->naf ? pd->sidx :
pd->didx],
4506 &sk->addr[pd->af == pd->naf ? pd->didx :
pd->sidx],
4507 sk->port[pd->af == pd->naf ? pd->didx :
pd->sidx],
4508 virtual_type, ctx.icmp_dir);
4509 }
4510
4511 #ifdef INET6
4512 if (rewrite && skw->af != sks->af)
4513 action = PF_AFRT;
4514 #endif /* INET6 */
4515
4516 } else {
4517 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
4518 SLIST_REMOVE_HEAD(&ctx.rules, entry);
4519 pool_put(&pf_rule_item_pl, ctx.ri);
4520 }
4521 }
note '!ctx.state_icmp' at line 4476. This variable is being set
by pf_icmp_mapping() function very early in pf_test_rule() function.
4354 switch (pd->virtual_proto) {
4355 case IPPROTO_ICMP:
4356 ctx.icmptype = pd->hdr.icmp.icmp_type;
4357 ctx.icmpcode = pd->hdr.icmp.icmp_code;
4358 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
4359 &ctx.icmp_dir, &virtual_id, &virtual_type);
4360 if (ctx.icmp_dir == PF_IN) {
4361 pd->osport = pd->nsport = virtual_id;
for ICMP error messages the ctx.state_icmp value is set to one.
It happens right here in pf_icmp_mapping():
2582 /* These ICMP types map to other connections */
2583 case ICMP_UNREACH:
2584 case ICMP_SOURCEQUENCH:
2585 case ICMP_REDIRECT:
2586 case ICMP_TIMXCEED:
2587 case ICMP_PARAMPROB:
2588 /* These will not be used, but set them anyway */
2589 *icmp_dir = PF_IN;
2590 *virtual_type = htons(type);
2591 *virtual_id = 0;
2592 return (1); /* These types match to another state
*/
this explains why pf ignores 'keep state' and 'nat-to' action for ICMP errors.
It's tempting to fix things up in pf_test_rule() in else branch lines 4517-4520.
However I'm afraid this approach may create some undesired side-effect.
in my opinion is to fix pf_match_rule() function, so ICMP error message
will no longer match 'keep state' rule. Diff below is for IPv4. I still
need to think of more about IPv6. My gut feeling is it will be very similar.
thanks and
regards
sashan
--------8<---------------8<---------------8<------------------8<--------
diff --git a/sys/net/pf.c b/sys/net/pf.c
index 4f0fc3f91a9..0993aed85fb 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -4148,6 +4148,9 @@ enter_ruleset:
(r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
ctx->icmp_dir != PF_IN),
TAILQ_NEXT(r, entries));
+ /* icmp packet must match existing state */
+ PF_TEST_ATTRIB(r->keep_state && ctx->state_icmp,
+ TAILQ_NEXT(r, entries);
break;
case IPPROTO_ICMPV6: