Files
rulekit/internal/engine/optimize.go
NeoMody bb27f2073e Add rule optimization: domain coverage + CIDR aggregation
- domain entries covered by domain_suffix are removed
- domain/suffix entries covered by domain_keyword are removed
- child suffixes covered by parent suffix are removed
- adjacent/contained CIDRs are merged into larger blocks
- Available via --optimize/-O flag on merge and generate commands
- cn-direct: 634 -> 587 rules (-7.4%)
2026-04-01 10:19:18 +08:00

366 lines
8.7 KiB
Go

package engine
import (
"encoding/binary"
"fmt"
"net"
"rulekit/internal/model"
"sort"
"strings"
)
// OptimizeResult summarizes a single Optimize run: the rule counts going in
// and coming out, per-category removal counters, and a readable removal log.
type OptimizeResult struct {
	// Before and After are the number of rules passed to and returned by Optimize.
	Before, After int

	// DomainsMerged counts entries dropped because a domain_suffix covers them
	// (this includes child suffixes covered by a parent suffix).
	DomainsMerged int
	// KeywordMerged counts domain/suffix entries dropped because a keyword covers them.
	KeywordMerged int
	// CIDRsMerged counts CIDR entries folded into larger blocks.
	CIDRsMerged int

	// Removals is a human-readable list of what was removed or merged.
	Removals []string
}
// Optimize performs semantic deduplication on a merged rule set:
//  1. domain_suffix subsumes matching domain entries
//  2. domain_keyword subsumes matching domain/suffix entries
//     (2b: a parent domain_suffix subsumes its child suffixes)
//  3. CIDR aggregation: merge adjacent/contained IP ranges
//
// Rules whose Type is not one of the kinds handled here are passed through
// unchanged: the optimizer must only ever remove rules it can prove redundant.
//
// It returns the optimized rules, ordered by typeOrder and then by Value,
// together with an OptimizeResult describing what was removed.
func Optimize(rules []model.Rule) ([]model.Rule, *OptimizeResult) {
	result := &OptimizeResult{Before: len(rules)}

	// Separate by type. Unrecognised types go to others so they survive the
	// reassembly below instead of being silently discarded.
	var domains, suffixes, keywords, regexes, cidrs, procs, others []model.Rule
	for _, r := range rules {
		switch r.Type {
		case model.RuleDomain:
			domains = append(domains, r)
		case model.RuleDomainSuffix:
			suffixes = append(suffixes, r)
		case model.RuleDomainKeyword:
			keywords = append(keywords, r)
		case model.RuleDomainRegex:
			regexes = append(regexes, r)
		case model.RuleIPCIDR:
			cidrs = append(cidrs, r)
		case model.RuleProcessName:
			procs = append(procs, r)
		default:
			others = append(others, r)
		}
	}

	// Build suffix set for fast lookup.
	suffixSet := make(map[string]bool, len(suffixes))
	for _, s := range suffixes {
		suffixSet[s.Value] = true
	}

	// Build keyword list.
	keywordVals := make([]string, len(keywords))
	for i, k := range keywords {
		keywordVals[i] = k.Value
	}

	// 1. Remove domains covered by domain_suffix.
	var filteredDomains []model.Rule
	for _, d := range domains {
		if coveredBySuffix(d.Value, suffixSet) {
			result.DomainsMerged++
			result.Removals = append(result.Removals,
				fmt.Sprintf("domain:%s (covered by suffix)", d.Value))
			continue
		}
		filteredDomains = append(filteredDomains, d)
	}

	// 2. Remove domains/suffixes covered by keyword.
	var filteredDomains2 []model.Rule
	for _, d := range filteredDomains {
		if coveredByKeyword(d.Value, keywordVals) {
			result.KeywordMerged++
			result.Removals = append(result.Removals,
				fmt.Sprintf("domain:%s (covered by keyword)", d.Value))
			continue
		}
		filteredDomains2 = append(filteredDomains2, d)
	}

	var filteredSuffixes []model.Rule
	for _, s := range suffixes {
		if coveredByKeyword(s.Value, keywordVals) {
			result.KeywordMerged++
			result.Removals = append(result.Removals,
				fmt.Sprintf("domain_suffix:%s (covered by keyword)", s.Value))
			continue
		}
		filteredSuffixes = append(filteredSuffixes, s)
	}

	// 2b. Remove suffixes covered by a parent suffix,
	// e.g. "a.bilibili.com" is redundant if "bilibili.com" exists.
	filteredSuffixes = removeCoveredSuffixes(filteredSuffixes, result)

	// 3. CIDR aggregation.
	optimizedCIDRs := aggregateCIDRs(cidrs, result)

	// Reassemble.
	var out []model.Rule
	out = append(out, filteredDomains2...)
	out = append(out, filteredSuffixes...)
	out = append(out, keywords...)
	out = append(out, regexes...)
	out = append(out, optimizedCIDRs...)
	out = append(out, procs...)
	out = append(out, others...)

	sort.Slice(out, func(i, j int) bool {
		if out[i].Type != out[j].Type {
			return typeOrder(out[i].Type) < typeOrder(out[j].Type)
		}
		return out[i].Value < out[j].Value
	})

	result.After = len(out)
	return out, result
}
// coveredBySuffix reports whether domain equals an entry of suffixSet or sits
// beneath one, e.g. "www.bilibili.com" is covered by suffix "bilibili.com".
func coveredBySuffix(domain string, suffixSet map[string]bool) bool {
	// Test the full name first, then strip one leading label at a time and
	// test each remaining parent until no dot is left.
	for candidate := domain; ; {
		if suffixSet[candidate] {
			return true
		}
		dot := strings.IndexByte(candidate, '.')
		if dot < 0 {
			return false
		}
		candidate = candidate[dot+1:]
	}
}
// coveredByKeyword reports whether value (a domain or suffix) contains at
// least one of the given keywords as a substring.
func coveredByKeyword(value string, keywords []string) bool {
	matched := false
	// Stop scanning as soon as one keyword matches.
	for i := 0; i < len(keywords) && !matched; i++ {
		matched = strings.Contains(value, keywords[i])
	}
	return matched
}
// removeCoveredSuffixes drops every suffix that is a subdomain of another
// suffix in the same list, e.g. "api.bilibili.com" is redundant when
// "bilibili.com" is also present. Each drop increments result.DomainsMerged
// and is logged in result.Removals with the nearest covering parent.
func removeCoveredSuffixes(suffixes []model.Rule, result *OptimizeResult) []model.Rule {
	known := make(map[string]bool, len(suffixes))
	for i := range suffixes {
		known[suffixes[i].Value] = true
	}

	// nearestParent strips leading labels one at a time and returns the first
	// remaining name that is itself a known suffix.
	nearestParent := func(name string) (string, bool) {
		for rest := name; ; {
			dot := strings.IndexByte(rest, '.')
			if dot < 0 {
				return "", false
			}
			rest = rest[dot+1:]
			if known[rest] {
				return rest, true
			}
		}
	}

	var kept []model.Rule
	for _, rule := range suffixes {
		parent, found := nearestParent(rule.Value)
		if !found {
			kept = append(kept, rule)
			continue
		}
		result.DomainsMerged++
		result.Removals = append(result.Removals,
			fmt.Sprintf("domain_suffix:%s (covered by suffix:%s)", rule.Value, parent))
	}
	return kept
}
// aggregateCIDRs merges adjacent and contained CIDR blocks.
//
// Each rule's Value is parsed with net.ParseCIDR and split into IPv4 and IPv6
// groups, which are aggregated independently by mergeCIDRList. A resulting
// block that matches the normalised form of an input rule reuses that rule
// (the first one seen wins, so its metadata is kept); a block that only exists
// because of merging is emitted as a new rule with Source "optimized".
//
// Rules whose Value cannot be parsed are returned unchanged after the
// aggregated blocks: the optimizer cannot reason about them, so it must not
// drop them. result.CIDRsMerged is set to the number of parsed entries that
// were folded away.
func aggregateCIDRs(cidrs []model.Rule, result *OptimizeResult) []model.Rule {
	if len(cidrs) == 0 {
		return nil
	}

	// Separate IPv4 and IPv6.
	var v4nets, v6nets []*net.IPNet
	var unparsed []model.Rule
	cidrSource := make(map[string]model.Rule, len(cidrs)) // keep first rule for metadata
	for _, r := range cidrs {
		_, ipnet, err := net.ParseCIDR(r.Value)
		if err != nil {
			// Not a CIDR we understand: preserve the rule as-is.
			unparsed = append(unparsed, r)
			continue
		}
		key := ipnet.String()
		if _, exists := cidrSource[key]; !exists {
			cidrSource[key] = r
		}
		if ipnet.IP.To4() != nil {
			v4nets = append(v4nets, ipnet)
		} else {
			v6nets = append(v6nets, ipnet)
		}
	}

	// Remove contained CIDRs and merge adjacent ones, per address family.
	v4merged := mergeCIDRList(v4nets)
	v6merged := mergeCIDRList(v6nets)
	result.CIDRsMerged = (len(v4nets) + len(v6nets)) - (len(v4merged) + len(v6merged))

	var out []model.Rule
	for _, group := range [][]*net.IPNet{v4merged, v6merged} {
		for _, n := range group {
			cidrStr := n.String()
			if r, ok := cidrSource[cidrStr]; ok {
				out = append(out, r)
				continue
			}
			out = append(out, model.Rule{
				Type:   model.RuleIPCIDR,
				Value:  cidrStr,
				Source: "optimized",
			})
		}
	}
	out = append(out, unparsed...)
	return out
}
// mergeCIDRList removes contained CIDRs and merges adjacent ones, returning
// the blocks in ascending address order. The input slice is not reordered.
//
// It expects canonical networks (IP already masked) of a single address
// family, which is what net.ParseCIDR produces.
//
// The list is sorted by network address, shorter prefix first, and then
// processed in one pass with a stack of disjoint result blocks:
//   - CIDR blocks are either nested or disjoint, so after sorting a block can
//     only be contained in the most recently kept block (the stack top);
//   - two sibling halves cover a contiguous range, so they can only ever be
//     neighbours on the stack. After each push the top two entries are
//     combined repeatedly, which lets merges cascade upward
//     (/25 + /25 -> /24, then /24 + /24 -> /23, ...).
//
// This replaces the previous all-pairs containment scan and repeated all-pairs
// merge passes with a single sort plus linear scan.
func mergeCIDRList(nets []*net.IPNet) []*net.IPNet {
	if len(nets) == 0 {
		return nil
	}

	// Work on a copy so the caller's slice keeps its order.
	sorted := make([]*net.IPNet, len(nets))
	copy(sorted, nets)

	// Sort by IP, then prefix length.
	sort.Slice(sorted, func(i, j int) bool {
		if cmp := compareIPs(sorted[i].IP, sorted[j].IP); cmp != 0 {
			return cmp < 0
		}
		iOnes, _ := sorted[i].Mask.Size()
		jOnes, _ := sorted[j].Mask.Size()
		return iOnes < jOnes // shorter prefix (larger range) first
	})

	stack := make([]*net.IPNet, 0, len(sorted))
	for _, n := range sorted {
		// Skip blocks already covered by the last kept block.
		if len(stack) > 0 {
			top := stack[len(stack)-1]
			topOnes, _ := top.Mask.Size()
			nOnes, _ := n.Mask.Size()
			if topOnes <= nOnes && top.Contains(n.IP) {
				continue
			}
		}
		stack = append(stack, n)

		// Collapse sibling pairs at the top of the stack for as long as possible.
		for len(stack) >= 2 {
			combined := tryCombine(stack[len(stack)-2], stack[len(stack)-1])
			if combined == nil {
				break
			}
			stack = append(stack[:len(stack)-2], combined)
		}
	}
	return stack
}
// tryCombine tries to merge two adjacent CIDRs into one.
// e.g., 1.0.0.0/24 + 1.0.1.0/24 = 1.0.0.0/23
//
// It returns the parent block when a and b are the two distinct halves of that
// parent, and nil otherwise: different prefix lengths or address families, a
// /0 block (which has no parent), different parents, or a and b describing the
// same network. The last case matters because identical blocks trivially share
// a parent, and returning it would widen the covered range.
func tryCombine(a, b *net.IPNet) *net.IPNet {
	onesA, bitsA := a.Mask.Size()
	onesB, bitsB := b.Mask.Size()
	if onesA != onesB || bitsA != bitsB {
		return nil
	}
	if onesA == 0 {
		return nil
	}

	// Normalise both network addresses; Mask returns nil when the IP and mask
	// lengths are incompatible.
	netA := a.IP.Mask(a.Mask)
	netB := b.IP.Mask(b.Mask)
	if netA == nil || netB == nil {
		return nil
	}
	// The same network twice is a duplicate, not a pair of siblings.
	if netA.Equal(netB) {
		return nil
	}

	// The parent prefix is one bit shorter.
	parentMask := net.CIDRMask(onesA-1, bitsA)

	// Both must fall inside the same parent.
	parentA := netA.Mask(parentMask)
	parentB := netB.Mask(parentMask)
	if parentA == nil || parentB == nil || !parentA.Equal(parentB) {
		return nil
	}
	return &net.IPNet{
		IP:   parentA,
		Mask: parentMask,
	}
}
// compareIPs orders two IP addresses, returning -1, 0 or 1 when a sorts
// before, equal to, or after b. When both addresses have a 4-byte form they
// are compared as big-endian 32-bit integers; otherwise their 16-byte forms
// are compared byte by byte.
func compareIPs(a, b net.IP) int {
	if x, y := a.To4(), b.To4(); x != nil && y != nil {
		xv, yv := binary.BigEndian.Uint32(x), binary.BigEndian.Uint32(y)
		switch {
		case xv < yv:
			return -1
		case xv > yv:
			return 1
		default:
			return 0
		}
	}

	x, y := a.To16(), b.To16()
	for i := 0; i < 16; i++ {
		switch d := int(x[i]) - int(y[i]); {
		case d < 0:
			return -1
		case d > 0:
			return 1
		}
	}
	return 0
}