1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
use std::fmt;
use memchr::memchr;
use super::{
FAIL_STATE, ROOT_STATE,
StateIdx, PatIdx,
AcAutomaton, Transitions, Match,
};
use super::autiter::Automaton;
#[derive(Clone)]
pub struct FullAcAutomaton<P> {
pats: Vec<P>,
trans: Vec<StateIdx>,
out: Vec<Vec<PatIdx>>,
start_bytes: Vec<u8>,
}
impl<P: AsRef<[u8]>> FullAcAutomaton<P> {
pub fn new<T: Transitions>(ac: AcAutomaton<P, T>) -> FullAcAutomaton<P> {
let mut fac = FullAcAutomaton {
pats: vec![],
trans: vec![FAIL_STATE; 256 * ac.states.len()],
out: vec![vec![]; ac.states.len()],
start_bytes: vec![],
};
fac.build_matrix(&ac);
fac.pats = ac.pats;
fac.start_bytes = ac.start_bytes;
fac
}
fn set(&mut self, si: StateIdx, i: u8, goto: StateIdx) {
let ns = self.num_states();
self.trans[i as usize * ns + si as usize] = goto;
}
#[inline]
fn num_states(&self) -> usize {
self.out.len()
}
}
impl<P: AsRef<[u8]>> Automaton<P> for FullAcAutomaton<P> {
#[inline]
fn next_state(&self, si: StateIdx, i: u8) -> StateIdx {
self.trans[i as usize * self.num_states() + si as usize]
}
#[inline]
fn get_match(&self, si: StateIdx, outi: usize, texti: usize) -> Match {
let pati = self.out[si as usize][outi];
let patlen = self.pats[pati].as_ref().len();
let start = texti + 1 - patlen;
Match {
pati: pati,
start: start,
end: start + patlen,
}
}
#[inline]
fn has_match(&self, si: StateIdx, outi: usize) -> bool {
outi < self.out[si as usize].len()
}
#[inline]
fn skip_to(&self, si: StateIdx, text: &[u8], at: usize) -> usize {
if si != ROOT_STATE || !self.is_skippable() {
return at;
}
let b = self.start_bytes[0];
match memchr(b, &text[at..]) {
None => text.len(),
Some(i) => at + i,
}
}
#[inline]
fn is_skippable(&self) -> bool {
self.start_bytes.len() == 1
}
#[inline]
fn patterns(&self) -> &[P] {
&self.pats
}
#[inline]
fn pattern(&self, i: usize) -> &P {
&self.pats[i]
}
}
impl<P: AsRef<[u8]>> FullAcAutomaton<P> {
fn build_matrix<T: Transitions>(&mut self, ac: &AcAutomaton<P, T>) {
for (si, s) in ac.states.iter().enumerate().skip(1) {
for b in (0..256).map(|b| b as u8) {
self.set(si as StateIdx, b, ac.next_state(si as StateIdx, b));
}
for &pati in &s.out {
self.out[si].push(pati);
}
}
}
}
impl<P: AsRef<[u8]> + fmt::Debug> fmt::Debug for FullAcAutomaton<P> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "FullAcAutomaton({:?})", self.pats)
}
}