use onig::{Regex};

#[derive(Debug, Clone, Serialize)]
P
Phodal Huang 已提交
5
pub struct IOnigCaptureIndex {
P
Phodal Huang 已提交
6 7 8
    pub start: usize,
    pub end: usize,
    pub length: usize,
P
Phodal Huang 已提交
9 10
}

P
Phodal Huang 已提交
11
#[derive(Debug, Clone, Serialize)]
P
Phodal Huang 已提交
12
pub struct IOnigMatch {
P
Phodal Huang 已提交
13
    pub index: usize,
P
Phodal Huang 已提交
14 15 16
    pub capture_indices: Vec<IOnigCaptureIndex>,
}

P
Phodal Huang 已提交
17
/// Tries an ordered list of regex patterns against an input string.
#[derive(Debug, Clone)]
pub struct Scanner {
    /// Index of the pattern the next scan starts from; scans reset it to 0
    /// before returning.
    pub index: usize,
    /// Pattern sources, in the order they are tried.
    pub patterns: Vec<String>,
}

impl Scanner {
    pub fn new(patterns: Vec<String>) -> Self {
        Scanner {
            index: 0,
            patterns,
        }
    }

P
Phodal Huang 已提交
31 32 33 34 35
    pub fn dispose(&mut self) {
        self.index = 0
    }

    pub fn find_next_match_sync(&mut self, str: String, start_position: usize) -> Option<IOnigMatch> {
P
Phodal Huang 已提交
36
        if self.index >= self.patterns.clone().len() {
P
Phodal Huang 已提交
37
            self.index = 0;
P
Phodal Huang 已提交
38 39
            return None
        }
P
Phodal Huang 已提交
40 41 42 43 44

        let pattern = self.patterns[self.index].clone();

        let regex = Regex::new(pattern.as_str()).unwrap();
        let mut capture_indices = vec![];
P
Phodal Huang 已提交
45 46
        let _captures = regex.captures(str.as_str());

P
Phodal Huang 已提交
47 48 49
        if let Some(captures) = _captures {
            for (_, pos) in captures.iter_pos().enumerate() {
                if let Some((start, end)) = pos {
P
Phodal Huang 已提交
50
                    if start >= start_position {
P
Phodal Huang 已提交
51 52 53 54 55 56
                        let capture = IOnigCaptureIndex {
                            start,
                            end,
                            length: end - start,
                        };
                        capture_indices.push(capture)
P
Phodal Huang 已提交
57
                    }
P
Phodal Huang 已提交
58
                }
P
Phodal Huang 已提交
59
            }
P
Phodal Huang 已提交
60
        }
P
Phodal Huang 已提交
61

P
Phodal Huang 已提交
62
        if capture_indices.len() <= 0 {
P
Phodal Huang 已提交
63 64
            self.index = self.index + 1;
            self.find_next_match_sync(str.clone(), start_position)
P
Phodal Huang 已提交
65 66
        } else {
            let index = self.index.clone();
P
Phodal Huang 已提交
67
            self.index = 0;
P
Phodal Huang 已提交
68 69 70 71
            Some(IOnigMatch {
                index,
                capture_indices,
            })
P
Phodal Huang 已提交
72
        }
P
Phodal Huang 已提交
73 74 75 76 77 78
    }
}


#[cfg(test)]
mod tests {
    use super::Scanner;

    /// Builds the JSON expected from `serde_json` for a match that holds
    /// exactly one capture span.
    fn single_capture_json(index: usize, start: usize, end: usize) -> String {
        format!(
            "{{\"index\":{},\"capture_indices\":[{{\"start\":{},\"end\":{},\"length\":{}}}]}}",
            index,
            start,
            end,
            end - start
        )
    }

    #[test]
    fn should_handle_simple_regex() {
        let mut scanner = Scanner::new(vec![String::from("ell"), String::from("wo")]);
        let s = String::from("Hello world!");

        let result = scanner.find_next_match_sync(s.clone(), 0).unwrap();
        assert_eq!(result.index, 0);
        assert_eq!(result.capture_indices[0].start, 1);
        assert_eq!(result.capture_indices[0].end, 4);

        // "ell" only occurs before offset 2, so the second pattern wins.
        let second_result = scanner.find_next_match_sync(s, 2).unwrap();
        assert_eq!(second_result.index, 1);
        assert_eq!(second_result.capture_indices[0].start, 6);
        assert_eq!(second_result.capture_indices[0].end, 8);
    }

    #[test]
    fn should_handle_simple2() {
        let patterns = vec![String::from("a"), String::from("b"), String::from("c")];
        let mut scanner = Scanner::new(patterns);

        assert!(scanner.find_next_match_sync(String::from("x"), 0).is_none());

        let result = scanner.find_next_match_sync(String::from("xxaxxbxxc"), 0).unwrap();
        assert_eq!(serde_json::to_string(&result).unwrap(), single_capture_json(0, 2, 3));

        let result2 = scanner.find_next_match_sync(String::from("xxaxxbxxc"), 4).unwrap();
        assert_eq!(serde_json::to_string(&result2).unwrap(), single_capture_json(1, 5, 6));

        let result3 = scanner.find_next_match_sync(String::from("xxaxxbxxc"), 7).unwrap();
        assert_eq!(serde_json::to_string(&result3).unwrap(), single_capture_json(2, 8, 9));

        assert!(scanner.find_next_match_sync(String::from("xxaxxbxxc"), 9).is_none());
    }

    #[test]
    fn should_handle_unicode1() {
        let mut scanner = Scanner::new(vec![String::from("1"), String::from("2")]);

        // Offsets are bytes: the ellipsis occupies three of them.
        let result = scanner.find_next_match_sync(String::from("ab…cde21"), 5).unwrap();
        assert_eq!(serde_json::to_string(&result).unwrap(), single_capture_json(0, 9, 10));
    }

    #[test]
    fn should_handle_unicode2() {
        let mut scanner2 = Scanner::new(vec![String::from("\"")]);
        let result2 = scanner2.find_next_match_sync(String::from("{\"\": 1}"), 1).unwrap();
        assert_eq!(serde_json::to_string(&result2).unwrap(), single_capture_json(0, 1, 2));
    }

    #[test]
    fn should_handle_unicode3() {
        let mut scanner = Scanner::new(vec![String::from("Y"), String::from("X")]);

        // "Y" sits at byte 6 (the emoji takes four bytes), so every start
        // offset up to and including 6 must report it.
        for start_position in 0..=6 {
            let result = scanner
                .find_next_match_sync(String::from("a💻bYX"), start_position)
                .unwrap();
            assert_eq!(
                serde_json::to_string(&result).unwrap(),
                single_capture_json(0, 6, 7),
                "start_position = {}",
                start_position
            );
        }

        // Past "Y", only the second pattern can still match.
        let result7 = scanner.find_next_match_sync(String::from("a💻bYX"), 7).unwrap();
        assert_eq!(serde_json::to_string(&result7).unwrap(), single_capture_json(1, 7, 8));
    }
}