grammar.rs 10.1 KB
Newer Older
1
use std::collections::BTreeMap as Map;
P
Phodal Huang 已提交
2 3

use onig::*;
P
Phodal Huang 已提交
4

P
Phodal Huang 已提交
5 6
use crate::grammar::line_tokens::{LineTokens, TokenTypeMatcher};
use crate::grammar::{ScopeListElement, StackElement};
P
Phodal Huang 已提交
7
use crate::inter::{IRawGrammar, IRawRepository, IRawRepositoryMap, IRawRule};
8
use crate::rule::rule_factory::RuleFactory;
P
Phodal Huang 已提交
9
use crate::rule::{AbstractRule, EmptyRule, IGrammarRegistry, IRuleFactoryHelper, IRuleRegistry};
P
Phodal Huang 已提交
10

P
Phodal Huang 已提交
11 12 13 14 15 16 17 18 19 20 21 22 23
/// A single token produced for one line of text.
///
/// `start_index` and `end_index` delimit the token inside the line, and `scopes`
/// lists the scope names attached to it.
// NOTE(review): whether `end_index` is inclusive or exclusive is not visible here — confirm
// against the code that fills this struct.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IToken {
    /// Position in the line where the token starts.
    pub start_index: i32,
    /// Position in the line where the token ends.
    pub end_index: i32,
    /// Scope names that apply to the token.
    pub scopes: Vec<String>,
}

/// Result of tokenizing one line: the tokens found plus the rule stack to hand
/// to the tokenizer as the previous state of the following line.
pub struct ITokenizeLineResult {
    /// Tokens produced for the line.
    pub tokens: Vec<IToken>,
    /// State reached at the end of the line.
    pub rule_stack: Box<StackElement>,
}

pub struct ITokenizeLineResult2 {
    pub tokens: Vec<i32>,
P
Phodal Huang 已提交
24
    pub rule_stack: Box<StackElement>,
P
Phodal Huang 已提交
25 26 27 28 29
}

pub trait IGrammar {
    fn tokenize_line(line_text: String, prev_state: Option<StackElement>) -> ITokenizeLineResult;
    /**
P
Phodal Huang 已提交
30 31 32 33 34 35 36 37 38
     * Tokenize `lineText` using previous line state `prevState`.
     * The result contains the tokens in binary format, resolved with the following information:
     *  - language
     *  - token type (regex, string, comment, other)
     *  - font style
     *  - foreground color
     *  - background color
     * e.g. for getting the languageId: `(metadata & MetadataConsts.LANGUAGEID_MASK) >>> MetadataConsts.LANGUAGEID_OFFSET`
     */
P
Phodal Huang 已提交
39
    fn tokenize_line2(line_text: String, prev_state: Option<StackElement>) -> ITokenizeLineResult2;
P
Phodal Huang 已提交
40 41
}

P
Phodal Huang 已提交
42
/// Trait without any methods; nothing in this file implements or uses it.
pub trait Matcher {}
P
Phodal Huang 已提交
43

P
Phodal Huang 已提交
44
#[derive(Debug, Clone)]
P
Phodal Huang 已提交
45
pub struct Grammar {
46
    root_id: i32,
P
Phodal Huang 已提交
47
    grammar: IRawGrammar,
48
    pub last_rule_id: i32,
49
    pub rule_id2desc: Map<i32, Box<dyn AbstractRule>>,
P
Phodal Huang 已提交
50
    pub _token_type_matchers: Vec<TokenTypeMatcher>,
P
Phodal Huang 已提交
51 52
}

P
Phodal Huang 已提交
53
pub fn init_grammar(grammar: IRawGrammar, _base: Option<IRawRule>) -> IRawGrammar {
P
Phodal Huang 已提交
54 55 56
    let mut _grammar = grammar.clone();

    let mut new_based: IRawRule = IRawRule::new();
P
Phodal Huang 已提交
57 58 59
    if let Some(repo) = grammar.clone().repository {
        new_based.location = repo.clone().location;
    }
P
Phodal Huang 已提交
60 61
    new_based.patterns = Some(grammar.clone().patterns.clone());
    new_based.name = grammar.clone().name;
P
Phodal Huang 已提交
62 63 64 65

    let mut repository_map = IRawRepositoryMap::new();
    repository_map.base_s = Some(new_based.clone());
    repository_map.self_s = Some(new_based.clone());
P
Phodal Huang 已提交
66 67 68
    if let Some(repo) = grammar.clone().repository {
        repository_map.name_map = repo.clone().map.name_map.clone();
    }
P
Phodal Huang 已提交
69 70 71

    _grammar.repository = Some(IRawRepository {
        map: Box::new(repository_map.clone()),
72
        location: None,
P
Phodal Huang 已提交
73 74 75 76 77
    });

    _grammar
}

P
Phodal Huang 已提交
78
impl Grammar {
P
Phodal Huang 已提交
79
    pub fn new(grammar: IRawGrammar) -> Grammar {
P
Phodal Huang 已提交
80
        let _grammar = init_grammar(grammar.clone(), None);
P
Phodal Huang 已提交
81
        Grammar {
82
            last_rule_id: 0,
P
Phodal Huang 已提交
83
            grammar: _grammar,
P
Phodal Huang 已提交
84
            root_id: -1,
85
            rule_id2desc: Map::new(),
P
Phodal Huang 已提交
86
            _token_type_matchers: vec![],
P
Phodal Huang 已提交
87 88 89 90 91 92
        }
    }
    // todo: refactor to callback ??
    pub fn create_onig_scanner(&self, sources: String) -> Regex {
        Regex::new(sources.as_str()).unwrap()
    }
P
Phodal Huang 已提交
93 94
    pub fn create_onig_string(&self, sources: String) -> String {
        sources
P
Phodal Huang 已提交
95 96
    }

P
Phodal Huang 已提交
97
    fn tokenize(
98
        &mut self,
P
Phodal Huang 已提交
99
        line_text: String,
P
Phodal Huang 已提交
100
        mut prev_state: Option<StackElement>,
P
Phodal Huang 已提交
101 102
        emit_binary_tokens: bool,
    ) {
103 104
        if self.root_id.clone() == -1 {
            let mut repository = self.grammar.repository.clone().unwrap();
P
Phodal Huang 已提交
105
            let based = repository.clone().map.self_s.unwrap();
P
Phodal Huang 已提交
106 107 108 109 110 111
            self.root_id = RuleFactory::get_compiled_rule_id(
                based.clone(),
                self,
                &mut repository.clone(),
                String::from(""),
            );
112
        }
P
Phodal Huang 已提交
113

P
Phodal Huang 已提交
114
        let mut is_first_line: bool = false;
P
Phodal Huang 已提交
115
        match prev_state.clone() {
P
Phodal Huang 已提交
116
            None => is_first_line = true,
117 118 119 120
            Some(state) => {
                if state == StackElement::null() {
                    is_first_line = true
                }
P
Phodal Huang 已提交
121
            }
122
        }
P
Phodal Huang 已提交
123

P
Phodal Huang 已提交
124 125
        if is_first_line {
            let scope_list = ScopeListElement::default();
P
Phodal Huang 已提交
126 127 128 129 130 131 132 133 134 135
            prev_state = Some(StackElement::new(
                None,
                self.root_id.clone(),
                -1,
                -1,
                false,
                None,
                scope_list.clone(),
                scope_list.clone(),
            ))
P
Phodal Huang 已提交
136 137
        }

P
Phodal Huang 已提交
138
        let format_line_text = format!("{:?}\n", line_text);
P
Phodal Huang 已提交
139 140 141 142 143
        let line_tokens = LineTokens::new(
            emit_binary_tokens,
            line_text,
            self._token_type_matchers.clone(),
        );
P
Phodal Huang 已提交
144 145 146 147
        self.tokenize_string(
            format_line_text.parse().unwrap(),
            is_first_line,
            0,
P
Phodal Huang 已提交
148
            prev_state.unwrap(),
P
Phodal Huang 已提交
149 150 151
            line_tokens,
            true,
        )
P
Phodal Huang 已提交
152 153
    }

P
Phodal Huang 已提交
154 155 156 157 158
    pub fn tokenize_string(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
159
        prev_state: StackElement,
P
Phodal Huang 已提交
160
        line_tokens: LineTokens,
P
Phodal Huang 已提交
161 162
        check_while_conditions: bool,
    ) {
P
Phodal Huang 已提交
163 164
        let _line_length = line_text.len();
        let _stop = false;
P
Phodal Huang 已提交
165
        let mut anchor_position = -1;
P
Phodal Huang 已提交
166 167

        if check_while_conditions {
P
Phodal Huang 已提交
168 169 170 171 172 173 174 175
            // todo: add realy logic
            self.check_while_conditions(
                line_text.clone(),
                is_first_line.clone(),
                line_pos.clone(),
                prev_state.clone(),
                line_tokens.clone(),
            );
P
Phodal Huang 已提交
176 177
        }

P
Phodal Huang 已提交
178 179 180 181 182 183 184
        self.match_rule_or_injections(
            line_text,
            is_first_line,
            line_pos,
            prev_state,
            anchor_position,
        );
P
Phodal Huang 已提交
185 186
    }

P
Phodal Huang 已提交
187 188 189 190 191
    pub fn check_while_conditions(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
192
        _stack: StackElement,
P
Phodal Huang 已提交
193 194 195
        line_tokens: LineTokens,
    ) {
        let mut anchor_position = -1;
P
Phodal Huang 已提交
196 197 198
        if _stack.begin_rule_captured_eol {
            anchor_position = 0
        }
P
Phodal Huang 已提交
199 200
        // let while_rules = vec![];
    }
P
Phodal Huang 已提交
201

P
Phodal Huang 已提交
202 203 204 205 206
    pub fn match_rule_or_injections(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
207
        stack: StackElement,
P
Phodal Huang 已提交
208
        anchor_position: i32,
P
Phodal Huang 已提交
209
    ) {
P
Phodal Huang 已提交
210 211 212 213 214 215 216
        self.match_rule(
            line_text,
            is_first_line,
            line_pos,
            stack.clone(),
            anchor_position,
        );
P
Phodal Huang 已提交
217 218 219 220 221 222 223 224 225 226
    }

    pub fn match_rule(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
        stack: StackElement,
        anchor_position: i32,
    ) {
227
        let mut rule = stack.get_rule(self);
P
Phodal Huang 已提交
228
        println!("start compile: {:?}", rule.clone().type_of());
P
Phodal Huang 已提交
229 230 231 232 233 234
        rule.compile(
            self,
            stack.end_rule,
            is_first_line,
            line_pos == anchor_position,
        );
235
        println!("{:?}", rule.type_of());
P
Phodal Huang 已提交
236
    }
P
Phodal Huang 已提交
237

238
    pub fn tokenize_line(&mut self, line_text: String, prev_state: Option<StackElement>) {
P
Phodal Huang 已提交
239 240 241
        self.tokenize(line_text, prev_state, false)
    }

P
Phodal Huang 已提交
242 243
    pub fn tokenize_line2(&self, line_text: String, prev_state: Option<StackElement>) {}
}
P
Phodal Huang 已提交
244 245 246 247

// Empty impl: `Grammar` overrides nothing from `IRuleFactoryHelper`.
impl IRuleFactoryHelper for Grammar {}

impl IGrammarRegistry for Grammar {
P
Phodal Huang 已提交
248 249 250 251 252
    fn get_external_grammar(
        &self,
        scope_name: String,
        repository: IRawRepository,
    ) -> Option<IRawGrammar> {
P
Phodal Huang 已提交
253 254 255 256 257
        None
    }
}

impl IRuleRegistry for Grammar {
P
Phodal Huang 已提交
258 259
    fn register_id(&mut self) -> i32 {
        self.last_rule_id = self.last_rule_id + 1;
P
Phodal Huang 已提交
260
        self.last_rule_id.clone()
P
Phodal Huang 已提交
261 262
    }

P
Phodal Huang 已提交
263 264 265
    fn get_rule(&mut self, pattern_id: i32) -> Box<dyn AbstractRule> {
        if let Some(rule) = self.rule_id2desc.get_mut(&pattern_id) {
            return rule.to_owned();
P
Phodal Huang 已提交
266
        }
P
Phodal Huang 已提交
267
        Box::from(EmptyRule {})
P
Phodal Huang 已提交
268
    }
P
Phodal Huang 已提交
269

P
Phodal Huang 已提交
270
    fn register_rule(&mut self, result: Box<dyn AbstractRule>) -> Box<dyn AbstractRule> {
P
Phodal Huang 已提交
271
        self.rule_id2desc
P
Phodal Huang 已提交
272
            .insert(result.id().clone(), result.clone());
273
        result
P
Phodal Huang 已提交
274
    }
P
Phodal Huang 已提交
275 276 277 278
}

#[cfg(test)]
mod tests {
    use std::fs::File;
    use std::io::{Read, Write};
    use std::path::Path;

    use crate::grammar::Grammar;
    use crate::inter::IRawGrammar;

    /// Loads the grammar definition stored at `grammar_path` and feeds every line
    /// of `code` through `tokenize_line`, each time without a previous state.
    fn to_grammar(grammar_path: &str, code: &str) -> Grammar {
        let mut raw_json = String::new();
        File::open(Path::new(grammar_path))
            .unwrap()
            .read_to_string(&mut raw_json)
            .unwrap();

        let raw_grammar: IRawGrammar = serde_json::from_str(&raw_json).unwrap();

        let mut grammar = Grammar::new(raw_grammar);
        for line in code.lines() {
            grammar.tokenize_line(String::from(line), None);
        }
        grammar
    }

    /// Dumps the registered rules of `grammar` as JSON into the file at `path`.
    fn debug_output(grammar: &Grammar, path: String) {
        let serialized = serde_json::to_string(&grammar.rule_id2desc).unwrap();
        let mut file = File::create(path).unwrap();
        // Best-effort debugging aid: a failed write is deliberately ignored.
        let _ = file.write_all(serialized.as_bytes());
    }

    // Despite its name, this test loads the C grammar fixture.
    #[test]
    fn should_build_json_code() {
        let code = "
#include <stdio.h>
int main() {
printf(\"Hello, World!\");
return 0;
}
";
        let grammar = to_grammar("test-cases/first-mate/fixtures/c.json", code);
        // TODO: fix the rule count and re-enable the checks below.
        // assert_eq!(grammar.rule_id2desc.len(), 162);
        // debug_output(&grammar, String::from("program.json"));
    }

    #[test]
    fn should_build_text_grammar() {
        let code = "
GitHub 漫游指南
";
        let grammar = to_grammar("test-cases/first-mate/fixtures/text.json", code);
        assert_eq!(grammar.rule_id2desc.len(), 8);
    }

    #[test]
    fn should_build_json_grammar() {
        let code = "{}";
        let grammar = to_grammar("test-cases/first-mate/fixtures/json.json", code);
        assert_eq!(grammar.rule_id2desc.len(), 22);
        debug_output(&grammar, String::from("program.json"));
    }

    #[test]
    fn should_build_html_grammar() {
        let code = "{}";
        let grammar = to_grammar("test-cases/first-mate/fixtures/html.json", code);
        // assert_eq!(grammar.rule_id2desc.len(), 104);
        assert_eq!(grammar.rule_id2desc.len(), 67);
        debug_output(&grammar, String::from("program.json"));
    }

    #[test]
    fn should_build_makefile_grammar() {
        let code = "{}";
        let grammar = to_grammar("test-cases/first-mate/fixtures/makefile.json", code);
        // assert_eq!(grammar.rule_id2desc.len(), 82);
        assert_eq!(grammar.rule_id2desc.len(), 64);
        debug_output(&grammar, String::from("program.json"));
    }
}