grammar.rs 13.4 KB
Newer Older
1
use std::collections::BTreeMap as Map;
P
Phodal Huang 已提交
2

P
Phodal Huang 已提交
3
use crate::grammar::line_tokens::{LineTokens, TokenTypeMatcher};
4
use crate::grammar::{ScopeListElement, StackElement, MatchRuleResult};
P
Phodal Huang 已提交
5
use crate::inter::{IRawGrammar, IRawRepository, IRawRepositoryMap, IRawRule};
6
use crate::rule::rule_factory::RuleFactory;
P
Phodal Huang 已提交
7 8
use crate::rule::{AbstractRule, EmptyRule, IGrammarRegistry, IRuleFactoryHelper, IRuleRegistry, BeginWhileRule, CaptureRule};
use scie_scanner::scanner::scanner::{IOnigMatch, IOnigCaptureIndex};
P
Phodal Huang 已提交
9

P
Phodal Huang 已提交
10 11 12 13 14 15 16 17 18 19 20 21 22
/// A single token produced for a line: an index range plus its scope names.
pub struct IToken {
    /// Index where the token starts (presumably an offset into the line — confirm against the producer).
    pub start_index: i32,
    /// Index where the token ends (inclusive vs. exclusive is not visible here — confirm).
    pub end_index: i32,
    /// Scope names attached to the token.
    pub scopes: Vec<String>,
}

/// Result of tokenizing one line into structured [`IToken`]s.
pub struct ITokenizeLineResult {
    /// Tokens found on the line.
    pub tokens: Vec<IToken>,
    /// Rule stack after the line; presumably fed back as the previous state for the next line.
    pub rule_stack: Box<StackElement>,
}

pub struct ITokenizeLineResult2 {
    pub tokens: Vec<i32>,
P
Phodal Huang 已提交
23
    pub rule_stack: Box<StackElement>,
P
Phodal Huang 已提交
24 25 26 27 28
}

pub trait IGrammar {
    fn tokenize_line(line_text: String, prev_state: Option<StackElement>) -> ITokenizeLineResult;
    /**
P
Phodal Huang 已提交
29 30 31 32 33 34 35 36 37
     * Tokenize `lineText` using previous line state `prevState`.
     * The result contains the tokens in binary format, resolved with the following information:
     *  - language
     *  - token type (regex, string, comment, other)
     *  - font style
     *  - foreground color
     *  - background color
     * e.g. for getting the languageId: `(metadata & MetadataConsts.LANGUAGEID_MASK) >>> MetadataConsts.LANGUAGEID_OFFSET`
     */
P
Phodal Huang 已提交
38
    fn tokenize_line2(line_text: String, prev_state: Option<StackElement>) -> ITokenizeLineResult2;
P
Phodal Huang 已提交
39 40
}

P
Phodal Huang 已提交
41
/// Marker trait with no methods; nothing in the visible part of this file implements it.
pub trait Matcher {}
P
Phodal Huang 已提交
42

P
Phodal Huang 已提交
43
#[derive(Debug, Clone)]
P
Phodal Huang 已提交
44
pub struct Grammar {
45
    root_id: i32,
P
Phodal Huang 已提交
46
    grammar: IRawGrammar,
47
    pub last_rule_id: i32,
48
    pub rule_id2desc: Map<i32, Box<dyn AbstractRule>>,
P
Phodal Huang 已提交
49
    pub _token_type_matchers: Vec<TokenTypeMatcher>,
P
Phodal Huang 已提交
50 51
}

P
Phodal Huang 已提交
52
pub fn init_grammar(grammar: IRawGrammar, _base: Option<IRawRule>) -> IRawGrammar {
P
Phodal Huang 已提交
53 54 55
    let mut _grammar = grammar.clone();

    let mut new_based: IRawRule = IRawRule::new();
P
Phodal Huang 已提交
56 57 58
    if let Some(repo) = grammar.clone().repository {
        new_based.location = repo.clone().location;
    }
P
Phodal Huang 已提交
59 60
    new_based.patterns = Some(grammar.clone().patterns.clone());
    new_based.name = grammar.clone().name;
P
Phodal Huang 已提交
61 62 63 64

    let mut repository_map = IRawRepositoryMap::new();
    repository_map.base_s = Some(new_based.clone());
    repository_map.self_s = Some(new_based.clone());
P
Phodal Huang 已提交
65 66 67
    if let Some(repo) = grammar.clone().repository {
        repository_map.name_map = repo.clone().map.name_map.clone();
    }
P
Phodal Huang 已提交
68 69 70

    _grammar.repository = Some(IRawRepository {
        map: Box::new(repository_map.clone()),
71
        location: None,
P
Phodal Huang 已提交
72 73 74 75 76
    });

    _grammar
}

P
Phodal Huang 已提交
77
impl Grammar {
P
Phodal Huang 已提交
78
    pub fn new(grammar: IRawGrammar) -> Grammar {
P
Phodal Huang 已提交
79
        let _grammar = init_grammar(grammar.clone(), None);
P
Phodal Huang 已提交
80
        Grammar {
81
            last_rule_id: 0,
P
Phodal Huang 已提交
82
            grammar: _grammar,
P
Phodal Huang 已提交
83
            root_id: -1,
84
            rule_id2desc: Map::new(),
P
Phodal Huang 已提交
85
            _token_type_matchers: vec![],
P
Phodal Huang 已提交
86 87 88
        }
    }

P
Phodal Huang 已提交
89
    fn tokenize(
90
        &mut self,
P
Phodal Huang 已提交
91
        line_text: String,
92
        prev_state: Option<StackElement>,
P
Phodal Huang 已提交
93 94
        emit_binary_tokens: bool,
    ) {
95 96
        if self.root_id.clone() == -1 {
            let mut repository = self.grammar.repository.clone().unwrap();
P
Phodal Huang 已提交
97
            let based = repository.clone().map.self_s.unwrap();
P
Phodal Huang 已提交
98 99 100 101 102 103
            self.root_id = RuleFactory::get_compiled_rule_id(
                based.clone(),
                self,
                &mut repository.clone(),
                String::from(""),
            );
104
        }
P
Phodal Huang 已提交
105

P
Phodal Huang 已提交
106
        let mut is_first_line: bool = false;
107 108 109

        let mut current_state = StackElement::null();

P
Phodal Huang 已提交
110
        match prev_state.clone() {
P
Phodal Huang 已提交
111
            None => is_first_line = true,
112 113 114 115
            Some(state) => {
                if state == StackElement::null() {
                    is_first_line = true
                }
116 117

                current_state = state;
P
Phodal Huang 已提交
118
            }
119
        }
P
Phodal Huang 已提交
120

P
Phodal Huang 已提交
121
        if is_first_line {
P
Phodal Huang 已提交
122 123 124 125 126 127 128 129 130 131
            // let scope_list = ScopeListElement::default();
            let _root_scope_name = self.get_rule(self.root_id.clone())
                .get_name(None, None);
            let mut root_scope_name = String::from("unknown");
            if let Some(name) = _root_scope_name {
                root_scope_name = name
            }

            let scope_list = ScopeListElement::new(
                None,
P
Phodal Huang 已提交
132
                root_scope_name,
P
Phodal Huang 已提交
133
            );
134
            let mut state = StackElement::new(
P
Phodal Huang 已提交
135 136 137 138 139 140 141 142
                None,
                self.root_id.clone(),
                -1,
                -1,
                false,
                None,
                scope_list.clone(),
                scope_list.clone(),
143 144 145
            );

            current_state = state;
P
Phodal Huang 已提交
146 147
        } else {
            is_first_line = false;
P
Phodal Huang 已提交
148 149
        }

P
Phodal Huang 已提交
150
        let format_line_text = format!("{:?}\n", line_text);
P
Phodal Huang 已提交
151 152 153 154 155
        let line_tokens = LineTokens::new(
            emit_binary_tokens,
            line_text,
            self._token_type_matchers.clone(),
        );
P
Phodal Huang 已提交
156 157 158 159
        self.tokenize_string(
            format_line_text.parse().unwrap(),
            is_first_line,
            0,
160
            &mut current_state,
P
Phodal Huang 已提交
161 162
            line_tokens,
            true,
163
        );
P
Phodal Huang 已提交
164 165
    }

P
Phodal Huang 已提交
166 167 168
    /// Scans `line_text` from `origin_line_pos`, matching the rule on top of
    /// `stack` repeatedly until nothing matches or a match makes no progress.
    ///
    /// This is work in progress: matched rules are looked up and a pushed stack
    /// is computed, but that stack is not installed yet, and `end` matches
    /// (rule id `-1`) are only logged.
    ///
    /// * `origin_is_first` - whether the line is the first line of the input.
    /// * `stack` - current rule stack, passed on to `match_rule` and `produce`.
    /// * `line_tokens` - token accumulator for this line.
    /// * `check_while_conditions` - when true, `check_while_conditions` is run
    ///   before scanning.
    ///
    /// Returns `None` when the scanner finds no further match. Returns
    /// `Some(stack.clone())` when scanning stops because a match did not
    /// advance the position.
    pub fn tokenize_string(
        &mut self,
        line_text: String,
        origin_is_first: bool,
        origin_line_pos: i32,
        stack: &mut StackElement,
        mut line_tokens: LineTokens,
        check_while_conditions: bool,
    ) -> Option<StackElement> {
        let line_length = line_text.len();
        let anchor_position: i32 = -1;

        if check_while_conditions {
            // todo: add real logic
            self.check_while_conditions(
                line_text.clone(),
                origin_is_first,
                origin_line_pos,
                stack.clone(),
                line_tokens.clone(),
            );
        }

        let mut line_pos = origin_line_pos;
        let mut is_first_line = origin_is_first;
        loop {
            let capture_result = match self.match_rule(
                line_text.clone(),
                is_first_line,
                line_pos,
                stack,
                anchor_position,
            ) {
                Some(result) => result,
                None => return None,
            };

            let capture_indices = capture_result.capture_indices;
            let matched_rule_id = capture_result.matched_rule_id;
            if matched_rule_id == -1 {
                println!("todo: matched the `end` for this rule => pop it");
            } else {
                let rule = self.get_rule(matched_rule_id);
                line_tokens.produce(stack, capture_indices[0].start as i32);
                let scope_name =
                    rule.get_name(Some(line_text.clone()), Some(capture_indices.clone()));
                let name_scopes_list = stack
                    .content_name_scopes_list
                    .clone()
                    .push(self, scope_name);
                let begin_rule_capture_eol = capture_indices[0].end == line_length;
                // todo: install the pushed stack once begin/end handling exists.
                let _new_stack = stack.clone().push(
                    matched_rule_id,
                    line_pos,
                    anchor_position,
                    begin_rule_capture_eol,
                    None,
                    name_scopes_list.clone(),
                    name_scopes_list,
                );

                if rule.type_of() == "BeginEndRule" {
                    // todo: handle begin captures of a begin/end rule
                } else if rule.type_of() == "BeginWhileRule" {
                    // todo: pass the rule's begin captures to `handle_captures`
                } else {
                    // todo: handle plain match rules
                }
            }

            let match_end = capture_indices[0].end;
            if match_end > line_pos as usize {
                line_pos = match_end as i32;
                is_first_line = false;
            } else {
                // A zero-width match leaves the position unchanged, and the stack
                // is not modified yet, so the next scan would return the same
                // match forever. Stop instead of spinning. Once push/pop is
                // implemented this guard should also require an unchanged stack.
                break;
            }
        }

        Some(stack.clone())
    }

P
Phodal Huang 已提交
240 241 242 243
    /// Placeholder for capture handling; the body is empty and every argument
    /// is currently ignored.
    pub fn handle_captures(
        grammar: &mut Grammar,
        line_text: String,
        is_first_line: bool,
        stack: StackElement,
        line_tokens: LineTokens,
        captures: Vec<Box<dyn AbstractRule>>,
        capture_indices: Vec<IOnigCaptureIndex>,
    ) {
    }

P
Phodal Huang 已提交
244 245 246 247 248
    pub fn check_while_conditions(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
249
        _stack: StackElement,
P
Phodal Huang 已提交
250 251 252
        line_tokens: LineTokens,
    ) {
        let mut anchor_position = -1;
P
Phodal Huang 已提交
253 254 255
        if _stack.begin_rule_captured_eol {
            anchor_position = 0
        }
P
Phodal Huang 已提交
256 257
        // let while_rules = vec![];
    }
P
Phodal Huang 已提交
258

P
Phodal Huang 已提交
259 260 261 262 263
    pub fn match_rule_or_injections(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
264
        stack: &mut StackElement,
P
Phodal Huang 已提交
265
        anchor_position: i32,
P
Phodal Huang 已提交
266
    ) {
267
        let match_result = self.match_rule(
P
Phodal Huang 已提交
268 269 270
            line_text,
            is_first_line,
            line_pos,
P
Phodal Huang 已提交
271
            stack,
P
Phodal Huang 已提交
272 273
            anchor_position,
        );
274 275 276 277
        if let Some(result) = match_result {} else {
            // None
        };
        // todo: get injections logic
P
Phodal Huang 已提交
278 279 280 281 282 283 284
    }

    pub fn match_rule(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
285
        stack: &mut StackElement,
P
Phodal Huang 已提交
286
        anchor_position: i32,
287
    ) -> Option<MatchRuleResult> {
288
        let mut rule = stack.get_rule(self);
P
Phodal Huang 已提交
289
        let mut rule_scanner = rule.compile(
P
Phodal Huang 已提交
290
            self,
P
Phodal Huang 已提交
291
            stack.end_rule.clone(),
P
Phodal Huang 已提交
292 293 294
            is_first_line,
            line_pos == anchor_position,
        );
P
Phodal Huang 已提交
295 296
        let r = rule_scanner.scanner.find_next_match_sync(line_text, line_pos);
        if let Some(result) = r {
297 298
            let match_rule_result = MatchRuleResult {
                capture_indices: result.capture_indices,
299
                matched_rule_id: rule_scanner.rules[result.index],
300 301 302 303
            };

            println!("{:?}", match_rule_result.clone());
            Some(match_rule_result)
P
Phodal Huang 已提交
304 305 306
        } else {
            None
        }
P
Phodal Huang 已提交
307
    }
P
Phodal Huang 已提交
308

309
    /// Tokenizes `line_text` using the previous line state `prev_state`, with
    /// binary token output disabled. Nothing is returned yet.
    pub fn tokenize_line(&mut self, line_text: String, prev_state: Option<StackElement>) {
        self.tokenize(line_text, prev_state, false);
    }

P
Phodal Huang 已提交
313 314
    /// Placeholder for the binary-token variant of `tokenize_line`; the body is
    /// empty, so both arguments are ignored and nothing is produced.
    pub fn tokenize_line2(&self, line_text: String, prev_state: Option<StackElement>) {}
}
P
Phodal Huang 已提交
315 316 317 318

// Empty impl: no items of `IRuleFactoryHelper` are overridden for `Grammar` here.
impl IRuleFactoryHelper for Grammar {}

impl IGrammarRegistry for Grammar {
P
Phodal Huang 已提交
319 320 321 322 323
    fn get_external_grammar(
        &self,
        scope_name: String,
        repository: IRawRepository,
    ) -> Option<IRawGrammar> {
P
Phodal Huang 已提交
324 325 326 327 328
        None
    }
}

impl IRuleRegistry for Grammar {
P
Phodal Huang 已提交
329 330
    fn register_id(&mut self) -> i32 {
        self.last_rule_id = self.last_rule_id + 1;
P
Phodal Huang 已提交
331
        self.last_rule_id.clone()
P
Phodal Huang 已提交
332 333
    }

P
Phodal Huang 已提交
334 335 336
    fn get_rule(&mut self, pattern_id: i32) -> Box<dyn AbstractRule> {
        if let Some(rule) = self.rule_id2desc.get_mut(&pattern_id) {
            return rule.to_owned();
P
Phodal Huang 已提交
337
        }
P
Phodal Huang 已提交
338
        Box::from(EmptyRule {})
P
Phodal Huang 已提交
339
    }
P
Phodal Huang 已提交
340

P
Phodal Huang 已提交
341
    fn register_rule(&mut self, result: Box<dyn AbstractRule>) -> Box<dyn AbstractRule> {
P
Phodal Huang 已提交
342
        self.rule_id2desc
P
Phodal Huang 已提交
343
            .insert(result.id().clone(), result.clone());
344
        result
P
Phodal Huang 已提交
345
    }
P
Phodal Huang 已提交
346 347 348 349
}

#[cfg(test)]
mod tests {
    use std::fs::{self, File};
    use std::io::Write;

    use crate::grammar::Grammar;
    use crate::inter::IRawGrammar;
    use crate::rule::IRuleRegistry;

    // Loads the C grammar and tokenizes a small program. It only checks that
    // nothing panics; the rule-count assertion is not enabled yet.
    #[test]
    fn should_build_json_code() {
        let code = "
#include <stdio.h>
int main() {
printf(\"Hello, World!\");
return 0;
}
";
        let _grammar = to_grammar("test-cases/first-mate/fixtures/c.json", code);
        // todo: assert_eq!(_grammar.rule_id2desc.len(), 162);
    }

    #[test]
    fn should_build_text_grammar() {
        let code = "
GitHub 漫游指南
";
        let grammar = to_grammar("test-cases/first-mate/fixtures/text.json", code);
        assert_eq!(grammar.rule_id2desc.len(), 8);
    }

    /// Dumps the registered rules as JSON to `path` for manual inspection.
    fn debug_output(grammar: &Grammar, path: String) {
        let json = serde_json::to_string(&grammar.rule_id2desc).unwrap();
        let mut file = File::create(path).unwrap();
        // Best effort: a failed write must not fail the test that asked for the dump.
        let _ = file.write_all(json.as_bytes());
    }

    #[test]
    fn should_build_json_grammar() {
        let code = "{}";
        let grammar = to_grammar("test-cases/first-mate/fixtures/json.json", code);
        assert_eq!(grammar.rule_id2desc.len(), 22);
        debug_output(&grammar, String::from("program.json"));
    }

    #[test]
    fn should_build_html_grammar() {
        let code = "{}";
        let grammar = to_grammar("test-cases/first-mate/fixtures/html.json", code);
        assert_eq!(grammar.rule_id2desc.len(), 67);
        debug_output(&grammar, String::from("program.json"));
    }

    #[test]
    fn should_build_makefile_grammar() {
        let code = "CC=gcc
CFLAGS=-I.
DEPS = hellomake.h
OBJ = hellomake.o hellofunc.o

%.o: %.c $(DEPS)
	$(CC) -c -o $@ $< $(CFLAGS)

hellomake: $(OBJ)
	$(CC) -o $@ $^ $(CFLAGS)
";
        let mut grammar = to_grammar("test-cases/first-mate/fixtures/makefile.json", code);
        assert_eq!(grammar.rule_id2desc.len(), 64);
        assert_eq!(grammar.get_rule(1).patterns_length(), 4);
        debug_output(&grammar, String::from("program.json"));
    }

    /// Loads the grammar JSON at `grammar_path` and tokenizes every line of
    /// `code` with it, each line starting from an empty previous state.
    fn to_grammar(grammar_path: &str, code: &str) -> Grammar {
        let data = fs::read_to_string(grammar_path).unwrap();
        let raw: IRawGrammar = serde_json::from_str(&data).unwrap();

        let mut grammar = Grammar::new(raw);
        for line in code.lines() {
            grammar.tokenize_line(String::from(line), None);
        }
        grammar
    }
}