grammar.rs 27.6 KB
Newer Older
1
use std::collections::BTreeMap as Map;
P
Phodal Huang 已提交
2

P
Phodal Huang 已提交
3
use crate::grammar::line_tokens::{IToken, LineTokens, TokenTypeMatcher};
P
Phodal Huang 已提交
4 5
use crate::grammar::local_stack_element::LocalStackElement;
use crate::grammar::{MatchRuleResult, ScopeListElement, StackElement};
P
Phodal Huang 已提交
6
use crate::inter::{IRawGrammar, IRawRepository, IRawRepositoryMap, IRawRule};
P
Phodal Huang 已提交
7
use crate::rule::abstract_rule::RuleEnum;
P
Phodal Huang 已提交
8
use crate::rule::rule_factory::RuleFactory;
P
Phodal Huang 已提交
9 10 11
use crate::rule::{
    AbstractRule, BeginWhileRule, EmptyRule, IGrammarRegistry, IRuleFactoryHelper, IRuleRegistry,
};
P
Phodal Huang 已提交
12
use core::cmp;
P
Phodal Huang 已提交
13
use scie_scanner::scanner::scanner::IOnigCaptureIndex;
P
Phodal Huang 已提交
14 15
use std::fs::File;
use std::io::Read;
16
use std::path::Path;
P
Phodal Huang 已提交
17

P
Phodal Huang 已提交
18
/// Marker trait with no required methods.
///
/// NOTE(review): nothing in the visible part of this file implements or
/// consumes it — confirm whether it is still needed.
pub trait Matcher {}
P
Phodal Huang 已提交
19

P
Phodal Huang 已提交
20 21 22
#[derive(Debug, Clone)]
pub struct CheckWhileRuleResult {
    pub rule: Box<BeginWhileRule>,
23 24 25 26 27 28
    pub stack: Box<StackElement>,
}

#[derive(Debug, Clone)]
pub struct CheckWhileConditionResult {
    pub stack: Box<StackElement>,
P
Phodal Huang 已提交
29
    pub line_pos: i32,
30
    pub anchor_position: i32,
P
Phodal Huang 已提交
31
    pub is_first_line: bool,
P
Phodal Huang 已提交
32 33
}

P
Phodal Huang 已提交
34 35
#[derive(Debug, Clone)]
pub struct TokenizeResult {
P
Phodal Huang 已提交
36 37
    pub tokens: Vec<IToken>,
    pub rule_stack: Box<Option<StackElement>>,
P
Phodal Huang 已提交
38 39
}

P
Phodal Huang 已提交
40
#[derive(Debug, Clone)]
P
Phodal Huang 已提交
41
pub struct Grammar {
42
    root_id: i32,
P
Phodal Huang 已提交
43
    grammar: IRawGrammar,
44
    pub last_rule_id: i32,
45
    pub rule_id2desc: Map<i32, Box<dyn AbstractRule>>,
P
Phodal Huang 已提交
46
    pub _token_type_matchers: Vec<TokenTypeMatcher>,
P
Phodal Huang 已提交
47 48
}

P
Phodal Huang 已提交
49
pub fn init_grammar(grammar: IRawGrammar, _base: Option<IRawRule>) -> IRawGrammar {
P
Phodal Huang 已提交
50 51 52
    let mut _grammar = grammar.clone();

    let mut new_based: IRawRule = IRawRule::new();
P
Phodal Huang 已提交
53 54 55
    if let Some(repo) = grammar.clone().repository {
        new_based.location = repo.clone().location;
    }
P
Phodal Huang 已提交
56 57
    new_based.patterns = Some(grammar.clone().patterns.clone());
    new_based.name = grammar.clone().name;
P
Phodal Huang 已提交
58 59

    let mut repository_map = IRawRepositoryMap::new();
P
Phodal Huang 已提交
60 61
    repository_map.base_s = Some(Box::from(new_based.clone()));
    repository_map.self_s = Some(Box::from(new_based.clone()));
P
Phodal Huang 已提交
62 63 64
    if let Some(repo) = grammar.clone().repository {
        repository_map.name_map = repo.clone().map.name_map.clone();
    }
P
Phodal Huang 已提交
65 66 67

    _grammar.repository = Some(IRawRepository {
        map: Box::new(repository_map.clone()),
68
        location: None,
P
Phodal Huang 已提交
69 70 71 72 73
    });

    _grammar
}

P
Phodal Huang 已提交
74
impl Grammar {
P
Phodal Huang 已提交
75
    pub fn new(grammar: IRawGrammar) -> Grammar {
P
Phodal Huang 已提交
76
        let _grammar = init_grammar(grammar.clone(), None);
P
Phodal Huang 已提交
77
        Grammar {
78
            last_rule_id: 0,
P
Phodal Huang 已提交
79
            grammar: _grammar,
P
Phodal Huang 已提交
80
            root_id: -1,
81
            rule_id2desc: Map::new(),
P
Phodal Huang 已提交
82
            _token_type_matchers: vec![],
P
Phodal Huang 已提交
83 84 85
        }
    }

P
Phodal Huang 已提交
86
    fn tokenize(
87
        &mut self,
P
Phodal Huang 已提交
88
        line_text: String,
P
Phodal Huang 已提交
89
        prev_state: &mut Option<StackElement>,
P
Phodal Huang 已提交
90
        emit_binary_tokens: bool,
P
Phodal Huang 已提交
91
    ) -> TokenizeResult {
92
        if self.root_id.clone() == -1 {
P
Phodal Huang 已提交
93
            let repository = self.grammar.repository.clone().unwrap();
P
Phodal Huang 已提交
94
            let based = repository.clone().map.self_s.unwrap();
P
Phodal Huang 已提交
95
            self.root_id = RuleFactory::get_compiled_rule_id(
P
Phodal Huang 已提交
96
                *based.clone(),
P
Phodal Huang 已提交
97 98 99 100
                self,
                &mut repository.clone(),
                String::from(""),
            );
101
        }
P
Phodal Huang 已提交
102

P
Phodal Huang 已提交
103
        let mut is_first_line: bool = false;
104 105

        let mut current_state = StackElement::null();
P
Phodal Huang 已提交
106
        match prev_state.clone() {
P
Phodal Huang 已提交
107
            None => is_first_line = true,
108 109 110 111
            Some(state) => {
                if state == StackElement::null() {
                    is_first_line = true
                }
112 113

                current_state = state;
P
Phodal Huang 已提交
114
            }
115
        }
P
Phodal Huang 已提交
116

P
Phodal Huang 已提交
117
        if is_first_line {
P
Phodal Huang 已提交
118
            let _root_scope_name = self.get_rule(self.root_id.clone()).get_name(None, None);
P
Phodal Huang 已提交
119 120 121 122 123
            let mut root_scope_name = String::from("unknown");
            if let Some(name) = _root_scope_name {
                root_scope_name = name
            }

P
Phodal Huang 已提交
124
            let scope_list = ScopeListElement::new(None, root_scope_name);
125
            let state = StackElement::new(
P
Phodal Huang 已提交
126 127 128 129 130 131 132 133
                None,
                self.root_id.clone(),
                -1,
                -1,
                false,
                None,
                scope_list.clone(),
                scope_list.clone(),
134 135 136
            );

            current_state = state;
P
Phodal Huang 已提交
137 138
        } else {
            is_first_line = false;
P
Phodal Huang 已提交
139
            current_state.reset();
P
Phodal Huang 已提交
140 141
        }

142
        let format_line_text = line_text.clone() + "\n";
P
Phodal Huang 已提交
143
        let mut line_tokens = LineTokens::new(
P
Phodal Huang 已提交
144 145 146 147
            emit_binary_tokens,
            line_text,
            self._token_type_matchers.clone(),
        );
P
Phodal Huang 已提交
148 149
        let next_state = self.tokenize_string(
            format_line_text.clone(),
P
Phodal Huang 已提交
150 151
            is_first_line,
            0,
P
Phodal Huang 已提交
152
            current_state,
153
            &mut line_tokens,
P
Phodal Huang 已提交
154
            true,
155
        );
P
Phodal Huang 已提交
156

P
Phodal Huang 已提交
157 158 159
        let line_length = format_line_text.clone().len();
        let stack = &mut next_state.clone().unwrap();
        let vec = line_tokens.get_result(stack, line_length as i32);
P
Phodal Huang 已提交
160
        TokenizeResult {
P
Phodal Huang 已提交
161 162
            tokens: vec,
            rule_stack: Box::new(next_state.clone()),
P
Phodal Huang 已提交
163
        }
P
Phodal Huang 已提交
164 165
    }

P
Phodal Huang 已提交
166 167 168
    pub fn tokenize_string(
        &mut self,
        line_text: String,
P
Phodal Huang 已提交
169
        mut is_first_line: bool,
170
        origin_line_pos: i32,
P
Phodal Huang 已提交
171
        mut stack: StackElement,
P
Phodal Huang 已提交
172
        line_tokens: &mut LineTokens,
P
Phodal Huang 已提交
173
        check_while_conditions: bool,
174
    ) -> Option<StackElement> {
P
Phodal Huang 已提交
175
        let _line_length = line_text.len();
176
        let mut _stop = false;
177
        let mut anchor_position = -1;
178
        let mut line_pos = origin_line_pos.clone();
P
Phodal Huang 已提交
179 180

        if check_while_conditions {
181
            let while_check_result = self.check_while_conditions(
P
Phodal Huang 已提交
182
                line_text.clone(),
P
Phodal Huang 已提交
183
                is_first_line.clone(),
184
                origin_line_pos.clone(),
185
                stack.clone(),
P
Phodal Huang 已提交
186 187
                line_tokens.clone(),
            );
188 189 190 191
            stack = *while_check_result.stack;
            line_pos = while_check_result.line_pos;
            is_first_line = while_check_result.is_first_line;
            anchor_position = while_check_result.anchor_position;
P
Phodal Huang 已提交
192 193
        }

194
        while !_stop {
P
Phodal Huang 已提交
195 196 197 198
            let r = self.match_rule(
                line_text.clone(),
                is_first_line,
                line_pos,
P
Phodal Huang 已提交
199
                &mut stack,
P
Phodal Huang 已提交
200 201
                anchor_position,
            );
202
            if let None = r {
P
Phodal Huang 已提交
203
                line_tokens.produce(&mut stack, _line_length as i32);
204
                _stop = true;
P
Phodal Huang 已提交
205
                return Some(stack.clone());
206 207
            }

P
Phodal Huang 已提交
208 209 210 211
            let capture_result = r.unwrap();
            let capture_indices = capture_result.capture_indices;
            let matched_rule_id = capture_result.matched_rule_id;
            if matched_rule_id == -1 {
212 213 214
                let _popped_rule = stack.get_rule(self);
                if let RuleEnum::BeginEndRule(popped_rule) = _popped_rule.get_rule_instance() {
                    let name_scopes_list = stack.clone().name_scopes_list;
P
Phodal Huang 已提交
215
                    line_tokens.produce(&mut stack, capture_indices[0].start.clone() as i32);
216 217 218
                    stack = stack.set_content_name_scopes_list(name_scopes_list);
                    Grammar::handle_captures(
                        self,
219
                        line_text.clone(),
220 221 222 223 224 225 226 227 228 229 230 231 232
                        is_first_line,
                        &mut stack,
                        line_tokens,
                        popped_rule.end_captures,
                        capture_indices.clone(),
                    );

                    line_tokens.produce(&mut stack, capture_indices[0].end as i32);
                    let popped = stack.clone();
                    if let Some(_stack) = stack.pop() {
                        stack = _stack;
                    }
                    anchor_position = popped.anchor_pos;
P
Phodal Huang 已提交
233 234
                } else {
                    println!("_popped_rule {:?}", _popped_rule.clone());
P
Phodal Huang 已提交
235 236
                    _stop = true;
                    return Some(stack.clone());
237
                }
P
Phodal Huang 已提交
238 239
            } else {
                let rule = self.get_rule(matched_rule_id);
P
Phodal Huang 已提交
240
                line_tokens.produce(&mut stack, capture_indices[0].start as i32);
P
Phodal Huang 已提交
241
                // let before_push = stack.clone();
P
Phodal Huang 已提交
242 243
                let scope_name =
                    rule.get_name(Some(line_text.clone()), Some(capture_indices.clone()));
P
Phodal Huang 已提交
244
                let name_scopes_list = stack.content_name_scopes_list.push(self, scope_name);
P
Phodal Huang 已提交
245 246 247 248
                let mut begin_rule_capture_eol = false;
                if capture_indices[0].end == _line_length {
                    begin_rule_capture_eol = true;
                }
P
Phodal Huang 已提交
249
                stack = stack.push(
P
Phodal Huang 已提交
250 251 252 253 254 255
                    matched_rule_id,
                    line_pos,
                    anchor_position,
                    begin_rule_capture_eol,
                    None,
                    name_scopes_list.clone(),
P
Phodal Huang 已提交
256
                    name_scopes_list.clone(),
P
Phodal Huang 已提交
257 258
                );

P
Phodal Huang 已提交
259 260
                match rule.get_rule_instance() {
                    RuleEnum::BeginEndRule(begin_rule) => {
261
                        let push_rule = begin_rule.clone();
P
Phodal Huang 已提交
262
                        Grammar::handle_captures(
P
Phodal Huang 已提交
263 264 265
                            self,
                            line_text.clone(),
                            is_first_line,
P
Phodal Huang 已提交
266
                            &mut stack,
267
                            line_tokens,
P
Phodal Huang 已提交
268 269 270
                            begin_rule.begin_captures,
                            capture_indices.clone(),
                        );
P
Phodal Huang 已提交
271

P
Phodal Huang 已提交
272
                        line_tokens.produce(&mut stack, capture_indices[0].end.clone() as i32);
273
                        anchor_position = capture_indices[0].end.clone() as i32;
P
Phodal Huang 已提交
274 275 276 277
                        let content_name = push_rule.get_content_name(
                            Some(line_text.clone()),
                            Some(capture_indices.clone()),
                        );
P
Phodal Huang 已提交
278
                        let _content_name_scopes_list = name_scopes_list.push(self, content_name);
279
                        stack = stack.set_content_name_scopes_list(_content_name_scopes_list);
280
                    }
P
Phodal Huang 已提交
281
                    RuleEnum::BeginWhileRule(_while_rule) => {
P
Phodal Huang 已提交
282
                        println!("todo: RuleEnum - BeginWhileRule");
283
                        _stop = true;
P
Phodal Huang 已提交
284
                        return Some(stack.clone());
285
                    }
P
Phodal Huang 已提交
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
                    RuleEnum::MatchRule(match_rule) => {
                        Grammar::handle_captures(
                            self,
                            line_text.clone(),
                            is_first_line,
                            &mut stack,
                            line_tokens,
                            match_rule.captures,
                            capture_indices.clone(),
                        );
                        line_tokens.produce(&mut stack, capture_indices[0].end.clone() as i32);
                        if let Some(_stack) = stack.pop() {
                            stack = _stack;
                        }
                    }
301
                    _ => {
P
Phodal Huang 已提交
302
                        println!("todo: RuleEnum - Others");
303
                        _stop = true;
P
Phodal Huang 已提交
304
                        return Some(stack.clone());
P
Phodal Huang 已提交
305 306
                    }
                }
P
Phodal Huang 已提交
307
            }
308 309 310 311 312

            if capture_indices[0].end > line_pos as usize {
                line_pos = capture_indices[0].end as i32;
                is_first_line = false;
            }
313
        }
314
        Some(stack.clone())
P
Phodal Huang 已提交
315 316
    }

P
Phodal Huang 已提交
317 318 319 320
    pub fn handle_captures(
        grammar: &mut Grammar,
        line_text: String,
        is_first_line: bool,
321 322
        stack: &mut StackElement,
        line_tokens: &mut LineTokens,
P
Phodal Huang 已提交
323 324
        captures: Vec<Box<dyn AbstractRule>>,
        capture_indices: Vec<IOnigCaptureIndex>,
P
Phodal Huang 已提交
325
    ) -> Option<LineTokens> {
P
Phodal Huang 已提交
326 327
        let captures_len = captures.clone().len();
        if captures_len == 0 {
P
Phodal Huang 已提交
328
            return None;
P
Phodal Huang 已提交
329 330 331 332 333 334 335
        }

        let len = cmp::min(captures_len, capture_indices.len());
        let mut local_stack: Vec<LocalStackElement> = vec![];
        let max_end = capture_indices[0].end;
        for i in 0..len {
            let capture_rule = captures[i].clone();
P
Phodal Huang 已提交
336 337 338 339
            if let RuleEnum::CaptureRule(capture) = capture_rule.get_rule_instance() {
                if capture.clone().rule._type == "" {
                    continue;
                }
P
Phodal Huang 已提交
340

P
Phodal Huang 已提交
341 342 343 344
                let capture_index = capture_indices[i].clone();
                if capture_index.length == 0 {
                    continue;
                }
P
Phodal Huang 已提交
345

P
Phodal Huang 已提交
346 347 348
                if capture_index.start > max_end {
                    continue;
                }
349

P
Phodal Huang 已提交
350 351 352 353 354 355 356 357 358 359
                while local_stack.len() > 0
                    && local_stack[local_stack.len() - 1].end_pos <= capture_index.start as i32
                {
                    let mut local_stack_element = local_stack[local_stack.len() - 1].clone();
                    line_tokens.produce_from_scopes(
                        &mut local_stack_element.scopes,
                        local_stack_element.end_pos,
                    );
                    local_stack.pop();
                }
360

P
Phodal Huang 已提交
361 362 363 364 365 366 367 368 369
                if local_stack.len() > 0 {
                    let mut local_stack_element = local_stack[local_stack.len() - 1].clone();
                    line_tokens.produce_from_scopes(
                        &mut local_stack_element.scopes,
                        capture_index.start as i32,
                    );
                } else {
                    line_tokens.produce(stack, capture_index.start as i32);
                }
370

P
Phodal Huang 已提交
371
                if capture.retokenize_captured_with_rule_id != 0 {
P
Phodal Huang 已提交
372 373 374 375 376
                    let scope_name =
                        capture.get_name(Some(line_text.clone()), Some(capture_indices.clone()));
                    let name_scopes_list = stack.content_name_scopes_list.push(grammar, scope_name);
                    let content_name = capture
                        .get_content_name(Some(line_text.clone()), Some(capture_indices.clone()));
P
Phodal Huang 已提交
377 378
                    let content_name_scopes_list = name_scopes_list.push(grammar, content_name);

379
                    let stack_clone = stack.clone().push(
P
Phodal Huang 已提交
380 381 382 383 384 385 386 387 388 389 390 391 392
                        capture.retokenize_captured_with_rule_id,
                        capture_index.start.clone() as i32,
                        -1,
                        false,
                        None,
                        name_scopes_list,
                        content_name_scopes_list,
                    );

                    let sub_text = line_text.split_at(capture_index.end).0;
                    let mut sub_is_first_line = false;
                    if is_first_line && capture_index.start == 0 {
                        sub_is_first_line = true;
393
                    }
P
Phodal Huang 已提交
394 395 396 397 398 399 400 401 402 403 404
                    Grammar::tokenize_string(
                        grammar,
                        String::from(sub_text),
                        sub_is_first_line,
                        capture_index.start as i32,
                        stack_clone,
                        line_tokens,
                        false,
                    );
                    // todo: find the next_text_not_switch_issues
                    continue;
405
                }
P
Phodal Huang 已提交
406

P
Phodal Huang 已提交
407 408
                let capture_scope_name =
                    capture_rule.get_name(Some(line_text.clone()), Some(capture_indices.clone()));
P
Phodal Huang 已提交
409
                if let Some(_name) = capture_scope_name.clone() {
410
                    let mut base = stack.content_name_scopes_list.clone();
P
Phodal Huang 已提交
411
                    if local_stack.len() > 0 {
412
                        base = local_stack[local_stack.len() - 1].scopes.clone();
P
Phodal Huang 已提交
413 414 415 416 417 418
                    }
                    let capture_rule_scopes_list = base.push(grammar, capture_scope_name.clone());
                    local_stack.push(LocalStackElement::new(
                        capture_rule_scopes_list,
                        capture_index.end as i32,
                    ));
P
Phodal Huang 已提交
419
                }
P
Phodal Huang 已提交
420 421
            } else {
                println!("lose rule: {:?}", capture_rule.clone());
P
Phodal Huang 已提交
422 423 424 425
            }
        }

        while local_stack.len() > 0 {
P
Phodal Huang 已提交
426 427
            let mut last_stack = local_stack[local_stack.len() - 1].clone();
            line_tokens.produce_from_scopes(&mut last_stack.scopes, last_stack.end_pos);
P
Phodal Huang 已提交
428
            local_stack.pop();
P
Phodal Huang 已提交
429
        }
P
Phodal Huang 已提交
430 431

        return Some(line_tokens.to_owned());
P
Phodal Huang 已提交
432
    }
P
Phodal Huang 已提交
433 434 435 436 437
    /**
     * Walk the stack from bottom to top, and check each while condition in this order.
     * If any fails, cut off the entire stack above the failed while condition. While conditions
     * may also advance the linePosition.
     */
P
Phodal Huang 已提交
438 439 440 441 442
    pub fn check_while_conditions(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
443
        mut stack: StackElement,
P
Phodal Huang 已提交
444
        _line_tokens: LineTokens,
445
    ) -> CheckWhileConditionResult {
P
Phodal Huang 已提交
446
        let mut anchor_position = -1;
P
Phodal Huang 已提交
447
        if stack.begin_rule_captured_eol {
P
Phodal Huang 已提交
448 449
            anchor_position = 0
        }
P
Phodal Huang 已提交
450 451 452 453 454 455 456 457
        let mut while_rules = vec![];
        let mut has_node = true;
        let mut node = stack.clone();
        while has_node {
            let rule = node.clone().get_rule(self);
            if let RuleEnum::BeginWhileRule(begin_rule) = rule.get_rule_instance() {
                while_rules.push(CheckWhileRuleResult {
                    rule: Box::from(begin_rule),
458
                    stack: Box::from(node.clone()),
P
Phodal Huang 已提交
459 460 461 462
                })
            }

            match node.pop() {
P
Phodal Huang 已提交
463
                None => has_node = false,
P
Phodal Huang 已提交
464 465
                Some(n) => {
                    node = n;
466
                }
P
Phodal Huang 已提交
467 468 469
            }
        }

470 471 472 473 474 475 476 477
        for while_rule in while_rules.clone() {
            let allow_g = anchor_position == line_pos;
            let mut rule_scanner = while_rule.clone().rule.compile_while(
                self,
                while_rule.clone().stack.end_rule,
                is_first_line,
                allow_g,
            );
P
Phodal Huang 已提交
478 479 480
            let match_result = rule_scanner
                .scanner
                .find_next_match_sync(line_text.clone(), line_pos);
481 482 483 484 485 486 487 488 489
            match match_result {
                None => {
                    stack = while_rule.stack.pop().unwrap();
                    break;
                }
                Some(_) => {
                    println!("todo: check_while_conditions");
                }
            }
P
Phodal Huang 已提交
490 491
        }

492 493 494 495 496
        // println!("{:?}", while_rules);
        CheckWhileConditionResult {
            stack: Box::new(stack),
            line_pos,
            anchor_position,
P
Phodal Huang 已提交
497
            is_first_line,
498
        }
P
Phodal Huang 已提交
499
    }
P
Phodal Huang 已提交
500

P
Phodal Huang 已提交
501 502 503 504 505
    pub fn match_rule_or_injections(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
506
        stack: &mut StackElement,
P
Phodal Huang 已提交
507
        anchor_position: i32,
P
Phodal Huang 已提交
508
    ) {
P
Phodal Huang 已提交
509 510
        let match_result =
            self.match_rule(line_text, is_first_line, line_pos, stack, anchor_position);
P
Phodal Huang 已提交
511 512
        if let Some(_result) = match_result {
        } else {
513 514 515
            // None
        };
        // todo: get injections logic
P
Phodal Huang 已提交
516 517 518 519 520 521 522
    }

    pub fn match_rule(
        &mut self,
        line_text: String,
        is_first_line: bool,
        line_pos: i32,
P
Phodal Huang 已提交
523
        stack: &mut StackElement,
P
Phodal Huang 已提交
524
        anchor_position: i32,
525
    ) -> Option<MatchRuleResult> {
526
        let mut rule = stack.get_rule(self);
P
Phodal Huang 已提交
527
        let rule_info = rule.clone().get_rule_instance();
P
Phodal Huang 已提交
528

529 530 531 532
        if rule.get_rule().id == 36 {
            println!("{:?}", line_pos);
        }

P
Phodal Huang 已提交
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552
        let mut rule_scanner;
        match rule_info {
            RuleEnum::BeginEndRule(mut begin_end_rule) => {
                rule_scanner = begin_end_rule.compile(
                    self,
                    stack.end_rule.clone(),
                    is_first_line,
                    line_pos == anchor_position,
                );
            }
            _ => {
                rule_scanner = rule.compile(
                    self,
                    stack.end_rule.clone(),
                    is_first_line,
                    line_pos == anchor_position,
                );
            }
        }

P
Phodal Huang 已提交
553 554 555
        let r = rule_scanner
            .scanner
            .find_next_match_sync(line_text, line_pos);
P
Phodal Huang 已提交
556
        if let Some(result) = r {
557 558
            let match_rule_result = MatchRuleResult {
                capture_indices: result.capture_indices,
559
                matched_rule_id: rule_scanner.rules[result.index],
560 561 562
            };

            Some(match_rule_result)
P
Phodal Huang 已提交
563 564 565
        } else {
            None
        }
P
Phodal Huang 已提交
566
    }
P
Phodal Huang 已提交
567

P
Phodal Huang 已提交
568 569 570
    pub fn tokenize_line(
        &mut self,
        line_text: String,
P
Phodal Huang 已提交
571
        prev_state: &mut Option<StackElement>,
P
Phodal Huang 已提交
572
    ) -> TokenizeResult {
P
Phodal Huang 已提交
573 574 575
        self.tokenize(line_text, prev_state, false)
    }

P
Phodal Huang 已提交
576
    /// Unimplemented stub: the body is empty, so both arguments are ignored
    /// and nothing is returned.
    pub fn tokenize_line2(&self, _line_text: String, _prev_state: Option<StackElement>) {}
P
Phodal Huang 已提交
577 578 579 580 581 582 583 584 585 586

    /// Loads a JSON grammar definition from `grammar_path` and builds a
    /// `Grammar` from it.
    ///
    /// # Panics
    ///
    /// Panics when the file cannot be opened or read, or when its contents do
    /// not deserialize into an `IRawGrammar`. The panic message names the file
    /// and the underlying error.
    pub fn to_grammar(grammar_path: &str) -> Grammar {
        let path = Path::new(grammar_path);
        let mut file = File::open(path).unwrap_or_else(|err| {
            panic!("failed to open grammar file {}: {}", grammar_path, err)
        });

        let mut data = String::new();
        file.read_to_string(&mut data).unwrap_or_else(|err| {
            panic!("failed to read grammar file {}: {}", grammar_path, err)
        });

        let g: IRawGrammar = serde_json::from_str(&data).unwrap_or_else(|err| {
            panic!("failed to parse grammar file {}: {}", grammar_path, err)
        });
        Grammar::new(g)
    }
P
Phodal Huang 已提交
587
}
P
Phodal Huang 已提交
588 589 590 591

// `Grammar` implements `IRuleFactoryHelper` with an empty body: nothing is
// defined or overridden here.
impl IRuleFactoryHelper for Grammar {}

impl IGrammarRegistry for Grammar {
P
Phodal Huang 已提交
592 593
    fn get_external_grammar(
        &self,
P
Phodal Huang 已提交
594 595
        _scope_name: String,
        _repository: IRawRepository,
P
Phodal Huang 已提交
596
    ) -> Option<IRawGrammar> {
P
Phodal Huang 已提交
597 598 599 600 601
        None
    }
}

impl IRuleRegistry for Grammar {
P
Phodal Huang 已提交
602 603
    fn register_id(&mut self) -> i32 {
        self.last_rule_id = self.last_rule_id + 1;
P
Phodal Huang 已提交
604
        self.last_rule_id.clone()
P
Phodal Huang 已提交
605 606
    }

P
Phodal Huang 已提交
607 608 609
    fn get_rule(&mut self, pattern_id: i32) -> Box<dyn AbstractRule> {
        if let Some(rule) = self.rule_id2desc.get_mut(&pattern_id) {
            return rule.to_owned();
P
Phodal Huang 已提交
610
        }
P
Phodal Huang 已提交
611
        Box::from(EmptyRule {})
P
Phodal Huang 已提交
612
    }
P
Phodal Huang 已提交
613

P
Phodal Huang 已提交
614
    fn register_rule(&mut self, result: Box<dyn AbstractRule>) -> Box<dyn AbstractRule> {
P
Phodal Huang 已提交
615
        self.rule_id2desc
P
Phodal Huang 已提交
616
            .insert(result.id().clone(), result.clone());
617
        result
P
Phodal Huang 已提交
618
    }
P
Phodal Huang 已提交
619 620 621 622
}

#[cfg(test)]
mod tests {
P
Phodal Huang 已提交
623
    use std::fs::File;
624
    use std::io::Write;
P
Phodal Huang 已提交
625

P
Phodal Huang 已提交
626
    use crate::grammar::{Grammar, StackElement};
P
Phodal Huang 已提交
627
    use crate::rule::abstract_rule::RuleEnum;
P
Phodal Huang 已提交
628
    use crate::rule::IRuleRegistry;
P
Phodal Huang 已提交
629

P
Phodal Huang 已提交
630
    /// Builds the C grammar against a small program, checks how many patterns
    /// the rule with id 1 has, and dumps the rule table to `program.json`.
    #[test]
    fn should_build_grammar_json() {
        let code = "
#include <stdio.h>
int main() {
printf(\"Hello, World!\");
return 0;
}
";
        let grammar = to_grammar_with_code("test-cases/first-mate/fixtures/c.json", code);
        let first_rule = grammar.rule_id2desc.get(&1).unwrap();
        assert_eq!(28, first_rule.clone().patterns_length());
        debug_output(&grammar, String::from("program.json"));
    }

    /// Tokenizes a single C `#include` line and checks the number of tokens and
    /// the start index of each one.
    #[test]
    fn should_identify_c_include() {
        let source_line = "#include <stdio.h>";
        let mut grammar = Grammar::to_grammar("test-cases/first-mate/fixtures/c.json");
        let mut stack = Some(StackElement::null());
        let tokenized = grammar.tokenize_line(String::from(source_line), &mut stack);

        assert_eq!(6, tokenized.tokens.len());

        // Expected start index of each token, in token order.
        let expected_starts = [0, 1, 8, 9, 10, 17];
        for (token, expected) in tokenized.tokens.iter().zip(expected_starts.iter()) {
            assert_eq!(*expected, token.start_index);
        }
    }

P
Phodal Huang 已提交
661 662 663
    /// Runs a short plain-text snippet through the text fixture grammar and
    /// checks that 8 rules end up in the rule table.
    #[test]
    fn should_build_text_grammar() {
        let code = "
GitHub 漫游指南
";
        let grammar = to_grammar_with_code("test-cases/first-mate/fixtures/text.json", code);

        let rule_total = grammar.rule_id2desc.len();
        assert_eq!(rule_total, 8);
    }

    fn debug_output(grammar: &Grammar, path: String) {
P
Phodal Huang 已提交
671
        let j = serde_json::to_string(&grammar.rule_id2desc).unwrap();
672
        let mut file = File::create(path).unwrap();
P
Phodal Huang 已提交
673
        match file.write_all(j.as_bytes()) {
P
Phodal Huang 已提交
674 675
            Ok(_) => {}
            Err(_) => {}
P
Phodal Huang 已提交
676
        };
P
Phodal Huang 已提交
677 678
    }

679 680 681
    /// Tokenizes an empty JSON object with the JSON fixture grammar and checks
    /// that 22 rules end up in the rule table.
    #[test]
    fn should_build_json_grammar() {
        let grammar = to_grammar_with_code("test-cases/first-mate/fixtures/json.json", "{}");

        let rule_total = grammar.rule_id2desc.len();
        assert_eq!(rule_total, 22);

        debug_output(&grammar, String::from("program.json"));
    }

    /// Tokenizes a tiny HTML snippet with the HTML fixture grammar and checks
    /// that 101 rules end up in the rule table.
    #[test]
    fn should_build_html_grammar() {
        let grammar =
            to_grammar_with_code("test-cases/first-mate/fixtures/html.json", "<html><html>");

        let rule_total = grammar.rule_id2desc.len();
        assert_eq!(rule_total, 101);

        debug_output(&grammar, String::from("program.json"));
    }

P
Phodal Huang 已提交
695 696 697 698 699 700 701
    /// Tokenizes a few Makefile assignments, then checks that every
    /// `BeginEndRule` in the rule table has `_end.rule_id == -1`, that there are
    /// 24 such rules, and that rule 1 reports 4 patterns.
    #[test]
    fn should_build_correct_end_rule_id_for_makefile() {
        let code = "CC=gcc
CFLAGS=-I.
DEPS = hellomake.h
OBJ = hellomake.o hellofunc.o
";
        let mut grammar =
            to_grammar_with_code("test-cases/first-mate/fixtures/makefile.json", code);

        let mut begin_end_total = 0;
        // Walk a cloned copy of the rule table, counting the begin/end rules.
        for (_id, boxed_rule) in grammar.rule_id2desc.clone() {
            if let RuleEnum::BeginEndRule(begin_end) = boxed_rule.get_rule_instance() {
                assert_eq!(begin_end._end.rule_id, -1);
                begin_end_total += 1;
            }
        }

        assert_eq!(grammar.get_rule(1).patterns_length(), 4);
        assert_eq!(begin_end_total, 24);
        debug_output(&grammar, String::from("program.json"));
    }

P
Phodal Huang 已提交
717
    /// Tokenizes a complete small Makefile and checks the size of the rule
    /// table (82) and the pattern count of rule 1 (4). Currently ignored.
    #[test]
    #[ignore]
    fn should_build_makefile_grammar() {
        let code = "CC=gcc
CFLAGS=-I.
DEPS = hellomake.h
OBJ = hellomake.o hellofunc.o

%.o: %.c $(DEPS)
	$(CC) -c -o $@ $< $(CFLAGS)

hellomake: $(OBJ)
	$(CC) -o $@ $^ $(CFLAGS)
";
        let mut grammar =
            to_grammar_with_code("test-cases/first-mate/fixtures/makefile.json", code);

        let rule_total = grammar.rule_id2desc.len();
        assert_eq!(rule_total, 82);

        let root_patterns = grammar.get_rule(1).patterns_length();
        assert_eq!(root_patterns, 4);

        debug_output(&grammar, String::from("program.json"));
    }

P
Phodal Huang 已提交
738 739
    /// Tokenizes a Makefile pattern-rule line and checks the token count, the
    /// scopes of the first token and the start index of every token.
    #[test]
    fn should_resolve_make_file_error_issues() {
        let code = "%.o: %.c $(DEPS)";
        let mut grammar =
            to_grammar_with_code("test-cases/first-mate/fixtures/makefile.json", code);

        // Tokenize the same line again, this time without a previous rule stack.
        let tokenized = grammar.tokenize_line(String::from(code), &mut None);
        let tokens = &tokenized.tokens;
        assert_eq!(7, tokens.len());
        assert_eq!("Makefile,meta.scope.target.makefile,entity.name.function.target.makefile,constant.other.placeholder.makefile", tokens[0].scopes.join(","));

        // Expected start index of each token, in token order.
        let expected_starts = [0, 1, 3, 4, 9, 11, 15];
        for (token, expected) in tokens.iter().zip(expected_starts.iter()) {
            assert_eq!(*expected, token.start_index);
        }

        debug_output(&grammar, String::from("program.json"));
    }

P
Phodal Huang 已提交
757 758 759
    /// Tokenizes a Makefile target line followed by its recipe line, carrying
    /// the rule stack from the first line into the second, and checks the token
    /// count of each line.
    #[test]
    fn should_resolve_make_file_error_issues2() {
        let code = "hellomake: $(OBJ)
\t$(CC) -o $@ $^ $(CFLAGS)";
        let mut grammar =
            to_grammar_with_code("test-cases/first-mate/fixtures/makefile.json", code);

        let mut stack = Some(StackElement::null());

        let target_line = grammar.tokenize_line(String::from("hellomake: $(OBJ)"), &mut stack);
        assert_eq!(6, target_line.tokens.len());

        // Continue from the stack produced by the target line.
        stack = *target_line.rule_stack;
        let recipe_line =
            grammar.tokenize_line(String::from("\t$(CC) -o $@ $^ $(CFLAGS)"), &mut stack);
        assert_eq!(14, recipe_line.tokens.len());

        debug_output(&grammar, String::from("program.json"));
    }

P
Phodal Huang 已提交
774
    fn to_grammar_with_code(grammar_path: &str, code: &str) -> Grammar {
P
Phodal Huang 已提交
775
        let mut grammar = Grammar::to_grammar(grammar_path);
776
        let c_code = String::from(code);
P
Phodal Huang 已提交
777
        let mut rule_stack = Some(StackElement::null());
P
Phodal Huang 已提交
778
        for line in c_code.lines() {
P
Phodal Huang 已提交
779 780
            let result = grammar.tokenize_line(String::from(line), &mut rule_stack);
            rule_stack = *result.rule_stack;
P
Phodal Huang 已提交
781
            for token in result.tokens {
P
Phodal Huang 已提交
782 783
                let start = token.start_index.clone() as usize;
                let end = token.end_index.clone() as usize;
P
Phodal Huang 已提交
784 785 786 787 788
                let new_line: String = String::from(line)
                    .chars()
                    .skip(start)
                    .take(end - start)
                    .collect();
P
Phodal Huang 已提交
789
                let token_str: String = token.scopes.join(", ");
P
Phodal Huang 已提交
790 791 792 793
                println!(
                    " - token from {:?} to {:?} ({:?}) with scopes {:?}",
                    token.start_index, token.end_index, new_line, token_str
                )
P
Phodal Huang 已提交
794
            }
P
Phodal Huang 已提交
795
        }
P
Phodal Huang 已提交
796

797
        grammar
P
Phodal Huang 已提交
798 799
    }
}