/* Copyright JS Foundation and other contributors, http://js.foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef RE_BYTECODE_H
#define RE_BYTECODE_H

#if ENABLED (JERRY_BUILTIN_REGEXP)

#include "ecma-globals.h"

/** \addtogroup parser Parser
 * @{
 *
 * \addtogroup regexparser Regular expression
 * @{
 *
 * \addtogroup regexparser_bytecode Bytecode
 * @{
 */

/**
 * Size of the RegExp bytecode cache.
 *
 * NOTE(review): presumably counted in cache entries rather than bytes; the
 * cache itself is defined outside this header - confirm against its user.
 */
#define RE_CACHE_SIZE 8u

/**
 * RegExp flags mask: selects the low six bits (0x3F) of a value.
 *
 * NOTE(review): the earlier wording of this comment said "first 10 bits are
 * for reference count and the rest for the actual RegExp flags". A six-bit
 * mask agrees with that only if the masked field is 16 bits wide (10 + 6).
 * The field this mask is applied to is not visible in this header - confirm
 * the bit layout there.
 */
#define RE_FLAGS_MASK 0x3F

/**
 * RegExp opcodes
 *
 * The enumerators rely on implicit numbering starting at 0, so inserting or
 * reordering entries changes every following opcode value.
 */
typedef enum
{
  RE_OP_EOF,
  /* The order of the group opcodes is significant: RE_IS_CAPTURE_GROUP is
   * based on it. Change it carefully. The capture opcodes must come first.
   */
  RE_OP_CAPTURE_GROUP_START,                      /**< group start */
  RE_OP_CAPTURE_GREEDY_ZERO_GROUP_START,          /**< greedy zero group start */
  RE_OP_CAPTURE_NON_GREEDY_ZERO_GROUP_START,      /**< non-greedy zero group start */
  RE_OP_CAPTURE_GREEDY_GROUP_END,                 /**< greedy group end */
  RE_OP_CAPTURE_NON_GREEDY_GROUP_END,             /**< non-greedy group end */
  RE_OP_NON_CAPTURE_GROUP_START,                  /**< non-capture group start */
  RE_OP_NON_CAPTURE_GREEDY_ZERO_GROUP_START,      /**< non-capture greedy zero group start */
  RE_OP_NON_CAPTURE_NON_GREEDY_ZERO_GROUP_START,  /**< non-capture non-greedy zero group start */
  RE_OP_NON_CAPTURE_GREEDY_GROUP_END,             /**< non-capture greedy group end */
  RE_OP_NON_CAPTURE_NON_GREEDY_GROUP_END,         /**< non-capture non-greedy group end */

  RE_OP_MATCH,                                    /**< match */
  RE_OP_CHAR,                                     /**< any character */
  RE_OP_SAVE_AT_START,                            /**< save at start */
  RE_OP_SAVE_AND_MATCH,                           /**< save and match */
  RE_OP_PERIOD,                                   /**< "." */
  RE_OP_ALTERNATIVE,                              /**< "|" */
  RE_OP_GREEDY_ITERATOR,                          /**< greedy iterator */
  RE_OP_NON_GREEDY_ITERATOR,                      /**< non-greedy iterator */
  RE_OP_ASSERT_START,                             /**< "^" */
  RE_OP_ASSERT_END,                               /**< "$" */
  RE_OP_ASSERT_WORD_BOUNDARY,                     /**< "\b" */
  RE_OP_ASSERT_NOT_WORD_BOUNDARY,                 /**< "\B" */
  RE_OP_LOOKAHEAD_POS,                            /**< lookahead pos */
  RE_OP_LOOKAHEAD_NEG,                            /**< lookahead neg */
  RE_OP_BACKREFERENCE,                            /**< "\[0..9]" */
  RE_OP_CHAR_CLASS,                               /**< "[ ]" */
  RE_OP_INV_CHAR_CLASS                            /**< "[^ ]" */
} re_opcode_t;

/**
 * Compiled byte code data.
 */
typedef struct
{
87
  ecma_compiled_code_t header;       /**< compiled code header */
D
Dániel Bátyai 已提交
88 89 90
  ecma_value_t source;               /**< original RegExp pattern */
  uint32_t captures_count;           /**< number of capturing brackets */
  uint32_t non_captures_count;       /**< number of non capturing brackets */
91 92 93 94 95 96 97 98 99 100 101 102
} re_compiled_code_t;

/**
 * Context of RegExp bytecode container
 *
 * Holds three byte pointers describing the bytecode buffer being worked on.
 *
 * NOTE(review): whether block_end_p refers to the last byte or to one past
 * it, and who owns the buffer, is not visible in this header - confirm in
 * the implementation.
 */
typedef struct
{
  uint8_t *block_start_p;      /**< start of bytecode block */
  uint8_t *block_end_p;        /**< end of bytecode block */
  uint8_t *current_p;          /**< current position in bytecode */
} re_bytecode_ctx_t;

D
Dániel Bátyai 已提交
103 104 105
re_opcode_t re_get_opcode (const uint8_t **bc_p);
ecma_char_t re_get_char (const uint8_t **bc_p);
uint32_t re_get_value (const uint8_t **bc_p);
106
uint32_t JERRY_ATTR_PURE re_get_bytecode_length (re_bytecode_ctx_t *bc_ctx_p);
107

/* Initializes the given bytecode context. */
void re_initialize_regexp_bytecode (re_bytecode_ctx_t *bc_ctx_p);

/*
 * Bytecode append functions: each takes the bytecode context and one item
 * (opcode, 32-bit value, character or jump offset).
 * NOTE(review): presumably the item is written at the current end of the
 * bytecode and the buffer grows as needed - confirm in the implementation.
 */
void re_append_opcode (re_bytecode_ctx_t *bc_ctx_p, const re_opcode_t opcode);
void re_append_u32 (re_bytecode_ctx_t *bc_ctx_p, const uint32_t value);
void re_append_char (re_bytecode_ctx_t *bc_ctx_p, const ecma_char_t input_char);
void re_append_jump_offset (re_bytecode_ctx_t *bc_ctx_p, uint32_t value);

D
Dániel Bátyai 已提交
115 116 117 118 119 120
void re_insert_opcode (re_bytecode_ctx_t *bc_ctx_p, const uint32_t offset, const re_opcode_t opcode);
void re_insert_u32 (re_bytecode_ctx_t *bc_ctx_p, const uint32_t offset, const uint32_t value);
void re_bytecode_list_insert (re_bytecode_ctx_t *bc_ctx_p,
                              const size_t offset,
                              const uint8_t *bytecode_p,
                              const size_t length);
121

/* Bytecode dump helper; declared only when JERRY_REGEXP_DUMP_BYTE_CODE is enabled. */
#if ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE)
void re_dump_bytecode (re_bytecode_ctx_t *bc_ctx);
#endif /* ENABLED (JERRY_REGEXP_DUMP_BYTE_CODE) */

/**
 * @}
 * @}
 * @}
 */

#endif /* ENABLED (JERRY_BUILTIN_REGEXP) */
#endif /* !RE_BYTECODE_H */