Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
xindoo
regex
提交
90ea8636
R
regex
项目概览
xindoo
/
regex
通知
4
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
R
regex
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
90ea8636
编写于
5月 19, 2020
作者:
xindoo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
增加math获取匹配字符串集合的方法
上级
4475c7af
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
121 addition
and
47 deletion
+121
-47
src/main/java/xyz/xindoo/re/Regex.java
src/main/java/xyz/xindoo/re/Regex.java
+117
-46
src/main/java/xyz/xindoo/re/RegexTest.java
src/main/java/xyz/xindoo/re/RegexTest.java
+4
-1
未找到文件。
src/main/java/xyz/xindoo/re/Regex.java
浏览文件 @
90ea8636
...
...
@@ -14,6 +14,7 @@ import java.util.ArrayDeque;
import
java.util.Collections
;
import
java.util.HashSet
;
import
java.util.LinkedList
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Queue
;
import
java.util.Set
;
...
...
@@ -65,7 +66,6 @@ public class Regex {
addedStates
.
add
(((
DFAState
)
dfaGraph
.
start
).
getAllStateIds
());
while
(!
queue
.
isEmpty
())
{
State
curState
=
queue
.
poll
();
for
(
Map
.
Entry
<
String
,
Set
<
State
>>
entry
:
curState
.
next
.
entrySet
())
{
String
key
=
entry
.
getKey
();
Set
<
State
>
nexts
=
entry
.
getValue
();
...
...
@@ -114,6 +114,7 @@ public class Regex {
edge
=
getCharSetMatch
(
reader
);
break
;
}
// 暂时未支持零宽断言
case
'^'
:
{
break
;
}
...
...
@@ -185,6 +186,7 @@ public class Regex {
private
static
DFAGraph
convertNfa2Dfa
(
NFAGraph
nfaGraph
)
{
DFAGraph
dfaGraph
=
new
DFAGraph
();
Set
<
State
>
startStates
=
new
HashSet
<>();
// 用NFA图的起始节点构造DFA的起始节点
startStates
.
addAll
(
getNextEStates
(
nfaGraph
.
start
,
new
HashSet
<>()));
if
(
startStates
.
size
()
==
0
)
{
startStates
.
add
(
nfaGraph
.
start
);
...
...
@@ -192,6 +194,7 @@ public class Regex {
dfaGraph
.
start
=
dfaGraph
.
getOrBuild
(
startStates
);
Queue
<
DFAState
>
queue
=
new
LinkedList
<>();
Set
<
State
>
finishedStates
=
new
HashSet
<>();
// 以BFS的方式从已找到的起始节点遍历并构建DFA
queue
.
add
(
dfaGraph
.
start
);
while
(!
queue
.
isEmpty
())
{
DFAState
curState
=
queue
.
poll
();
...
...
@@ -244,7 +247,7 @@ public class Regex {
reader
.
next
();
break
;
}
case
'{'
:
{
//
//
暂未支持{}指定重复次数
break
;
}
default
:
{
return
;
...
...
@@ -252,6 +255,60 @@ public class Regex {
}
}
/**
 * Reads the body of a {@code [...]} character set from the pattern.
 *
 * <p>Pulls characters from {@code reader} one at a time until {@code ']'} is
 * returned, and hands back everything read before it, unmodified. The original
 * note on this method states that only letters and digits are supported inside
 * the brackets.
 *
 * <p>NOTE(review): what {@code reader.next()} does once the pattern is exhausted
 * is not visible here, so the behavior for a pattern with no closing
 * {@code ']'} should be confirmed against {@code Reader}.
 *
 * @param reader pattern reader; presumably positioned just after the opening
 *               {@code '['} (confirm at the call site)
 * @return the characters read before the closing {@code ']'}; may be empty
 */
private static String getCharSetMatch(Reader reader) {
    // StringBuilder avoids re-copying the accumulated text on every character.
    StringBuilder charSet = new StringBuilder();
    char ch;
    while ((ch = reader.next()) != ']') {
        charSet.append(ch);
    }
    return charSet.toString();
}
/**
 * Parses the body of a {@code {...}} repetition quantifier into a pair of bounds.
 *
 * <p>Characters are read from {@code reader} until {@code '}'} is returned;
 * space characters are dropped. The remaining text is interpreted as:
 * <ul>
 *   <li>{@code n}   -> {@code [n, n]}</li>
 *   <li>{@code n,m} -> {@code [n, m]}</li>
 *   <li>{@code n,}  -> {@code [n, Integer.MAX_VALUE]}</li>
 * </ul>
 * Anything after a second comma is ignored.
 *
 * @param reader pattern reader; presumably positioned just after the opening
 *               {@code '{'} (confirm at the call site)
 * @return a two-element array: index 0 is the lower bound, index 1 the upper bound
 * @throws NumberFormatException if a bound is not a valid {@code int}
 *                               (for example an empty lower bound)
 */
private static int[] getRange(Reader reader) {
    // Accumulate with StringBuilder instead of repeated String concatenation.
    StringBuilder buffer = new StringBuilder();
    char ch;
    while ((ch = reader.next()) != '}') {
        if (ch == ' ') {
            continue;
        }
        buffer.append(ch);
    }
    String rangeStr = buffer.toString();

    int[] res = new int[2];
    if (!rangeStr.contains(",")) {
        // Single number: exact repetition count.
        res[0] = Integer.parseInt(rangeStr);
        res[1] = res[0];
    } else {
        // limit -1 keeps the trailing empty string so "n," yields se[1] == "".
        String[] se = rangeStr.split(",", -1);
        res[0] = Integer.parseInt(se[0]);
        if (se[1].length() == 0) {
            // Open-ended upper bound.
            res[1] = Integer.MAX_VALUE;
        } else {
            res[1] = Integer.parseInt(se[1]);
        }
    }
    return res;
}
/**
 * Collects the states reachable from {@code curState} by following one or more
 * EPSILON transitions.
 *
 * <p>The traversal is recursive and shares {@code stateSet} as its visited set
 * across all recursive calls. Each successor is recorded in {@code stateSet}
 * <em>before</em> it is explored, so a cycle of EPSILON edges (A -> B -> A)
 * terminates instead of recursing until the stack overflows.
 *
 * @param curState state whose EPSILON successors are explored; it is only part
 *                 of the result if it is reached again through a cycle
 * @param stateSet states already visited; this set is mutated by the call
 * @return the newly reached states, or an immutable empty set when
 *         {@code curState} has no EPSILON edge
 */
private static Set<State> getNextEStates(State curState, Set<State> stateSet) {
    if (!curState.next.containsKey(Constant.EPSILON)) {
        return Collections.emptySet();
    }
    Set<State> res = new HashSet<>();
    for (State state : curState.next.get(Constant.EPSILON)) {
        // Set.add returns false when the state was already visited; marking
        // it here, ahead of the recursive call, is what breaks EPSILON cycles.
        if (!stateSet.add(state)) {
            continue;
        }
        res.add(state);
        res.addAll(getNextEStates(state, stateSet));
    }
    return res;
}
/**
 * Tests {@code text} against this regex by delegating to the two-argument
 * {@code isMatch} overload with {@code 0} as the second argument.
 *
 * NOTE(review): the meaning of that second argument is not visible in this
 * view; confirm against the overload before relying on it.
 *
 * @param text the input string to test
 * @return whatever {@code isMatch(text, 0)} returns
 */
public
boolean
isMatch
(
String
text
)
{
return
isMatch
(
text
,
0
);
}
...
...
@@ -264,12 +321,17 @@ public class Regex {
return
isMatch
(
text
,
0
,
start
);
}
private
boolean
isMatch
(
String
text
,
int
pos
,
State
curNFAState
)
{
/**
* 匹配过程就是根据输入遍历图的过程, 这里DFA和NFA用了同样的代码, 但实际上因为DFA的特性是不会产生回溯的,
* 所以DFA可以换成非递归的形式
*/
private
boolean
isMatch
(
String
text
,
int
pos
,
State
curState
)
{
if
(
pos
==
text
.
length
())
{
if
(
cur
NFA
State
.
isEndState
())
{
if
(
curState
.
isEndState
())
{
return
true
;
}
for
(
State
nextState
:
cur
NFA
State
.
next
.
getOrDefault
(
Constant
.
EPSILON
,
Collections
.
emptySet
()))
{
for
(
State
nextState
:
curState
.
next
.
getOrDefault
(
Constant
.
EPSILON
,
Collections
.
emptySet
()))
{
if
(
isMatch
(
text
,
pos
,
nextState
))
{
return
true
;
}
...
...
@@ -277,7 +339,7 @@ public class Regex {
return
false
;
}
for
(
Map
.
Entry
<
String
,
Set
<
State
>>
entry
:
cur
NFA
State
.
next
.
entrySet
())
{
for
(
Map
.
Entry
<
String
,
Set
<
State
>>
entry
:
curState
.
next
.
entrySet
())
{
String
edge
=
entry
.
getKey
();
if
(
Constant
.
EPSILON
.
equals
(
edge
))
{
for
(
State
nextState
:
entry
.
getValue
())
{
...
...
@@ -301,56 +363,65 @@ public class Regex {
return
false
;
}
/**
* 暂时只支持字母 数字
* */
private
static
String
getCharSetMatch
(
Reader
reader
)
{
String
charSet
=
""
;
char
ch
;
while
((
ch
=
reader
.
next
())
!=
']'
)
{
charSet
+=
ch
;
}
return
charSet
;
public
List
<
String
>
match
(
String
text
)
{
return
match
(
text
,
0
);
}
private
static
int
[]
getRange
(
Reader
reader
)
{
String
rangeStr
=
""
;
char
ch
;
while
((
ch
=
reader
.
next
())
!=
'}'
)
{
if
(
ch
==
' '
)
{
continue
;
}
rangeStr
+=
ch
;
}
int
[]
res
=
new
int
[
2
];
if
(!
rangeStr
.
contains
(
","
))
{
res
[
0
]
=
Integer
.
parseInt
(
rangeStr
);
res
[
1
]
=
res
[
0
];
}
else
{
String
[]
se
=
rangeStr
.
split
(
","
,
-
1
);
res
[
0
]
=
Integer
.
parseInt
(
se
[
0
]);
if
(
se
[
1
].
length
()
==
0
)
{
res
[
1
]
=
Integer
.
MAX_VALUE
;
public
List
<
String
>
match
(
String
text
,
int
mod
)
{
int
s
=
0
;
int
e
=
-
1
;
List
<
String
>
res
=
new
LinkedList
<>();
while
(
s
!=
text
.
length
())
{
e
=
getMatchEnd
(
text
,
s
,
dfaGraph
.
start
);
if
(
e
!=
-
1
)
{
res
.
add
(
text
.
substring
(
s
,
e
));
s
=
e
;
}
else
{
res
[
1
]
=
Integer
.
parseInt
(
se
[
1
])
;
s
++
;
}
}
return
res
;
}
private
static
Set
<
State
>
getNextEStates
(
State
curState
,
Set
<
State
>
stateSet
)
{
if
(!
curState
.
next
.
containsKey
(
Constant
.
EPSILON
))
{
return
Collections
.
emptySet
();
// 获取正则表达式在字符串中能匹配到的结尾的位置
private
int
getMatchEnd
(
String
text
,
int
pos
,
State
curState
)
{
int
end
=
-
1
;
if
(
curState
.
isEndState
())
{
return
pos
;
}
Set
<
State
>
res
=
new
HashSet
<>();
for
(
State
state
:
curState
.
next
.
get
(
Constant
.
EPSILON
))
{
if
(
stateSet
.
contains
(
state
))
{
continue
;
if
(
pos
==
text
.
length
())
{
for
(
State
nextState
:
curState
.
next
.
getOrDefault
(
Constant
.
EPSILON
,
Collections
.
emptySet
()))
{
end
=
getMatchEnd
(
text
,
pos
,
nextState
);
if
(
end
!=
-
1
)
{
return
end
;
}
}
res
.
add
(
state
);
res
.
addAll
(
getNextEStates
(
state
,
stateSet
));
stateSet
.
add
(
state
);
}
return
res
;
for
(
Map
.
Entry
<
String
,
Set
<
State
>>
entry
:
curState
.
next
.
entrySet
())
{
String
edge
=
entry
.
getKey
();
if
(
Constant
.
EPSILON
.
equals
(
edge
))
{
for
(
State
nextState
:
entry
.
getValue
())
{
end
=
getMatchEnd
(
text
,
pos
,
nextState
);
if
(
end
!=
-
1
)
{
return
end
;
}
}
}
else
{
MatchStrategy
matchStrategy
=
MatchStrategyManager
.
getStrategy
(
edge
);
if
(!
matchStrategy
.
isMatch
(
text
.
charAt
(
pos
),
edge
))
{
continue
;
}
// 遍历匹配策略
for
(
State
nextState
:
entry
.
getValue
())
{
end
=
getMatchEnd
(
text
,
pos
+
1
,
nextState
);
if
(
end
!=
-
1
)
{
return
end
;
}
}
}
}
return
-
1
;
}
}
src/main/java/xyz/xindoo/re/RegexTest.java
浏览文件 @
90ea8636
package
xyz.xindoo.re
;
import
java.util.List
;
public
class
RegexTest
{
public
static
void
main
(
String
[]
args
)
throws
Exception
{
Regex
regex
=
Regex
.
compile
(
"a(b|c)*"
);
Regex
regex
=
Regex
.
compile
(
"a(b|c)*c"
);
List
<
String
>
res
=
regex
.
match
(
"aabacabbbcaccc"
);
regex
.
printNfa
();
System
.
out
.
println
(
""
);
regex
.
printDfa
();
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录