提交 28259df5 编写于 作者: N Nick Craig-Wood

filter: add `{{ regexp }}` syntax to pattern matches - fixes #4074

There has been a desire from more advanced rclone users to have regexp
filtering as well as the glob filtering.

This patch adds regexp filtering using this syntax `{{ regexp }}`
which is currently a syntax error, so is backwards compatible.

This means regexps can be used everywhere globs can be used, and that
they also can be mixed with globs in the same pattern, eg `*.{{jpe?g}}`
上级 269f90c1
......@@ -33,6 +33,9 @@ you expect. Instead use a `--filter...` flag.
### Pattern syntax
Here is a formal definition of the pattern syntax,
[examples](#examples) are below.
Rclone matching rules follow a glob style:
* matches any sequence of non-separator (/) characters
......@@ -42,8 +45,10 @@ Rclone matching rules follow a glob style:
character class (must be non-empty)
{ pattern-list }
pattern alternatives
{{ regexp }}
regexp to match
c matches character c (c != *, **, ?, \, [, {, })
\c matches reserved character c (c = *, **, ?, \, [, {, })
\c matches reserved character c (c = *, **, ?, \, [, {, }) or character class
character-range:
......@@ -62,6 +67,10 @@ character classes (see [Go regular expression reference](https://golang.org/pkg/
Perl character classes (e.g. \s, \S, \w, \W)
ASCII character classes (e.g. [[:alnum:]], [[:alpha:]], [[:punct:]], [[:xdigit:]])
regexp for advanced users to insert a regular expression - see [below](#regexp) for more info:
Any re2 regular expression not containing `}}`
If the filter pattern starts with a `/` then it only matches
at the top level of the directory tree,
**relative to the root of the remote** (not necessarily the root
......@@ -111,6 +120,58 @@ With `--ignore-case`
potato - matches "potato"
- matches "POTATO"
## Using regular expressions in filter patterns {#regexp}
The syntax of filter patterns is glob style matching (like `bash`
uses) to make things easy for users. However this does not provide
absolute control over the matching, so for advanced users rclone also
provides a regular expression syntax.
The regular expressions used are as defined in the [Go regular
expression reference](https://golang.org/pkg/regexp/syntax/). Regular
expressions should be enclosed in `{{` `}}`. They will match only the
last path segment if the glob doesn't start with `/` or the whole path
name if it does.
Here is how the `{{regexp}}` is transformed into a full regular
expression to match the entire path:
{{regexp}} becomes (^|/)(regexp)$
/{{regexp}} becomes ^(regexp)$
Regexp syntax can be mixed with glob syntax, for example
*.{{jpe?g}} to match file.jpg, file.jpeg but not file.png
Note that you can use `-vv --dump filters` to show the filter patterns
in regexp format - rclone implements the glob patterns by transforming
them into regular expressions.
## Filter pattern examples {#examples}
| Description | Pattern | Matches | Does not match |
| ----------- |-------- | ------- | -------------- |
| Wildcard | `*.jpg` | `/file.jpg` | `/file.png` |
| | | `/dir/file.jpg` | `/dir/file.png` |
| Rooted | `/*.jpg` | `/file.jpg` | `/file.png` |
| | | `/file2.jpg` | `/dir/file.jpg` |
| Alternates | `*.{jpg,png}` | `/file.jpg` | `/file.gif` |
| | | `/dir/file.png` | `/dir/file.gif` |
| Path Wildcard | `dir/**` | `/dir/anyfile` | `file.png` |
| | | `/subdir/dir/subsubdir/anyfile` | `/subdir/file.png` |
| Any Char | `*.t?t` | `/file.txt` | `/file.qxt` |
| | | `/dir/file.tzt` | `/dir/file.png` |
| Range | `*.[a-z]` | `/file.a` | `/file.0` |
| | | `/dir/file.b` | `/dir/file.1` |
| Escape | `*.\?\?\?` | `/file.???` | `/file.abc` |
| | | `/dir/file.???` | `/dir/file.def` |
| Class | `*.\d\d\d` | `/file.012` | `/file.abc` |
| | | `/dir/file.345` | `/dir/file.def` |
| Regexp | `*.{{jpe?g}}` | `/file.jpeg` | `/file.png` |
| | | `/dir/file.jpg` | `/dir/file.jpeeg` |
| Rooted Regexp | `/{{.*\.jpe?g}}` | `/file.jpeg` | `/file.png` |
| | | `/file.jpg` | `/dir/file.jpg` |
## How filter rules are applied to files
Rclone path/file name filters are made up of one or more of the following flags:
......
......@@ -42,12 +42,26 @@ func globToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
inBraces := false
inBrackets := 0
slashed := false
inRegexp := false
var next, last rune
for _, c := range glob {
next, last = c, next
if slashed {
_, _ = re.WriteRune(c)
slashed = false
continue
}
if inRegexp {
if c == '}' && last == '}' {
inRegexp = false
// Unwrite last }
re.Truncate(re.Len() - 1)
_, _ = re.WriteRune(')')
} else {
_, _ = re.WriteRune(c)
}
continue
}
if c != '*' {
err := insertStars()
if err != nil {
......@@ -79,10 +93,16 @@ func globToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
return nil, errors.Errorf("mismatched ']' in glob %q", glob)
case '{':
if inBraces {
return nil, errors.Errorf("can't nest '{' '}' in glob %q", glob)
if last == '{' {
inRegexp = true
inBraces = false
} else {
return nil, errors.Errorf("can't nest '{' '}' in glob %q", glob)
}
} else {
inBraces = true
_, _ = re.WriteRune('(')
}
inBraces = true
_, _ = re.WriteRune('(')
case '}':
if !inBraces {
return nil, errors.Errorf("mismatched '{' and '}' in glob %q", glob)
......@@ -112,6 +132,9 @@ func globToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
if inBraces {
return nil, errors.Errorf("mismatched '{' and '}' in glob %q", glob)
}
if inRegexp {
return nil, errors.Errorf("mismatched '{{' and '}}' in glob %q", glob)
}
_, _ = re.WriteRune('$')
result, err := regexp.Compile(re.String())
if err != nil {
......@@ -121,8 +144,10 @@ func globToRegexp(glob string, ignoreCase bool) (*regexp.Regexp, error) {
}
var (
// Can't deal with / or ** in {}
tooHardRe = regexp.MustCompile(`{[^{}]*(\*\*|/)[^{}]*}`)
// Can't deal with
// / or ** in {}
// {{ regexp }}
tooHardRe = regexp.MustCompile(`({[^{}]*(\*\*|/)[^{}]*})|\{\{|\}\}`)
// Squash all /
squashSlash = regexp.MustCompile(`/{2,}`)
......
......@@ -32,7 +32,7 @@ func TestGlobToRegexp(t *testing.T) {
{`***`, `(^|/)`, `too many stars`},
{`ab]c`, `(^|/)`, `mismatched ']'`},
{`ab[c`, `(^|/)`, `mismatched '[' and ']'`},
{`ab{{cd`, `(^|/)`, `can't nest`},
{`ab{x{cd`, `(^|/)`, `can't nest`},
{`ab{}}cd`, `(^|/)`, `mismatched '{' and '}'`},
{`ab}c`, `(^|/)`, `mismatched '{' and '}'`},
{`ab{c`, `(^|/)`, `mismatched '{' and '}'`},
......@@ -40,16 +40,21 @@ func TestGlobToRegexp(t *testing.T) {
{`[a--b]`, `(^|/)`, `bad glob pattern`},
{`a\*b`, `(^|/)a\*b$`, ``},
{`a\\b`, `(^|/)a\\b$`, ``},
{`a{{.*}}b`, `(^|/)a(.*)b$`, ``},
{`a{{.*}`, `(^|/)a(.*)b$`, `mismatched '{{' and '}}'`},
{`{{regexp}}`, `(^|/)(regexp)$`, ``},
{`\{{{regexp}}`, `(^|/)\{(regexp)$`, ``},
{`/{{regexp}}`, `^(regexp)$`, ``},
} {
for _, ignoreCase := range []bool{false, true} {
gotRe, err := globToRegexp(test.in, ignoreCase)
if test.error == "" {
require.NoError(t, err, test.in)
prefix := ""
if ignoreCase {
prefix = "(?i)"
}
got := gotRe.String()
require.NoError(t, err, test.in)
assert.Equal(t, prefix+test.want, got, test.in)
} else {
require.Error(t, err, test.in)
......@@ -84,6 +89,7 @@ func TestGlobToDirGlobs(t *testing.T) {
{`/a/{jpg,png,gif}/*.{jpg,png,gif}`, []string{"/a/{jpg,png,gif}/", "/a/", "/"}},
{`a/{a,a*b,a**c}/d/`, []string{"/**"}},
{`/a/{a,a*b,a/c,d}/d/`, []string{"/**"}},
{`/a/{{.*}}/d/`, []string{"/**"}},
{`**`, []string{"**/"}},
{`a**`, []string{"a**/"}},
{`a**b`, []string{"a**/"}},
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册