Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
knowledgebao
Python
提交
6c63adc2
P
Python
项目概览
knowledgebao
/
Python
与 Fork 源项目一致
Fork自
inscode / Python
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Python
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
6c63adc2
编写于
5月 17, 2024
作者:
K
knowledgebao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fri May 17 22:04:00 CST 2024 inscode
上级
2eebcf6c
变更
2
隐藏空白更改
内联
并排
Showing
2 changed files
with
140 additions
and
2 deletions
+140
-2
.inscode
.inscode
+5
-1
main.py
main.py
+135
-1
未找到文件。
.inscode
浏览文件 @
6c63adc2
run = "pip install -r requirements.txt;python main.py"
language = "python"
[packager]
AUTO_PIP = true
...
...
@@ -9,4 +10,7 @@ PATH = "${VIRTUAL_ENV}/bin:${PATH}"
PYTHONPATH = "$PYTHONHOME/lib/python3.10:${VIRTUAL_ENV}/lib/python3.10/site-packages"
REPLIT_POETRY_PYPI_REPOSITORY = "http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/simple"
MPLBACKEND = "TkAgg"
POETRY_CACHE_DIR = "/root/${PROJECT_DIR}/.cache/pypoetry"
\ No newline at end of file
POETRY_CACHE_DIR = "/root/${PROJECT_DIR}/.cache/pypoetry"
[debugger]
program = "main.py"
main.py
浏览文件 @
6c63adc2
print
(
'欢迎来到 InsCode'
)
\ No newline at end of file
#!/usr/bin/env python
# A simple Python script to remove duplicate files...Coded by Knowledgebao
import
hashlib
import
os
import
sys
import
argparse
import
logging
import
re
# Configure the root logger once at import time. Each record is rendered as
# "[timestamp.milliseconds] [first letter of level] [file:line] message".
logging.basicConfig(
    datefmt='%Y-%m-%d %H:%M:%S',
    format='[%(asctime)s.%(msecs)03d] [%(levelname).1s] [%(filename)s:%(lineno)d] %(message)s',
)

# Module-level logger shared by every function and class in this script.
logger = logging.getLogger(__name__)
# Compute the MD5 checksum of a file without loading it into memory at once.
def md5(f, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at path *f*.

    The file is opened in binary mode and fed to the hash in fixed-size
    chunks, so memory use stays bounded regardless of file size.

    Args:
        f: Path of the file to hash (anything ``open`` accepts).
        chunk_size: Number of bytes to read per iteration; must be >= 1.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    if chunk_size < 1:
        # read(0) returns b"" immediately, which would silently hash nothing.
        raise ValueError("chunk_size must be a positive integer")
    digest = hashlib.md5()
    # Use a dedicated name for the handle instead of rebinding the path
    # parameter, so the path stays available for the whole function.
    with open(f, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
class file_process():
    """Find files with identical content and delete all but one copy.

    Files are grouped under the key ``md5 hex digest + size in bytes``; a
    group holding more than one path is a set of duplicates. Within a group
    the copy that is kept is the shortest path that does not contain
    ``del_dir``; if every path contains ``del_dir`` the shortest path overall
    is kept.
    """

    def __init__(self):
        # Number of duplicate paths found so far (group members after the first).
        self.del_count = 0
        # Maps "md5 + size" key -> list of file paths sharing that content.
        self.md5_dict = {}
        # Symlink-resolved paths already recorded. Prevents one physical file,
        # reached twice (overlapping directories, different spellings of the
        # same path, symlinks), from being treated as its own duplicate and
        # deleted.
        self.__seen_paths = set()

    def __is_valid(self, file_name, exp_list):
        """Return False when *file_name* ends with an excluded suffix, else True.

        *exp_list* may be ``None`` or empty, in which case nothing is excluded.
        """
        if not exp_list:
            return True
        return not file_name.endswith(tuple(exp_list))

    def __add_dict(self, src_dir, exps):
        """Walk *src_dir* recursively and record every eligible file in ``md5_dict``.

        Files whose name ends with a suffix in *exps* are skipped, as are
        files that cannot be read and files already recorded under another
        path. A falsy *src_dir* is a no-op.
        """
        if not src_dir:
            return
        # os.walk descends into sub-directories as well.
        for root, _, files in os.walk(src_dir):
            for name in files:
                file_path = os.path.join(root, name)
                if not self.__is_valid(name, exps):
                    logger.debug(f"skip {file_path}")
                    continue
                real_path = os.path.realpath(file_path)
                if real_path in self.__seen_paths:
                    logger.debug(f"skip {file_path}")
                    continue
                try:
                    file_key = str(md5(file_path)) + str(os.path.getsize(file_path))
                except OSError as err:
                    # An unreadable file cannot be proven a duplicate; leave
                    # it alone instead of aborting the whole scan.
                    logger.warning(f"cannot read {file_path}: {err}")
                    continue
                self.__seen_paths.add(real_path)
                group = self.md5_dict.setdefault(file_key, [])
                if group:
                    self.del_count += 1
                group.append(file_path)
                logger.debug(f"add {file_key}:{file_path} to self.md5_dict")

    def __get_valid_file(self, files, del_dir):
        """Return the one path in *files* that should be kept.

        Preference goes to the shortest path that does not contain *del_dir*
        as a substring; when no such path exists (or *del_dir* is falsy) the
        shortest path overall wins. Ties keep the earliest entry.
        *files* must be non-empty.
        """
        outside = [path for path in files if del_dir and del_dir not in path]
        return min(outside or files, key=len)

    # Files containing del_dir are deleted first; among the rest, the longer
    # paths are deleted.
    def __rm_files(self, files, del_dir):
        """Delete every path in *files* except the keeper; return how many were removed.

        A path that has already disappeared is ignored; any other deletion
        failure is logged and the remaining paths are still processed.
        """
        if not files:
            return 0
        keeper = self.__get_valid_file(files, del_dir)
        removed = 0
        for path in files:
            if path == keeper:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                continue
            except OSError as err:
                logger.error(f"failed to delete {path}: {err}")
                continue
            removed += 1
            logger.info(f"delete {path}")
        return removed

    def rm_dup(self, src_dir, del_dir, exps, ask):
        """Scan *del_dir* and *src_dir* and delete duplicate files.

        Args:
            src_dir: Directory to scan; must exist.
            del_dir: Optional second directory; duplicates whose path
                contains this string are deleted in preference to others.
            exps: Optional iterable of file-name suffixes to leave untouched.
            ask: When true and duplicates were found, list the files that
                would be deleted and require a y/yes confirmation on stdin.

        Raises:
            SystemExit: If the user does not confirm the deletion.
        """
        if not src_dir or not os.path.isdir(src_dir):
            # Make sure the given directory exists.
            logger.info("specified directory does not exist!")
            return
        logger.info("Working...")
        # Record "md5 + size" -> paths. del_dir is scanned first so that a file
        # reachable from both directories is recorded under its del_dir path.
        self.__add_dict(del_dir, exps)
        self.__add_dict(src_dir, exps)
        if ask and self.del_count:
            print("====================")
            for paths in self.md5_dict.values():
                keeper = self.__get_valid_file(paths, del_dir)
                for path in paths:
                    if path != keeper and os.path.exists(path):
                        print(path)
            print("====================")
            logger.info("Done! Above files will be deleted:\n")
            answer = input("\nEnter (y)es to confirm operation or anything else to abort: ")
            if answer.lower() not in ("y", "yes"):
                sys.exit("Operation cancelled by user. Exiting...")
        logger.info(f"will del count {self.del_count}")
        remaining = self.del_count
        deleted = 0
        for paths in self.md5_dict.values():
            if len(paths) < 2:
                # A unique file has nothing to delete and nothing to report.
                continue
            deleted += self.__rm_files(paths, del_dir)
            remaining -= len(paths) - 1
            logger.info(f"del... have {remaining}")
        # Report what was actually removed, which may be fewer than planned
        # when individual deletions failed.
        logger.info(f"del count {deleted} over")
# Log-level configuration
def set_log_level(level):
    """Set the module logger's level from the numeric code *level*.

    Codes 0 through 4 select DEBUG, INFO, WARNING, ERROR and CRITICAL in that
    order; any other value leaves the logger's level unchanged.
    """
    code_to_level = dict(enumerate((
        logging.DEBUG,
        logging.INFO,
        logging.WARNING,
        logging.ERROR,
        logging.CRITICAL,
    )))
    chosen = code_to_level.get(level)
    if chosen is not None:
        logger.setLevel(chosen)
def build_arg_parser():
    """Build the command-line parser for the duplicate-file remover.

    ``--src_dir`` is required so a missing value is reported by argparse
    rather than surfacing later as a ``TypeError``. ``--del_dir`` defaults
    to an empty string and ``--exps`` to an empty list so the optional
    arguments never reach the processing code as ``None``.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser(description='please enter correct para')
    parser.add_argument("-s", "--src_dir", help="first dir", type=str, required=True)
    parser.add_argument("-d", "--del_dir", help="second dir, if dup, del file in this dir",
                        type=str, default="")
    parser.add_argument("-l", "--log_level", help="0-4, debug,info,warning,error,critical",
                        type=int, default=1)
    parser.add_argument("-e", "--exps", help="not del ext", type=str, nargs='+', default=[])
    # store_false: args.auto is True (ask for confirmation) unless -a is given.
    parser.add_argument("-a", "--auto", action="store_false", help="auto del, not answer")
    return parser


if __name__ == "__main__":
    args = build_arg_parser().parse_args()
    set_log_level(args.log_level)
    logger.info(f"Starting..., {args.src_dir}, {args.del_dir}, {args.exps}, {args.auto}")
    pro = file_process()
    # args.auto doubles as the "ask before deleting" flag expected by rm_dup.
    pro.rm_dup(args.src_dir, args.del_dir, args.exps, args.auto)
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录