Commit 6c63adc2, authored by knowledgebao

Fri May 17 22:04:00 CST 2024 inscode

Parent 2eebcf6c
run = "pip install -r requirements.txt;python main.py"
language = "python"
[packager]
AUTO_PIP = true
@@ -9,4 +10,7 @@ PATH = "${VIRTUAL_ENV}/bin:${PATH}"
PYTHONPATH = "$PYTHONHOME/lib/python3.10:${VIRTUAL_ENV}/lib/python3.10/site-packages"
REPLIT_POETRY_PYPI_REPOSITORY = "http://mirrors.csdn.net.cn/repository/csdn-pypi-mirrors/simple"
MPLBACKEND = "TkAgg"
POETRY_CACHE_DIR = "/root/${PROJECT_DIR}/.cache/pypoetry"
\ No newline at end of file
POETRY_CACHE_DIR = "/root/${PROJECT_DIR}/.cache/pypoetry"
[debugger]
program = "main.py"
print('欢迎来到 InsCode')
\ No newline at end of file
#!/usr/bin/env python
# A simple Python script to remove duplicate files... Coded by Knowledgebao
import hashlib
import os
import sys
import argparse
import logging
import re

logging.basicConfig(format='[%(asctime)s.%(msecs)03d] [%(levelname).1s] [%(filename)s:%(lineno)d] %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)
# define a function to calculate the md5 checksum for a given file
def md5(f):
    """Takes one file path f as an argument and calculates the md5 checksum of that file."""
    md5Hash = hashlib.md5()
    with open(f, "rb") as fh:
        for chunk in iter(lambda: fh.read(4096), b""):
            md5Hash.update(chunk)
    return md5Hash.hexdigest()
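
# Usage sketch (illustrative only; "photo.jpg" is a hypothetical path):
#   md5("photo.jpg") returns a 32-character hex digest; reading the file in
#   4096-byte chunks keeps memory usage low even for very large files.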

class file_process():
    def __init__(self):
        self.del_count = 0
        self.md5_dict = {}

    # return False if file_name ends with one of the excluded extensions
    def __is_valid(self, file_name, exp_list):
        for exp in exp_list:
            if file_name.endswith(exp):
                return False
        return True

    # walk src_dir and index every file by md5+size in self.md5_dict
    def __add_dict(self, src_dir, exps):
        for root, _, files in os.walk(src_dir):  # the os.walk function allows checking subdirectories too
            for f in files:
                if not self.__is_valid(f, exps):
                    logger.debug(f"skip {os.path.join(root, f)}")
                    continue
                filePath = os.path.join(root, f)
                md5Hash = md5(filePath)
                size = os.path.getsize(filePath)
                # key = md5 digest + file size, so two files count as
                # duplicates only when both values match
                fileComb = str(md5Hash) + str(size)
                if fileComb in self.md5_dict:
                    self.md5_dict[fileComb].append(filePath)
                    self.del_count += 1
                else:
                    self.md5_dict.update({fileComb: [filePath]})
                    logger.debug(f"add {fileComb}: {filePath} to self.md5_dict")

    # return the file to keep: the shortest path outside del_dir, falling
    # back to the shortest path overall if every copy lives under del_dir
    def __get_valid_file(self, files, del_dir):
        min_len = files[0]
        min_len_nomal = None
        for f in files:
            if f.find(del_dir) == -1:
                if not min_len_nomal or len(f) < len(min_len_nomal):
                    min_len_nomal = f
            if len(f) < len(min_len):
                min_len = f
        if not min_len_nomal:
            min_len_nomal = min_len
        return min_len_nomal

    # Files whose path contains del_dir are deleted first, then the ones with
    # longer paths; only the file chosen by __get_valid_file is kept.
    def __rm_files(self, files, del_dir):
        if not files:
            return
        min_len_nomal = self.__get_valid_file(files, del_dir)
        for f in files:
            if f != min_len_nomal and os.path.exists(f):
                os.remove(f)
                logger.info(f"delete {f}")

    def rm_dup(self, src_dir, del_dir, exps, ask):
        if not os.path.isdir(src_dir):  # make sure the given directory exists
            logger.info("specified directory does not exist!")
            return
        logger.info("Working...")
        # record md5+size -> file path entries in self.md5_dict
        self.__add_dict(del_dir, exps)  # duplicates found in this directory are deleted first
        self.__add_dict(src_dir, exps)
        if ask and self.del_count:
            print("====================")
            for key in self.md5_dict:
                min_len_nomal = self.__get_valid_file(self.md5_dict[key], del_dir)
                for f in self.md5_dict[key]:
                    if f != min_len_nomal and os.path.exists(f):
                        print(f)
            print("====================")
            logger.info("Done! The files listed above will be deleted.\n")
            if input("\nEnter (y)es to confirm the operation or anything else to abort: ").lower() not in ("y", "yes"):
                sys.exit("Operation cancelled by user. Exiting...")
        logger.info(f"will delete {self.del_count} files")
        will_del_count = self.del_count
        for key in self.md5_dict:
            self.__rm_files(self.md5_dict[key], del_dir)
            will_del_count -= len(self.md5_dict[key]) - 1
            logger.info(f"deleting... {will_del_count} remaining")
        logger.info(f"deleted {self.del_count} files in total")

# map the numeric --log_level option to a logging level
def set_log_level(level):
    log_levels = {
        0: logging.DEBUG,
        1: logging.INFO,
        2: logging.WARNING,
        3: logging.ERROR,
        4: logging.CRITICAL
    }
    if level in log_levels:
        logger.setLevel(log_levels[level])

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='remove duplicate files from the given directories')
    parser.add_argument("-s", "--src_dir", help="source directory to scan", type=str)
    parser.add_argument("-d", "--del_dir", help="directory whose copies are deleted first when duplicates are found", type=str)
    parser.add_argument(
        "-l", "--log_level", help="0-4: debug, info, warning, error, critical", type=int, default=1)
    parser.add_argument("-e", "--exps", help="file extensions to skip (never deleted)", type=str, nargs='+', default=[])
    # action="store_false": the confirmation prompt is shown by default, -a disables it
    parser.add_argument("-a", "--auto", action="store_false", help="delete automatically without asking")
    args = parser.parse_args()
    set_log_level(args.log_level)
    logger.info(f"Starting..., {args.src_dir}, {args.del_dir}, {args.exps}, {args.auto}")
    pro = file_process()
    pro.rm_dup(args.src_dir, args.del_dir, args.exps, args.auto)
\ No newline at end of file
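
A typical invocation of the duplicate-remover script (the script's file name is not shown in this diff; the paths and extensions below are hypothetical, for illustration only):

python remove_duplicates.py -s /data/photos -d /data/photos/backup -e .tmp .log -l 0

Passing -a skips the confirmation prompt; because --auto uses action="store_false", the prompt is shown by default.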