81 lines
3.0 KiB
Python
81 lines
3.0 KiB
Python
from pathlib import Path
|
|
|
|
ASMRThreePath = Path("C:\\ASMRThree")
|
|
ASMRTwoPath = Path("D:\\ASMRTwo")
|
|
ASMROnePath = Path("E:\\ASMROne")
|
|
|
|
size_one, size_two, size_three = 0, 0, 0
|
|
files_one, files_two, files_three = [], [], []
|
|
folders_one, folders_two, folders_three = [], [], []
|
|
|
|
# Statistic calculation for ASMROne
|
|
for root, dirs, files in ASMROnePath.walk(): # Root will iterate through all folders
|
|
if root.absolute() != ASMROnePath.absolute(): # Skip root of ASMROnePath
|
|
folders_one.append(root) # Add folder to list
|
|
for fname in files: # Iterate through all files in current root
|
|
file = root/fname # Get file path
|
|
assert file.is_file()
|
|
files_one.append(file)
|
|
size_one += file.stat().st_size # Get file size
|
|
|
|
# Statistic calculation for ASMRTwo
|
|
for root, dirs, files in ASMRTwoPath.walk(): # Root will iterate through all folders
|
|
if root.absolute() != ASMRTwoPath.absolute(): # Skip root of ASMRTwoPath
|
|
folders_two.append(root) # Add folder to list
|
|
for fname in files: # Iterate through all files in current root
|
|
file = root/fname # Get file path
|
|
assert file.is_file()
|
|
files_two.append(file)
|
|
size_two += file.stat().st_size # Get file size
|
|
|
|
# Statistic calculation for ASMRThree
|
|
for root, dirs, files in ASMRThreePath.walk(): # Root will iterate through all folders
|
|
if root.absolute() != ASMRThreePath.absolute(): # Skip root of ASMRThreePath
|
|
folders_three.append(root) # Add folder to list
|
|
for fname in files: # Iterate through all files in current root
|
|
file = root/fname # Get file path
|
|
assert file.is_file()
|
|
files_three.append(file)
|
|
size_three += file.stat().st_size # Get file size
|
|
|
|
# All subset roots, in the order their entries are gathered below.
DataSubsetPaths = [ASMROnePath, ASMRTwoPath, ASMRThreePath]

# Collect ASMR works: every direct child of a subset root is recorded.
# Presumably each child is one work folder named by its RJ ID - confirm.
DLSiteWorksPaths = []
for ASMRSubsetPath in DataSubsetPaths:
    DLSiteWorksPaths.extend(ASMRSubsetPath.iterdir())
|
|
|
|
# Lookup table from an upper-cased file suffix (leading dot included) to a
# coarse media category. The empty-string key covers files with no suffix.
fileExt2fileType = {
    ".TXT": "Document",
    ".WAV": "Audio",
    ".MP3": "Audio",
    ".PNG": "Image",
    ".JPG": "Image",
    ".VTT": "Subtitle",
    ".PDF": "Document",
    ".FLAC": "Audio",
    ".MP4": "Video",
    ".LRC": "Subtitle",
    ".SRT": "Subtitle",
    ".JPEG": "Image",
    ".ASS": "Subtitle",
    "": "NO EXTENSION",
    ".M4A": "Audio",
    ".MKV": "Video",
}
|
|
# Per-extension statistics keyed by the upper-cased file suffix ("" for files
# without one). Each value is a dict with:
#   'Count'         - number of files with that suffix
#   'List'          - the file paths themselves
#   'ExtensionMass' - total size in bytes of all files with that suffix
#   'MediaType'     - category from fileExt2fileType, or "UNKNOWN"
fileext_stat = {}
file_list = files_one + files_two + files_three
file_list_count = len(file_list)

for file in file_list:
    f_ext = file.suffix.upper()

    if f_ext not in fileext_stat:
        fileext_stat[f_ext] = {
            'Count': 0,
            'List': [],
            'ExtensionMass': 0,
            # A suffix missing from the table must not abort the whole run
            # with a KeyError; label it so it can still be spotted later.
            'MediaType': fileExt2fileType.get(f_ext, "UNKNOWN"),
        }

    ext_stat = fileext_stat[f_ext]
    ext_stat['Count'] += 1
    ext_stat['List'].append(file)
    ext_stat['ExtensionMass'] += file.stat().st_size