#python #file #directory #copy #shutil
Вопрос:
Я использую shutil.copy, чтобы копировать файлы в нужные папки каталога в зависимости от имени файла.
У меня есть следующий список файлов в папке под названием «Downloaded_Data»:
Subject_Blue_Cold_D_A_1.csv
Subject_Blue_Cold_D_A_2.csv
Subject_Blue_Cold_D_A_3.csv
Subject_Blue_Cold_D_B_1.csv
Subject_Blue_Cold_D_B_2.csv
Subject_Blue_Cold_D_B_3.csv
Subject_Blue_Cold_D_C_1.csv
Subject_Blue_Cold_D_C_2.csv
Subject_Blue_Cold_D_C_3.csv
Subject_Blue_Mild_D_A_1.csv
Subject_Blue_Mild_D_A_2.csv
Subject_Blue_Mild_D_A_3.csv
Subject_Blue_Mild_D_B_1.csv
Subject_Blue_Mild_D_B_2.csv
Subject_Blue_Mild_D_B_3.csv
Subject_Blue_Mild_D_C_1.csv
Subject_Blue_Mild_D_C_2.csv
Subject_Blue_Mild_D_C_3.csv
Subject_Blue_Hot_D_A_1.csv
Subject_Blue_Hot_D_A_2.csv
Subject_Blue_Hot_D_A_3.csv
Subject_Blue_Hot_D_B_1.csv
Subject_Blue_Hot_D_B_2.csv
Subject_Blue_Hot_D_B_3.csv
Subject_Blue_Hot_D_C_1.csv
Subject_Blue_Hot_D_C_2.csv
Subject_Blue_Hot_D_C_3.csv
Subject_Red_Cold_D_A_1.csv
Subject_Red_Cold_D_A_2.csv
Subject_Red_Cold_D_A_3.csv
Subject_Red_Cold_D_B_1.csv
Subject_Red_Cold_D_B_2.csv
Subject_Red_Cold_D_B_3.csv
Subject_Red_Cold_D_C_1.csv
Subject_Red_Cold_D_C_2.csv
Subject_Red_Cold_D_C_3.csv
Subject_Red_Mild_D_A_1.csv
Subject_Red_Mild_D_A_2.csv
Subject_Red_Mild_D_A_3.csv
Subject_Red_Mild_D_B_1.csv
Subject_Red_Mild_D_B_2.csv
Subject_Red_Mild_D_B_3.csv
Subject_Red_Mild_D_C_1.csv
Subject_Red_Mild_D_C_2.csv
Subject_Red_Mild_D_C_3.csv
Subject_Red_Hot_D_A_1.csv
Subject_Red_Hot_D_A_2.csv
Subject_Red_Hot_D_A_3.csv
Subject_Red_Hot_D_B_1.csv
Subject_Red_Hot_D_B_2.csv
Subject_Red_Hot_D_B_3.csv
Subject_Red_Hot_D_C_1.csv
Subject_Red_Hot_D_C_2.csv
Subject_Red_Hot_D_C_3.csv
Затем мне нужно разложить все эти файлы по соответствующим подпапкам в папке под названием «Input_Data». То есть файлы нужно поместить в следующую структуру каталогов на основе их имён:
Input_Data > Blue_Cold > DA
                       > DB
                       > DC
           > Blue_Mild > DA
                       > DB
                       > DC
           > Blue_Hot  > DA
                       > DB
                       > DC
           > Red_Cold  > DA
                       > DB
                       > DC
           > Red_Mild  > DA
                       > DB
                       > DC
           > Red_Hot   > DA
                       > DB
                       > DC
Вот код, который я использую до сих пор:
import os
import shutil
from pathlib import Path

# Script from the question: sort every file of Downloaded_Data into
# Input_Data/<Colour>_<Temperature>/D<Letter>/ based on the file name.
# NOTE: the logic is intentionally left as the asker wrote it, because the
# traceback and the answers below refer to this exact behaviour.
source_folder = os.path.join("Downloaded_Data")
for filename in os.listdir(source_folder):
    # "Subject_Blue_Cold_D_A_1.csv" -> ["Subject", "Blue", "Cold", "D", "A", "1.csv"]
    name_parts = filename.split("_")
    first_folder = "_".join([name_parts[1], name_parts[2]])  # e.g. "Blue_Cold"
    second_folder = "D" + name_parts[4]  # e.g. "DA"
    dest_root_dir = Path('Input_Data')
    # NOTE: `filename` is part of dest_dir, so mkdir() below creates a
    # directory named after the file itself.
    dest_dir = dest_root_dir.joinpath(first_folder, second_folder, filename)
    # Without parents=True the parent folders must already exist.
    dest_dir.mkdir(exist_ok=True)
    dest_filepath = dest_dir.joinpath(filename)  # never used afterwards
    # NOTE: `filename` is the bare name returned by os.listdir(), so it is
    # resolved against the current working directory, not source_folder.
    # This is the call that raises FileNotFoundError in the traceback below.
    shutil.copy2(filename, dest_dir)
И я получаю следующую ошибку:
---------------------------------------------------------------------------
FileNotFoundError Traceback (most recent call last)
<ipython-input-50-257fc6665649> in <module>()
29 dest_dir.mkdir(exist_ok=True)
30 dest_filepath = dest_dir.joinpath(filename)
---> 31 shutil.copy2(filename, dest_dir)
32
33
~\Anaconda3\lib\shutil.py in copy2(src, dst, follow_symlinks)
255 if os.path.isdir(dst):
256 dst = os.path.join(dst, os.path.basename(src))
--> 257 copyfile(src, dst, follow_symlinks=follow_symlinks)
258 copystat(src, dst, follow_symlinks=follow_symlinks)
259 return dst
~\Anaconda3\lib\shutil.py in copyfile(src, dst, follow_symlinks)
118 os.symlink(os.readlink(src), dst)
119 else:
--> 120 with open(src, 'rb') as fsrc:
121 with open(dst, 'wb') as fdst:
122 copyfileobj(fsrc, fdst)
FileNotFoundError: [Errno 2] No such file or directory: 'Subject_Blue_Cold_D_A_1.csv .csv'
Почему я получаю эту ошибку, если этот файл существует и ранее распознан кодом? Как я могу это исправить, чтобы эти файлы оказались в правильной папке?
Ответ №1:
os.listdir возвращает только базовые имена файлов, без путей к ним. При выполнении любых операций с этими файлами нужно объединять путь к каталогу с именем файла:
# Prefix the bare file name with the folder it was listed from before copying.
src_path = os.path.join(source_folder, filename)
shutil.copy2(src_path, dest_dir)
Ответ №2:
Есть простое решение.
import pandas as pd
from pathlib import Path
import shutil
def custom_copy(series: Path, source_path: Path):
    """Copy one file from ``source_path`` to the location given by ``series``.

    Args:
        series: Full target path of the file. Its last component (the file
            name) is looked up inside ``source_path``; its parent is the
            folder the file is copied into. Despite the parameter name this
            is a single path, as the annotation says.
        source_path: Directory that contains the file to copy.
    """
    source = source_path.joinpath(series.name)
    # Create every missing level of the target folder; an existing folder is fine.
    series.parent.mkdir(exist_ok=True, parents=True)
    shutil.copy2(source, series.parent)
source_path = Path("./Downloaded_Data/")
target_path = Path("./Input_Data/")

# file_pattern: extract info from file name.
# group a: (?P<a>[a-zA-Z]+_[a-zA-Z]+) -> Red_Mild
# group b: (?P<b>[A-Z]_[A-Z]) -> D_A
# group c: the whole matched file name -> Subject_Blue_Cold_D_A_1.csv
file_pattern = r"(?P<c>[a-zA-Z]+_(?P<a>[a-zA-Z]+_[a-zA-Z]+)_(?P<b>[A-Z]_[A-Z])_\d+\.csv)"

_ = (
    pd.Series(source_path.glob("Subject_*.csv"))  # 1. get all files from the source folder.
    .apply(lambda x: x.name)  # 2. extract filenames from files.
    .str.extract(file_pattern)  # 3. extract file patterns from filenames.
    .dropna()  # 4. skip files whose name does not match the pattern.
    .assign(b=lambda x: x.b.str.replace("_", ""))  # 5. D_A -> DA: can be simplified
    .sort_index(axis=1)  # 6. sort groups to a-b-c.
    .apply(lambda x: target_path.joinpath(*x.values), axis=1)  # 7. construct target file path.
    .apply(custom_copy, source_path=source_path)  # 8. execute the copy command.
)
И в результате получается:
$ tree Input_Data
Input_Data
├── Blue_Cold
│ ├── DA
│ │ ├── Subject_Blue_Cold_D_A_1.csv
│ │ ├── Subject_Blue_Cold_D_A_2.csv
│ │ └── Subject_Blue_Cold_D_A_3.csv
│ ├── DB
│ │ ├── Subject_Blue_Cold_D_B_1.csv
│ │ ├── Subject_Blue_Cold_D_B_2.csv
│ │ └── Subject_Blue_Cold_D_B_3.csv
│ └── DC
│ ├── Subject_Blue_Cold_D_C_1.csv
│ ├── Subject_Blue_Cold_D_C_2.csv
│ └── Subject_Blue_Cold_D_C_3.csv
└── Red_Mild
├── DA
│ ├── Subject_Red_Mild_D_A_1.csv
│ ├── Subject_Red_Mild_D_A_2.csv
│ └── Subject_Red_Mild_D_A_3.csv
├── DB
│ ├── Subject_Red_Mild_D_B_1.csv
│ ├── Subject_Red_Mild_D_B_2.csv
│ └── Subject_Red_Mild_D_B_3.csv
└── DC
├── Subject_Red_Mild_D_C_1.csv
├── Subject_Red_Mild_D_C_2.csv
└── Subject_Red_Mild_D_C_3.csv