# -*- coding: utf-8 -*-
"""
Created on Wed Jun 19 09:53:39 2019

@author: root

Split a directory tree of per-class sample files into training, test and
validation copies (60% / 20% / 20% per class).
"""
import os
import shutil

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn only ships the legacy module
    from sklearn.cross_validation import train_test_split


def _collect_classes(dirPath):
    """Group the files under ``dirPath`` by class directory.

    A class is the first directory (in top-down walk order) that directly
    contains files; files in its sub-directories belong to the same class.

    Returns:
        A list of ``(class_name, relative_paths)`` tuples, where each path is
        relative to ``dirPath``. The list is empty when no files are found.
    """
    classes = []
    class_root = None
    for root, dirs, files in os.walk(dirPath):
        # Sort in place so the traversal order (and therefore the seeded
        # split) does not depend on the filesystem's listing order.
        dirs.sort()
        if not files:
            continue
        # Compare whole path components, not substrings, so that e.g.
        # "rose" and "roses" are never merged into one class.
        inside_current = class_root is not None and (
            root == class_root or root.startswith(class_root + os.sep)
        )
        if not inside_current:
            class_root = root
            classes.append((os.path.basename(root), []))
        members = classes[-1][1]
        for file_name in sorted(files):
            # Store paths relative to the root so the root's own name
            # cannot influence later path handling.
            members.append(
                os.path.relpath(os.path.join(root, file_name), dirPath)
            )
    return classes


def _split_three_way(items, random_state):
    """Split ``items`` into ``(train, test, valid)`` at 60% / 20% / 20%."""
    # 1. Training set versus held-out data; the seed makes this repeatable.
    train, held_out = train_test_split(
        items, test_size=0.4, random_state=random_state
    )
    # 2. Halve the held-out data (not the full class) into test and
    #    validation, so neither overlaps the training set.
    test, valid = train_test_split(
        held_out, test_size=0.5, random_state=random_state
    )
    return train, test, valid


def _copy_subset(dirPath, relative_paths, dtype):
    """Copy files from ``dirPath`` into the sibling tree ``dirPath-dtype``."""
    target_root = dirPath + '-' + dtype
    for rel_path in relative_paths:
        destination = os.path.join(target_root, rel_path)
        destination_dir = os.path.dirname(destination)
        if not os.path.isdir(destination_dir):
            os.makedirs(destination_dir)  # create missing class folders
        shutil.copyfile(os.path.join(dirPath, rel_path), destination)


def splitDir(dirPath, random_state):
    """Copy every class under ``dirPath`` into train/test/validation trees.

    Each class is split 60% / 20% / 20% and the files are copied, keeping
    their relative layout, into three sibling directories named
    ``<dirPath>-训练`` (training), ``<dirPath>-测试`` (test) and
    ``<dirPath>-验证`` (validation). The source tree is left untouched.

    Args:
        dirPath: Root directory whose sub-directories hold one class each.
        random_state: Seed passed to ``train_test_split`` so the split can
            be reproduced.

    Raises:
        Whatever ``train_test_split`` raises for a class it cannot split
        (presumably a class with too few files -- confirm against the
        installed scikit-learn version).
    """
    # Listed in the same order as the tuple from _split_three_way, so each
    # subset lands in the folder that names it.
    path_type = ['训练', '测试', '验证']

    # Drop trailing separators so the output trees are siblings of dirPath.
    dirPath = os.path.normpath(dirPath)

    classes = _collect_classes(dirPath)
    for class_name, _ in classes:
        print('class 【%s】 had read to array' % class_name)

    for class_name, relative_paths in classes:
        data_split = _split_three_way(relative_paths, random_state)
        print('had deal to 【%s】 ' % class_name)
        for data, dtype in zip(data_split, path_type):
            _copy_subset(dirPath, data, dtype)


# Script entry: runs whenever this module is executed or imported.
splitDir(r'flower_photos_HSV', random_state=12345)