# Benchmark: cost of pickling and gzip-compressing four float64 columns
# ("open", "high", "low", "close") stored as plain Python lists versus
# NumPy arrays.
n = 100000
k = 50000.0
_COLUMNS = ("open", "high", "low", "close")

# Columns held as Python lists of float objects.
_dict_f64_primitive = {
    name: [random.random() + k for _ in range(n)] for name in _COLUMNS
}
# The same values, converted column by column into NumPy arrays.
_dict_f64_np = {
    name: np.array(values) for name, values in _dict_f64_primitive.items()
}
# Independent random data built directly as NumPy arrays.
_dict_f64_np2 = {
    name: np.array([random.random() + k for _ in range(n)]) for name in _COLUMNS
}

# NOTE: sys.getsizeof is shallow. It reports only the dict object itself
# (four keys -> same size for all three) and does not include the lists or
# arrays the dict refers to, so these figures say nothing about the memory
# actually used by the data.
print(f"_dict_f64_primitive size : {sys.getsizeof(_dict_f64_primitive)}")
print(f"_dict_f64_np size : {sys.getsizeof(_dict_f64_np)}")
print(f"_dict_f64_np2 size : {sys.getsizeof(_dict_f64_np2)}\n")

# perf_counter is a monotonic, high-resolution clock intended for measuring
# short durations; the wall clock (time.time) can be too coarse for the
# millisecond-scale pickling steps and may jump if the system clock changes.
# `t` is the file's alias for the time module.
t0 = t.perf_counter()
pk_primitive = pickle.dumps(_dict_f64_primitive)
t1 = t.perf_counter()
pk_np = pickle.dumps(_dict_f64_np)
t2 = t.perf_counter()
pk_np2 = pickle.dumps(_dict_f64_np2)
t3 = t.perf_counter()
gzip_primitive = gzip.compress(pk_primitive)
t4 = t.perf_counter()
gzip_np = gzip.compress(pk_np)
t5 = t.perf_counter()
gzip_np2 = gzip.compress(pk_np2)
t6 = t.perf_counter()

print(f"pk_primitive -> binary cost time :{t1-t0} seconds")
print(f"pk_np -> binary cost time :{t2-t1} seconds")
print(f"pk_np2 -> binary cost time :{t3-t2} seconds\n")
print(f"pk_primitive -> cpmpress cost time :{t4-t3} seconds")
print(f"pk_np -> compress cost time :{t5-t4} seconds")
print(f"pk_np2 -> compress cost time :{t6-t5} seconds")
输出:
_dict_f64_primitive size : 232 _dict_f64_np size : 232 _dict_f64_np2 size : 232 pk_primitive -> binary cost time :0.012560844421386719 seconds pk_np -> binary cost time :0.006933927536010742 seconds pk_np2 -> binary cost time :0.0059435367584228516 seconds pk_primitive -> cpmpress cost time :3.5468034744262695 seconds pk_np -> compress cost time :2.197758674621582 seconds pk_np2 -> compress cost time :2.230668783187866 seconds
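这三者的 sys.getsizeof 结果相同(均为 232 字节),但这并不代表三者实际占用的内存相同:sys.getsizeof 只做浅层统计,得到的只是字典容器本身的大小,不包含字典所引用的列表或 numpy 数组。序列化和压缩的用时则有一定差异,感觉 numpy 要快一些。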
三、转换的偏差
# Demonstrate the precision lost when a float64 value is narrowed to float32
# and then widened back to float64.
numbers = [
    12.345888888888888888888,
    123456789.0,
    123456789.978654412,
    123456782229.978,
    3309.07,
    7896.353,
    123456789.88,
]
for number in numbers:
    # Sanity check on the literals above: every sample is a Python float.
    assert isinstance(number, float)
    f_64 = np.float64(number)
    f_32 = np.float32(number)   # narrowing step: this is where precision is lost
    f_32_64 = np.float64(f_32)  # widening back is exact; it keeps the float32 value

    # Rounding error introduced by the float32 round trip.
    error = f_64 - f_32_64
    print(f"f_64 : {type(f_64)} value :{f_64} ")
    print(f"f_32 : {type(f_32)} value :{f_32} error : {error}")
    # Show the widened value itself; printing f_32 here would hide the digits
    # that float64 formatting reveals.
    print(f"f_32_64 : {type(f_32_64)} value :{f_32_64} \n")
输出:
f_64 : value :12.345888888888888 f_32 : value :12.3458890914917 error : -2.0260281097250754e-07 f_32_64 : value :12.3458890914917 f_64 : value :123456789.0 f_32 : value :123456792.0 error : -3.0 f_32_64 : value :123456792.0 f_64 : value :123456789.97865441 f_32 : value :123456792.0 error : -2.0213455855846405 f_32_64 : value :123456792.0 f_64 : value :123456782229.978 f_32 : value :123456782336.0 error : -106.02200317382812 f_32_64 : value :123456782336.0 f_64 : value :3309.07 f_32 : value :3309.070068359375 error : -6.835937483629095e-05 f_32_64 : value :3309.070068359375 f_64 : value :7896.353 f_32 : value :7896.35302734375 error : -2.734374993451638e-05 f_32_64 : value :7896.35302734375 f_64 : value :123456789.88 f_32 : value :123456792.0 error : -2.1200000047683716 f_32_64 : value :123456792.0
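比如:123456789.0 【f64】 ->【f32】这么简单转换却存在乍看难以理解的偏差。其实原因在于 float32 只有 24 位有效二进制位(约 7 位十进制有效数字):在 2^26 ~ 2^27(约 6.7×10^7 ~ 1.34×10^8)这个区间内,相邻两个可表示的 float32 值之间相差 8,所以 123456789 只能被舍入到最接近的 8 的倍数 123456792,误差为 -3: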
f_32 : value :123456792.0 error : -3.0 f_32_64 : value :123456792.0