# A PyTorch FishData example

import torch
import pandas as pd

# Download the fish measurements CSV and print a preview of its first rows.
url = "https://raw.githubusercontent.com/kittenpub/database-repository/main/Fish_Dataset_Pytorch.csv"
fish_data = pd.read_csv(filepath_or_buffer=url)
print(fish_data.head(n=5))

# Sample of the preview printed above (one label column, six numeric columns):
#   Species  Weight  Length1  Length2  Length3   Height   Width
# 0   Bream   242.0     23.2     25.4     30.0  11.5200  4.0200
# 1   Bream   290.0     24.0     26.3     31.2  12.4800  4.3056
# 2   Bream   340.0     23.9     26.5     31.1  12.3778  4.6961
# 3   Bream   363.0     26.3     29.0     33.5  12.7300  4.4555
# 4   Bream   430.0     26.5     29.0     34.0  12.4440  5.1340

# Split the table into numeric measurements and the species label column.
features = fish_data.drop(columns='Species').to_numpy()
labels = fish_data['Species'].to_numpy()

# Copy the measurements into a float32 tensor: one row per CSV row, one
# column per remaining (non-Species) column.
features_tensor = torch.tensor(features, dtype=torch.float32)
print(f"Shape of the fish tensor: {features_tensor.size()}")

# Reshape into 2 columns and let PyTorch infer the row count (-1).
# Element order is preserved, so each row of the result holds two
# consecutive values of the flattened feature table rather than one fish.
reshaped_tensor = features_tensor.reshape(-1, 2)
print(f"Shape of the reshaped tensor: {reshaped_tensor.shape}")

print(reshaped_tensor[:10])  # first 10 rows, all columns


# Slice the original feature tensor: the reshaped tensor has only 2 columns,
# so asking it for 3 columns would silently be clamped to 2.
sliced_tensor = features_tensor[:5, :3]
print(sliced_tensor)  # first 5 rows, first 3 columns

# Perform aggregation operations on the Fish Dataset.
# The per-fish feature matrix (rows = fish, columns = features) is used here
# instead of reshaped_tensor, because the (-1, 2) reshape interleaves
# different features within each column.
# dim=0 -> reduce across rows: one value per feature column (feature mean).
# dim=1 -> reduce across columns: one value per fish row (sum of its features).

mean_tensor = torch.mean(features_tensor, dim=0)  # mean of each feature column
sum_tensor = torch.sum(features_tensor, dim=1)  # sum across each fish's row

print(f"Mean Tensor (Column-wise): {mean_tensor}")
print(f"Sum Tensor (Row-wise): {sum_tensor}")


# Z-score standardization: (x - mu) / sigma, where mu is the mean and sigma
# is the standard deviation. It is applied per feature column of
# features_tensor so each feature is rescaled to mean 0 and std 1; the
# reshaped (-1, 2) tensor is not used because its columns mix features.
# keepdim=True keeps a leading dimension of size 1 so the statistics
# broadcast against every row.
mean = features_tensor.mean(dim=0, keepdim=True)
std = features_tensor.std(dim=0, keepdim=True)
# A constant column has std == 0; divide by 1 there (result stays 0)
# instead of producing NaN/inf.
safe_std = torch.where(std == 0, torch.ones_like(std), std)
normalized_tensor = (features_tensor - mean) / safe_std
print(f"Normalized Tensor:\n{normalized_tensor}")
# THE END