作者:mobiledu2502926273 | 来源:互联网 | 2024-12-12 14:21
NumPy 实现逻辑回归
# -*- coding: utf-8 -*-
"""
Created: 2014-12-09 21:54:00
Author: wepon
Overview:
- loadData: loads every file in a directory, flattens each 32x32 digit image
  into a 1x1024 row vector, and returns the (m, 1024) sample matrix plus the
  label for each of the m samples (parsed from the file name).
- sigmoid: the logistic (sigmoid) activation function.
- gradientAscent: solves for the regression weights via gradient ascent.
- classify: classifies new samples using the learned regression weights.
"""
#!/usr/bin/python
import numpy as np
from os import listdir
def loadData(directory):
    """Load all digit-image files found in *directory*.

    Each file holds a 32x32 grid of '0'/'1' characters, one row per line; the
    grid is flattened into a 1024-element row vector.  The label is parsed
    from the file name, which is expected to look like '<digit>_<index>'.

    Returns (dataMatrix, labels): an (m, 1024) array and an (m, 1) array.
    """
    fileNames = listdir(directory)
    sampleCount = len(fileNames)
    dataMatrix = np.zeros((sampleCount, 1024))
    labels = np.zeros((sampleCount, 1))
    for sampleIndex, fileName in enumerate(fileNames):
        pixels = []
        with open(f'{directory}/{fileName}') as handle:
            for _ in range(32):
                row = handle.readline().strip()
                # Flatten the 32x32 character grid row by row.
                pixels.extend(int(row[col]) for col in range(32))
        dataMatrix[sampleIndex, :] = pixels
        # File names look like '<digit>_<index>'; the digit is the label.
        labels[sampleIndex] = int(fileName.split('_')[0])
    return dataMatrix, labels
def sigmoid(inputX):
    """Return the element-wise logistic sigmoid, 1 / (1 + e^-x)."""
    negExp = np.exp(-inputX)
    return 1.0 / (1.0 + negExp)
def gradientAscent(dataMatrix, labels, alpha=0.01, maxCycles=500):
    """Fit logistic-regression weights by batch gradient ascent.

    Parameters:
        dataMatrix: (m, n) array-like of samples.
        labels: (m,) or (m, 1) array-like of 0/1 targets.
        alpha: learning rate.
        maxCycles: number of full-batch update iterations.

    Returns:
        (n, 1) ndarray of fitted weights.

    Bug fix: the original did np.mat(labels).transpose(), but loadData
    returns labels with shape (m, 1), so the transpose produced a (1, m)
    matrix; `labelMatrix - h` then broadcast to (m, m) and the in-place
    weight update raised ValueError.  reshape(-1, 1) accepts both (m,)
    and (m, 1) inputs.  The deprecated np.matrix is avoided entirely.
    """
    X = np.asarray(dataMatrix, dtype=float)
    y = np.asarray(labels, dtype=float).reshape(-1, 1)
    m, n = X.shape
    weights = np.ones((n, 1))
    for _ in range(maxCycles):
        # Logistic sigmoid, inlined so this block is self-contained.
        h = 1.0 / (1.0 + np.exp(-(X @ weights)))
        error = y - h
        # Gradient of the log-likelihood; ascend, hence '+='.
        weights += alpha * (X.T @ error)
    return weights
def classify(testDirectory, weights):
    """Classify every sample under *testDirectory* with the given weights.

    Prints the predicted/actual label for each sample and the overall error
    rate; also returns the error rate (the original returned None, so the
    added return value is backward-compatible).

    Bug fixes: the original line `predictiOns= ...` left `predictions`
    undefined (NameError on the next line), and `np.mat(testLabels).transpose()`
    turned the (m, 1) label array into a (1, m) matrix, so
    `testLabelMatrix[i][0]` raised IndexError for every i >= 1.
    """
    testData, testLabels = loadData(testDirectory)
    X = np.asarray(testData, dtype=float)
    w = np.asarray(weights, dtype=float).reshape(-1, 1)
    actualLabels = np.asarray(testLabels, dtype=float).reshape(-1)
    predictions = sigmoid(X @ w)
    m = len(predictions)
    errorCount = 0
    for i in range(m):
        actual = actualLabels[i]
        if predictions[i, 0] > 0.5:
            print(f'预测结果: 1, 实际标签: {actual}')
            if actual != 1:
                errorCount += 1
        else:
            print(f'预测结果: 0, 实际标签: {actual}')
            if actual != 0:
                errorCount += 1
    errorRate = errorCount / m
    print(f'错误率: {errorRate:.4f}')
    return errorRate
def digitRecognition(trainingDirectory, testingDirectory, alpha=0.01, maxCycles=500):
    """End-to-end pipeline: train on one directory, evaluate on another.

    alpha and maxCycles are forwarded to gradientAscent unchanged.
    """
    samples, sampleLabels = loadData(trainingDirectory)
    fittedWeights = gradientAscent(samples, sampleLabels, alpha, maxCycles)
    classify(testingDirectory, fittedWeights)
# Example invocation.  The paths are placeholders; guarded with __main__ so
# importing this module no longer kicks off training unconditionally (the
# original ran digitRecognition at import time).
trainingPath = 'path_to_training_data'
testingPath = 'path_to_testing_data'

if __name__ == '__main__':
    digitRecognition(trainingPath, testingPath, alpha=0.01, maxCycles=500)
TensorFlow 实现逻辑回归
import tensorflow as tf
# NOTE(review): tensorflow.examples.tutorials was removed in TF 2.x — this
# listing requires TensorFlow 1.x (tf.placeholder / tf.Session API).
from tensorflow.examples.tutorials.mnist import input_data

# Download/load the MNIST dataset with one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

# Hyper-parameters.
learningRate = 0.01
epochs = 25
batchSize = 100
displayStep = 1

# Input placeholders: 28*28 = 784 pixels per image, 10 digit classes.
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])

# Model parameters, zero-initialized.
weights = tf.Variable(tf.zeros([784, 10]))
bias = tf.Variable(tf.zeros([10]))

# Softmax regression model.  (Fixed: the scraped source had `predictiOns=`,
# which left `predictions` undefined and raised NameError below.)
predictions = tf.nn.softmax(tf.matmul(x, weights) + bias)

# Cross-entropy loss, averaged over the batch, and a plain SGD optimizer.
loss = tf.reduce_mean(-tf.reduce_sum(y * tf.log(predictions), reduction_indices=[1]))
optimizer = tf.train.GradientDescentOptimizer(learningRate).minimize(loss)

# Variable initializer op.
init = tf.global_variables_initializer()

# Train, then evaluate inside the same session.
with tf.Session() as session:
    session.run(init)
    for epoch in range(epochs):
        avgLoss = 0.
        totalBatches = int(mnist.train.num_examples / batchSize)
        for batchIndex in range(totalBatches):
            batchX, batchY = mnist.train.next_batch(batchSize)
            _, currentLoss = session.run([optimizer, loss], feed_dict={x: batchX, y: batchY})
            # Running average of the per-batch loss over the epoch.
            avgLoss += currentLoss / totalBatches
        if (epoch + 1) % displayStep == 0:
            print(f'Epoch: {epoch + 1}, Loss: {avgLoss:.9f}')
    print('优化完成!')

    # Accuracy on the first 3000 test images.  (Fixed: `correctPredictiOns=`
    # garble, which raised NameError on the next line.)
    correctPredictions = tf.equal(tf.argmax(predictions, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correctPredictions, tf.float32))
    print(f'准确率: {accuracy.eval({x: mnist.test.images[:3000], y: mnist.test.labels[:3000]})}')