83 lines
2.6 KiB
Python
83 lines
2.6 KiB
Python
|
import numpy
|
||
|
import theano
|
||
|
import theano.tensor as T
|
||
|
rng = numpy.random
|
||
|
|
||
|
N = 400 # training sample size
|
||
|
feats = 784 # number of input variables
|
||
|
|
||
|
# generate a dataset: D = (input_values, target_class)
|
||
|
D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
|
||
|
training_steps = 10000
|
||
|
|
||
|
# Declare Theano symbolic variables
|
||
|
x = T.dmatrix("x")
|
||
|
y = T.dvector("y")
|
||
|
|
||
|
# initialize the weight vector w randomly
|
||
|
#
|
||
|
# this and the following bias variable b
|
||
|
# are shared so they keep their values
|
||
|
# between training iterations (updates)
|
||
|
w = theano.shared(rng.randn(feats), name="w")
|
||
|
|
||
|
# initialize the bias term
|
||
|
b = theano.shared(0., name="b")
|
||
|
|
||
|
print("Initial model:")
|
||
|
print(w.get_value())
|
||
|
print(b.get_value())
|
||
|
|
||
|
# Construct Theano expression graph
|
||
|
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1
|
||
|
prediction = p_1 > 0.5 # The prediction thresholded
|
||
|
xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1) # Cross-entropy loss function
|
||
|
cost = xent.mean() + 0.01 * (w ** 2).sum()# The cost to minimize
|
||
|
gw, gb = T.grad(cost, [w, b]) # Compute the gradient of the cost
|
||
|
# w.r.t weight vector w and
|
||
|
# bias term b
|
||
|
# (we shall return to this in a
|
||
|
# following section of this tutorial)
|
||
|
|
||
|
|
||
|
def set_value_at_position(x, y, prediction, xent, w, b):
|
||
|
p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b)) # Probability that target = 1
|
||
|
prediction = p_1 > 0.5 # The prediction thresholded
|
||
|
xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1) # Cross-entropy loss function
|
||
|
cost = xent.mean() + 0.01 * (w ** 2).sum() # The cost to minimize
|
||
|
gw, gb = T.grad(cost, [w, b])
|
||
|
w = w - 0.1 * gw
|
||
|
b = b - 0.1 * gb
|
||
|
return w, b
|
||
|
|
||
|
|
||
|
result, updates = theano.scan(fn=set_value_at_position,
|
||
|
outputs_info=[prediction, xent],
|
||
|
sequences=[x, y],
|
||
|
non_sequences=[w, b],
|
||
|
n_steps=training_steps)
|
||
|
|
||
|
calculate_scan = theano.function(inputs=[x, y], outputs=[prediction, xent], updates=updates)
|
||
|
|
||
|
|
||
|
# Compile
|
||
|
train = theano.function(
|
||
|
inputs=[x,y],
|
||
|
outputs=[prediction, xent],
|
||
|
updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
|
||
|
predict = theano.function(inputs=[x], outputs=prediction)
|
||
|
|
||
|
|
||
|
|
||
|
# Train
|
||
|
for i in range(training_steps):
|
||
|
pred, err = train(D[0], D[1])
|
||
|
|
||
|
print("Final model:")
|
||
|
print(w.get_value())
|
||
|
print(b.get_value())
|
||
|
print("target values for D:")
|
||
|
print(D[1])
|
||
|
print("prediction on D:")
|
||
|
print(predict(D[0]))
|