diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/theano_tutorial/logistic_regression.py b/theano_tutorial/logistic_regression.py
new file mode 100644
index 0000000..26a683d
--- /dev/null
+++ b/theano_tutorial/logistic_regression.py
@@ -0,0 +1,61 @@
+import numpy
+import theano
+import theano.tensor as T
+
+rng = numpy.random
+
+N = 400      # training sample size
+feats = 784  # number of input variables
+
+# generate a dataset: D = (input_values, target_class)
+D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
+training_steps = 10000
+
+# Declare Theano symbolic variables
+x = T.dmatrix("x")
+y = T.dvector("y")
+
+# initialize the weight vector w randomly
+#
+# this and the following bias variable b
+# are shared so they keep their values
+# between training iterations (updates)
+w = theano.shared(rng.randn(feats), name="w")
+
+# initialize the bias term
+b = theano.shared(0., name="b")
+
+print("Initial model:")
+print(w.get_value())
+print(b.get_value())
+
+# Construct Theano expression graph
+p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))            # Probability that target = 1
+prediction = p_1 > 0.5                             # The prediction thresholded
+xent = -y * T.log(p_1) - (1 - y) * T.log(1 - p_1)  # Cross-entropy loss function
+cost = xent.mean() + 0.01 * (w ** 2).sum()         # The cost to minimize
+gw, gb = T.grad(cost, [w, b])                      # Compute the gradient of the cost
+                                                   # w.r.t. weight vector w and
+                                                   # bias term b
+                                                   # (we shall return to this in a
+                                                   # following section of this tutorial)
+
+# Compile
+train = theano.function(
+    inputs=[x, y],
+    outputs=[prediction, xent],
+    updates=((w, w - 0.1 * gw), (b, b - 0.1 * gb)))
+predict = theano.function(inputs=[x], outputs=prediction)
+
+
+# Train
+for i in range(training_steps):
+    pred, err = train(D[0], D[1])
+
+print("Final model:")
+print(w.get_value())
+print(b.get_value())
+print("target values for D:")
+print(D[1])
+print("prediction on D:")
+print(predict(D[0]))
diff --git a/theano_tutorial/tutorial_conditions.py b/theano_tutorial/tutorial_conditions.py
new file mode 100644
index 0000000..db0e855
--- /dev/null
+++ b/theano_tutorial/tutorial_conditions.py
@@ -0,0 +1,34 @@
+# ifelse: a lazy if/else -- only the branch that is taken gets evaluated
+# switch: an elementwise if/else -- both branches are always evaluated
+
+from theano import tensor as T
+from theano.ifelse import ifelse
+import theano, time, numpy
+
+a, b = T.scalars('a', 'b')
+x, y = T.matrices('x', 'y')
+
+z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
+z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))
+
+f_switch = theano.function([a, b, x, y], z_switch,
+                           mode=theano.Mode(linker='vm'))
+f_lazyifelse = theano.function([a, b, x, y], z_lazy,
+                               mode=theano.Mode(linker='vm'))
+
+val1 = 0.
+val2 = 1.
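+
+# Note (an addition, not from the upstream tutorial): the matrices below are
+# large on purpose. With the 'vm' linker, ifelse evaluates only the branch
+# that is actually taken, while switch computes the mean of both matrices
+# before selecting, so f_lazyifelse should take roughly half the time below.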
+big_mat1 = numpy.ones((10000, 1000))
+big_mat2 = numpy.ones((10000, 1000))
+
+n_times = 10
+
+tic = time.perf_counter()
+for i in range(n_times):
+    f_switch(val1, val2, big_mat1, big_mat2)
+print('time spent evaluating both values %f sec' % (time.perf_counter() - tic))
+
+tic = time.perf_counter()
+for i in range(n_times):
+    f_lazyifelse(val1, val2, big_mat1, big_mat2)
+print('time spent evaluating one value %f sec' % (time.perf_counter() - tic))
\ No newline at end of file
diff --git a/theano_tutorial/tutorial_derivates.py b/theano_tutorial/tutorial_derivates.py
new file mode 100644
index 0000000..ed1a295
--- /dev/null
+++ b/theano_tutorial/tutorial_derivates.py
@@ -0,0 +1,94 @@
+import numpy as np
+import theano
+import theano.tensor as T
+
+# ordinary gradient: dy/dx and dy/dz of y = x**3 + z**2
+x = T.dscalar('x')
+z = T.dscalar('z')
+y = x ** 3 + z ** 2
+gy = T.grad(y, [x, z])
+
+f = theano.function([x, z], gy)
+
+# uncomment to inspect the optimized graphs of the two outputs:
+# print(theano.pp(f.maker.fgraph.outputs[0]))
+# print(theano.pp(f.maker.fgraph.outputs[1]))
+
+print(f(4, 8))  # [3 * 4**2, 2 * 8] = [48.0, 16.0]
+
+# gradient of the logistic function, summed over a matrix of inputs
+x = T.dmatrix('x')
+s = T.sum(1 / (1 + T.exp(-x)))
+gs = T.grad(s, x)
+
+f_logistic = theano.function([x], gs)
+
+print(f_logistic([[0, 1], [-1, -2]]))
+
+# Jacobian of y = x**2, built row by row with scan
+print('jacobian matrix1')
+x = T.dvector('x')
+y = x ** 2
+J, updates = theano.scan(lambda i, y, x: T.grad(y[i], x),
+                         sequences=T.arange(y.shape[0]),
+                         non_sequences=[y, x])
+f = theano.function([x], J, updates=updates)
+print(f([1, 2, 3, 4, 5]))  # diagonal matrix with 2 * x on the diagonal
+
+# the same Jacobian via the built-in helper
+J = theano.gradient.jacobian(y, x)
+f2 = theano.function([x], J)
+print(f2([1, 2, 3, 4, 5]))
+
+# Jacobian with respect to a matrix argument
+W, V = T.dmatrices('W', 'V')
+x = T.dvector('x')
+y = T.dot(x, W)
+J = theano.gradient.jacobian(y, W)
+f2 = theano.function([W, x], J)
+print(f2(np.array([[1, 1], [1, 1]]), np.array([0, 1])))
+
+# multiply that Jacobian by the matrix V
+JV2 = T.dot(J, V)
+f2 = theano.function([W, V, x], JV2)
+print(f2(np.array([[1, 1], [1, 1]]), np.array([[2, 2], [2, 2]]), np.array([0, 1])))
+
+print('jacobian matrix2')
+# differentiating y w.r.t. two inputs at once: scan returns one Jacobian
+# per input, so f yields a list of two matrices
+x = T.dvector('x')
+z = T.dvector('z')
+y = x ** 2 + z ** 2
+J, updates = theano.scan(lambda i, y, x, z: T.grad(y[i], [x, z]),
+                         sequences=T.arange(y.shape[0]),
+                         non_sequences=[y, x, z])
+f = theano.function([x, z], J, updates=updates)
+test = T.arange(y.shape[0])
+t_f = theano.function([x, z], test)
+print(f([4, 4], [1, 1]))
+print(t_f([4, 4], [1, 1]))  # the row indices scan iterates over: [0 1]
+
+# Hessian of cost = sum(x**3): differentiate the gradient a second time
+x = T.dvector('x')
+y = x ** 3
+cost = y.sum()
+gy = T.grad(cost, x)
+H, updates = theano.scan(lambda i, gy, x: T.grad(gy[i], x),
+                         sequences=T.arange(gy.shape[0]),
+                         non_sequences=[gy, x])
+f = theano.function([x], H, updates=updates)
+print(f([4, 4]))  # diagonal matrix with 6 * x on the diagonal
+
+# Jacobian-times-vector products, without building the full Jacobian:
+
+# R-operator: computes (dy/dW) dot V
+W = T.dmatrix('W')
+V = T.dmatrix('V')
+x = T.dvector('x')
+y = T.dot(x, W)
+JV = T.Rop(y, W, V)
+f = theano.function([W, V, x], JV)
+print(f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0, 1]))
+
+# L-operator: computes v dot (dy/dW)
+W = T.dmatrix('W')
+v = T.dvector('v')
+x = T.dvector('x')
+y = T.dot(x, W)
+VJ = T.Lop(y, W, v)
+f = theano.function([v, x], VJ)
+print(f([2, 2], [0, 1]))
\ No newline at end of file
diff --git a/theano_tutorial/tutorial_loop.py b/theano_tutorial/tutorial_loop.py
new file mode 100644
index 0000000..7dbebe0
--- /dev/null
+++ b/theano_tutorial/tutorial_loop.py
@@ -0,0 +1,100 @@
+import theano
+import theano.tensor as T
+
+k = T.iscalar("k")
+A = T.vector("A")
+
+# Symbolic description of the result: multiply by A, k times over
+result, updates = theano.scan(fn=lambda prior_result, A: prior_result * A,
+                              outputs_info=T.ones_like(A),
+                              non_sequences=A,
+                              n_steps=k)
+
+# We only care about A**k, but scan has provided us with A**1 through A**k.
+# Discard the values that we don't care about. Scan is smart enough to
+# notice this and not waste memory saving them.
+final_result = result[-1]
+
+# compiled function that returns A**k
+power = theano.function(inputs=[A, k], outputs=final_result, updates=updates)
+
+print(power(range(10), 2))
+print(power(range(10), 4))
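+
+# How the recurrence unrolls (a note added here, not in the upstream
+# tutorial): outputs_info seeds prior_result with T.ones_like(A), so step t
+# computes ones * A * ... * A = A**(t+1); result stacks the k rows
+# A**1 ... A**k, and result[-1] picks out A**k.
+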
+print('P2:')
+import numpy
+
+coefficients = T.vector("coefficients")
+x = T.scalar("x")
+
+max_coefficients_supported = 10000
+
+# Generate the terms of the polynomial as a running sum: with outputs_info
+# set, scan feeds each step's output back in as prior_result, so step t
+# adds coefficients[t] * x**t to the accumulated total
+components, updates = theano.scan(fn=lambda coefficient, power, prior_result, free_variable:
+                                  prior_result + (coefficient * (free_variable ** power)),
+                                  outputs_info=T.zeros(1),
+                                  sequences=[coefficients, T.arange(max_coefficients_supported)],
+                                  non_sequences=x)
+
+# components holds the running sums, so the last entry (not components.sum(),
+# which would double-count earlier terms) is the polynomial's value
+pol = components[-1]
+
+# Compile a function; it outputs all running sums so they can be inspected
+calculate_polynomial = theano.function(inputs=[coefficients, x], outputs=components)
+
+# Test
+test_coefficients = numpy.asarray([1, 0, 2], dtype=numpy.float32)
+test_value = 3
+print(calculate_polynomial(test_coefficients, test_value))
+print(1.0 * (3 ** 0) + 0.0 * (3 ** 1) + 2.0 * (3 ** 2))
+
+print('P3:')
+import numpy as np
+
+up_to = T.iscalar("up_to")
+
+# define a named function, rather than using a lambda
+def accumulate_by_adding(arange_val, prior_result):
+    return prior_result + arange_val
+
+seq = T.arange(up_to)
+
+# An implicit downcast from the dtype of 'seq' to that of
+# 'T.as_tensor_variable(0)' (which is int8 by default) would occur if the
+# simpler initialization below were used instead of the next line:
+# outputs_info = T.as_tensor_variable(0)
+outputs_info = T.as_tensor_variable(np.asarray(0, seq.dtype))
+scan_result, scan_updates = theano.scan(fn=accumulate_by_adding,
+                                        outputs_info=outputs_info,
+                                        sequences=seq)
+triangular_sequence = theano.function(inputs=[up_to], outputs=scan_result)
+
+# test
+some_num = 15
+print(triangular_sequence(some_num))
+print([n * (n + 1) // 2 for n in range(some_num)])
+
+print('P4:')
+location = T.imatrix("location")
+values = T.vector("values")
+output_model = T.matrix("output_model")
+
+# each step writes a single value into a fresh zero matrix shaped like
+# output_model; with outputs_info=None the steps are independent of each other
+def set_value_at_position(a_location, a_value, output_model):
+    zeros = T.zeros_like(output_model)
+    zeros_subtensor = zeros[a_location[0], a_location[1]]
+    return T.set_subtensor(zeros_subtensor, a_value)
+
+result, updates = theano.scan(fn=set_value_at_position,
+                              outputs_info=None,
+                              sequences=[location, values],
+                              non_sequences=output_model)
+
+assign_values_at_positions = theano.function(inputs=[location, values, output_model],
+                                             outputs=result)
+
+# test
+test_locations = numpy.asarray([[1, 1], [2, 3]], dtype=numpy.int32)
+test_values = numpy.asarray([42, 50], dtype=numpy.float32)
+test_output_model = numpy.zeros((5, 5), dtype=numpy.float32)
+print(assign_values_at_positions(test_locations, test_values, test_output_model))
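+
+# Sanity check (an addition, not from the upstream tutorial): every scan
+# step starts from zeros again, so row i of the result should contain
+# test_values[i] at test_locations[i] and zeros everywhere else.
+expected = numpy.zeros((2, 5, 5), dtype=numpy.float32)
+for i, (row, col) in enumerate(test_locations):
+    expected[i, row, col] = test_values[i]
+print(numpy.allclose(assign_values_at_positions(test_locations,
+                                                test_values,
+                                                test_output_model), expected))
\ No newline at end of file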