More tutorial

2017-04-13 11:10:23 +02:00 · 2017-04-13 11:10:23 +02:00 · 658c55d280
commit 658c55d280
parent 9455f26423
5 changed files with 295 additions and 0 deletions
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
--- a/theano_tutorial/logistic_regression.py
+++ b/theano_tutorial/logistic_regression.py
@@ -0,0 +1,61 @@
+import numpy
+import theano
+import theano.tensor as T
+rng = numpy.random
+
+N = 400                                   # number of training examples
+feats = 784                               # features per example
+training_steps = 10000
+
+# Synthetic dataset D = (inputs, targets): normally distributed inputs and
+# targets drawn at random from {0, 1}.
+D = (rng.randn(N, feats), rng.randint(size=N, low=0, high=2))
+inputs, targets = D
+
+# Symbolic placeholders for the inputs and the targets
+x = T.dmatrix("x")
+y = T.dvector("y")
+
+# Model parameters. Both are shared variables, so the values written by
+# the `updates` of one train() call persist into the next call.
+w = theano.shared(rng.randn(feats), name="w")   # weights: random start
+b = theano.shared(0., name="b")                 # bias: starts at zero
+
+print("Initial model:")
+print(w.get_value())
+print(b.get_value())
+
+# Expression graph
+neg_activation = -T.dot(x, w) - b
+p_1 = 1 / (1 + T.exp(neg_activation))     # probability that target = 1
+prediction = p_1 > 0.5                    # thresholded class prediction
+log_p_1 = T.log(p_1)
+log_p_0 = T.log(1-p_1)
+xent = -y * log_p_1 - (1-y) * log_p_0     # per-example cross-entropy
+l2_penalty = 0.01 * (w ** 2).sum()        # penalty on the squared weights
+cost = xent.mean() + l2_penalty           # the quantity to minimise
+
+# Gradients of the cost w.r.t. the weight vector and the bias term
+gw, gb = T.grad(cost, wrt=[w, b])
+
+# Compile. Each train() call returns the predictions and per-example losses
+# and moves w and b by 0.1 times their negative gradient.
+sgd_updates = [
+    (w, w - 0.1 * gw),
+    (b, b - 0.1 * gb),
+]
+train = theano.function(inputs=[x, y], outputs=[prediction, xent],
+                        updates=sgd_updates)
+predict = theano.function(inputs=[x], outputs=prediction)
+
+# Train
+for i in range(training_steps):
+    pred, err = train(inputs, targets)
+
+print("Final model:")
+print(w.get_value())
+print(b.get_value())
+print("target values for D:")
+print(D[1])
+print("prediction on D:")
+print(predict(D[0]))
--- a/theano_tutorial/tutorial_conditions.py
+++ b/theano_tutorial/tutorial_conditions.py
@@ -0,0 +1,34 @@
+# ifelse: lazy, scalar condition -- only the selected branch is computed
+# switch: elementwise, tensor condition -- both branches are computed
+
+from theano import tensor as T
+from theano.ifelse import ifelse
+import theano, time, numpy
+
+a,b = T.scalars('a', 'b')
+x,y = T.matrices('x', 'y')
+
+z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
+z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))
+# The 'vm' linker is what allows ifelse to skip the branch that is not selected.
+f_switch = theano.function([a, b, x, y], z_switch,
+                           mode=theano.Mode(linker='vm'))
+f_lazyifelse = theano.function([a, b, x, y], z_lazy,
+                               mode=theano.Mode(linker='vm'))
+
+val1 = 0.
+val2 = 1.
+big_mat1 = numpy.ones((10000, 1000))
+big_mat2 = numpy.ones((10000, 1000))
+
+n_times = 10
+# time.time() is used because time.clock() was removed in Python 3.8.
+tic = time.time()
+for i in range(n_times):
+    f_switch(val1, val2, big_mat1, big_mat2)
+print('time spent evaluating both values %f sec' % (time.time() - tic))
+
+tic = time.time()
+for i in range(n_times):
+    f_lazyifelse(val1, val2, big_mat1, big_mat2)
+print('time spent evaluating one value %f sec' % (time.time() - tic))
--- a/theano_tutorial/tutorial_derivates.py
+++ b/theano_tutorial/tutorial_derivates.py
@@ -0,0 +1,94 @@
+import numpy as np
+import theano
+import theano.tensor as T
+
+# Gradient of a scalar expression: y = x**3 + z**2, differentiated w.r.t. x and z.
+x = T.dscalar('x')
+z = T.dscalar('z')
+y = x ** 3 + z ** 2
+gy = T.grad(y, [x, z])  # list of two gradients: [dy/dx, dy/dz]
+
+f = theano.function([x, z], gy)
+# Debug aid (disabled): pretty-print the two compiled gradient graphs.
+# print(theano.pp(f.maker.fgraph.outputs[0]))
+# print(theano.pp(f.maker.fgraph.outputs[1]))
+
+print(f(4, 8))  # dy/dx = 3 * 4**2 = 48, dy/dz = 2 * 8 = 16
+
+# Gradient of the summed logistic function 1 / (1 + exp(-x)) w.r.t. a matrix x.
+x = T.dmatrix('x')
+l = T.sum(1 / (1 + T.exp(-x)))
+gl = T.grad(l, x)
+
+f_lg = theano.function([x], gl)
+
+print(f_lg([[0, 1], [-1, -2]]))
+
+
+# Jacobian of y = x**2 w.r.t. x, built by hand: scan walks the indices of y
+# and takes the gradient of each y[i] w.r.t. x.
+print('jacobian matrix1')
+x = T.dvector('x')
+y = x ** 2
+J, updates = theano.scan(lambda i, y, x : T.grad(y[i], x), sequences=T.arange(y.shape[0]), non_sequences=[y, x])
+f = theano.function([x], J, updates=updates)
+print(f([1, 2, 3, 4, 5]))  # diagonal matrix with 2 * x[i] on the diagonal
+
+# Same Jacobian from the built-in helper; y and x are still the ones
+# defined for the manual scan version above.
+J = theano.gradient.jacobian(y, x)
+f2 = theano.function([x], J)
+print(f2([1, 2, 3, 4, 5]))
+
+# Jacobian of the vector y = x.W w.r.t. the matrix W
+W, V = T.dmatrices('W', 'V')
+x = T.dvector('x')
+y = T.dot(x, W)
+J = theano.gradient.jacobian(y, W)
+f2 = theano.function([W, x], J)
+print(f2(np.array([[1, 1], [1, 1]]), np.array([0, 1])))
+# Dot product of that Jacobian with a second matrix V
+JV2 = T.dot(J, V)
+f2 = theano.function([W, V, x], JV2)
+print(f2(np.array([[1, 1], [1, 1]]),  np.array([[2, 2], [2, 2]]), np.array([0, 1])))
+
+# Jacobian of y = x**2 + z**2 w.r.t. both x and z: each scan step returns two gradients.
+print('jacobian matrix2')
+x = T.dvector('x')
+z = T.dvector('z')
+y = x ** 2 + z ** 2
+J, updates = theano.scan(lambda i, y, x, z: T.grad(y[i], [x, z]), sequences=T.arange(y.shape[0]), non_sequences=[y,x,z])
+f = theano.function([x, z], J, updates=updates)
+test = T.arange(y.shape[0])  # the index sequence handed to scan, compiled separately for inspection
+t_f = theano.function([x, z], test)
+print(f([4, 4], [1, 1]))
+print(t_f([4, 4], [1, 1]))
+
+# Hessian of cost = sum(x**3): scan differentiates each entry of the gradient gy w.r.t. x.
+x = T.dvector('x')
+y = x ** 3
+cost = y.sum()
+gy = T.grad(cost, x)
+H, updates = theano.scan(lambda i, gy, x : T.grad(gy[i], x), sequences=T.arange(gy.shape[0]), non_sequences=[gy, x])
+f = theano.function([x], H, updates=updates)
+print(f([4, 4]))  # second derivative of x**3 is 6 * x, so 24 on the diagonal
+
+# Jacobian-times-vector products via Theano's dedicated Rop / Lop operators
+
+# R-operator: Jacobian of y w.r.t. W multiplied on the right by V
+W = T.dmatrix('W')
+V = T.dmatrix('V')
+x = T.dvector('x')
+y = T.dot(x, W)
+JV = T.Rop(y, W, V)
+f = theano.function([W, V, x], JV)
+print(f([[1, 1], [1, 1]], [[2, 2], [2, 2]], [0,1]))
+
+# L-operator: v multiplied on the left of the Jacobian of y w.r.t. W
+W = T.dmatrix('W')
+v = T.dvector('v')
+x = T.dvector('x')
+y = T.dot(x, W)
+VJ = T.Lop(y, W, v)
+f = theano.function([v,x], VJ)  # W is not an input: for y = x.W this product depends only on v and x
+print(f([2, 2], [0, 1]))
--- a/theano_tutorial/tutorial_loop.py
+++ b/theano_tutorial/tutorial_loop.py
@@ -0,0 +1,100 @@
+import theano
+import theano.tensor as T
+
+k = T.iscalar("k")
+A = T.vector("A")
+
+# Symbolic A**k: start from a vector of ones and multiply by A, k times.
+def multiply_by_base(running_product, base):
+    return running_product * base
+powers, power_updates = theano.scan(fn=multiply_by_base, outputs_info=T.ones_like(A),
+                                    non_sequences=A, n_steps=k)
+
+# scan returns every intermediate power A**1 .. A**k, but only A**k is
+# wanted. Keeping just the last entry lets scan notice that the earlier
+# ones are unused and avoid spending memory on them.
+final_result = powers[-1]
+
+# compiled function that returns A**k
+power = theano.function(inputs=[A, k], outputs=final_result, updates=power_updates)
+
+print(power(range(10), 2))
+print(power(range(10), 4))
+
+print('P2:')
+import numpy
+
+coefficients = T.vector("coefficients")
+x = T.scalar("x")
+max_coefficients_supported = 10000
+def add_term(coefficient, exponent, partial_sum, free_variable):
+    """Scan step: extend the running sum by coefficient * free_variable ** exponent."""
+    return partial_sum + (coefficient * (free_variable ** exponent))
+# components[i] is the running sum of terms 0..i; the exponents come from the arange sequence
+components, updates = theano.scan(
+    fn=add_term,
+    outputs_info=T.zeros(1),
+    sequences=[coefficients, T.arange(max_coefficients_supported)],
+    non_sequences=x)
+polynomial = components.sum()  # sum over all running sums
+pol = components[-1]           # last running sum
+
+# Compile a function that returns every running sum
+calculate_polynomial = theano.function(inputs=[coefficients, x], outputs=components)
+
+# Test: print the scan output next to the polynomial written out by hand
+test_coefficients = numpy.asarray([1, 0, 2], dtype=numpy.float32)
+test_value = 3
+print(calculate_polynomial(test_coefficients, test_value))
+print(1.0 * (3 ** 0) + 0.0 * (3 ** 1) + 2.0 * (3 ** 2))
+
+print('P3:')
+import numpy as np
+import theano
+import theano.tensor as T
+
+up_to = T.iscalar("up_to")
+seq = T.arange(up_to)
+
+def accumulate_by_adding(arange_val, prior_result):
+    """Scan step: return the running total plus the current sequence value."""
+    return prior_result + arange_val
+
+# The initial total is created with the dtype of 'seq'. Using
+# T.as_tensor_variable(0) instead would give a value of dtype 'int8' by
+# default, and an unauthorized implicit downcast from the dtype of 'seq'
+# to 'int8' would occur.
+outputs_info = T.as_tensor_variable(np.asarray(0, dtype=seq.dtype))
+scan_result, scan_updates = theano.scan(
+    fn=accumulate_by_adding,
+    sequences=seq,
+    outputs_info=outputs_info)
+triangular_sequence = theano.function(inputs=[up_to], outputs=scan_result)
+
+# test: print the scan output next to the closed-form values
+some_num = 15
+print(triangular_sequence(some_num))
+print([n * (n + 1) // 2 for n in range(some_num)])
+
+print('P4:')
+location = T.imatrix("location")
+values = T.vector("values")
+output_model = T.matrix("output_model")
+
+def set_value_at_position(a_location, a_value, output_model):
+    """Scan step: zero matrix shaped like output_model with a_value written at a_location."""
+    blank_model = T.zeros_like(output_model)
+    row, col = a_location[0], a_location[1]
+    return T.set_subtensor(blank_model[row, col], a_value)
+
+result, updates = theano.scan(fn=set_value_at_position,
+                              sequences=[location, values],
+                              non_sequences=output_model, outputs_info=None)
+
+assign_values_at_positions = theano.function(inputs=[location, values, output_model], outputs=result)
+
+# test: one (row, column) pair per value, written into a 5x5 model
+test_locations = numpy.asarray([[1, 1], [2, 3]], dtype=numpy.int32)
+test_values = numpy.asarray([42, 50], dtype=numpy.float32)
+test_output_model = numpy.zeros((5, 5), dtype=numpy.float32)
+print(assign_values_at_positions(test_locations, test_values, test_output_model))