More tutorial

2017-04-13 11:10:23 +02:00 · 2017-04-13 11:10:23 +02:00 · 658c55d280
commit 658c55d280
parent 9455f26423
5 changed files with 295 additions and 0 deletions
--- a/.idea/vcs.xml
+++ b/.idea/vcs.xml
@ -0,0 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
 </project>
--- a/theano_tutorial/logistic_regression.py
+++ b/theano_tutorial/logistic_regression.py
@ -0,0 +1,61 @@
 import numpy
 import theano
 import theano.tensor as T
 # Shorthand for NumPy's global random module; no seed is set in this script.
 rng = numpy.random
 # Dataset dimensions.
 N = 400        # number of training samples
 feats = 784    # number of input variables per sample
 # Number of gradient-descent updates the training loop will run.
 training_steps = 10000
 # Synthetic dataset D = (input_values, target_class): normally distributed
 # inputs of shape (N, feats) and one integer class in {0, 1} per sample.
 # The inputs are drawn before the targets.
 D = (
     rng.randn(N, feats),
     rng.randint(low=0, high=2, size=N),
 )
 # Symbolic inputs: x is the matrix of samples, y the vector of targets.
 x, y = T.dmatrix("x"), T.dvector("y")
 # The model parameters are shared variables so that the values written by
 # one training update are still there for the next one.
 # The weight vector starts from a random draw, one entry per input variable.
 initial_weights = rng.randn(feats)
 w = theano.shared(initial_weights, name="w")
 # The bias term starts at zero.
 b = theano.shared(0., name="b")
 # Show the parameters before any training has happened.
 print("Initial model:")
 for parameter in (w, b):
     print(parameter.get_value())
 # Constants of the optimisation.
 l2_penalty = 0.01      # weight of the squared-weights term in the cost
 learning_rate = 0.1    # step size of every gradient-descent update
 # Build the symbolic expression graph.
 # Probability that the target is 1 under the logistic model.
 p_1 = 1 / (1 + T.exp(-T.dot(x, w) - b))
 # Hard decision: the probability thresholded at one half.
 prediction = p_1 > 0.5
 # Per-sample cross-entropy between the targets and the probabilities.
 xent = -y * T.log(p_1) - (1-y) * T.log(1-p_1)
 # The cost to minimise: mean cross-entropy plus the penalty on w.
 cost = xent.mean() + l2_penalty * (w ** 2).sum()
 # Symbolic gradients of the cost with respect to the weight vector w and
 # the bias term b.
 gw, gb = T.grad(cost, [w, b])
 # Compile the graph into callables.
 # Each call to train() returns the predictions and per-sample losses and
 # moves both parameters one step against their gradients.
 sgd_step = (
     (w, w - learning_rate * gw),
     (b, b - learning_rate * gb),
 )
 train = theano.function(
     inputs=[x, y],
     outputs=[prediction, xent],
     updates=sgd_step,
 )
 # predict() only evaluates the thresholded output; it has no updates.
 predict = theano.function(inputs=[x], outputs=prediction)
 # Unpack the dataset once instead of indexing D at every use.
 samples, labels = D
 # Run the gradient-descent updates; pred and err keep the outputs of the
 # most recent step.
 for _ in range(training_steps):
     pred, err = train(samples, labels)
 # Report the learned parameters.
 print("Final model:")
 for parameter in (w, b):
     print(parameter.get_value())
 # Show the targets next to what the trained model predicts for the same
 # inputs.
 print("target values for D:")
 print(labels)
 print("prediction on D:")
 print(predict(samples))
--- a/theano_tutorial/tutorial_conditions.py
+++ b/theano_tutorial/tutorial_conditions.py
@ -0,0 +1,34 @@
 # if: (if(smth) else)
 # switch: (if(smth) elif(smth))
 from theano import tensor as T
 from theano.ifelse import ifelse
 import theano, time, numpy
 # Symbolic inputs: two scalars to compare and two matrices to average.
 a, b = T.scalars('a', 'b')
 x, y = T.matrices('x', 'y')
 # Both expressions select mean(x) when a < b and mean(y) otherwise; they
 # differ only in the op used to make the choice (switch vs. ifelse).
 z_switch = T.switch(T.lt(a, b), T.mean(x), T.mean(y))
 z_lazy = ifelse(T.lt(a, b), T.mean(x), T.mean(y))
 # Compile both variants with the same 'vm' linker.
 f_switch = theano.function([a, b, x, y], z_switch,
                            mode=theano.Mode(linker='vm'))
 f_lazyifelse = theano.function([a, b, x, y], z_lazy,
                                mode=theano.Mode(linker='vm'))
 # Benchmark inputs: val1 < val2, so the mean of the first matrix is selected.
 val1 = 0.
 val2 = 1.
 big_mat1 = numpy.ones((10000, 1000))
 big_mat2 = numpy.ones((10000, 1000))
 n_times = 10
 # time.clock() was deprecated in Python 3.3 and removed in 3.8;
 # time.perf_counter() is the documented replacement for timing a code section.
 tic = time.perf_counter()
 for i in range(n_times):
     f_switch(val1, val2, big_mat1, big_mat2)
 print('time spent evaluating both values %f sec' % (time.perf_counter() - tic))
 tic = time.perf_counter()
 for i in range(n_times):
     f_lazyifelse(val1, val2, big_mat1, big_mat2)
 print('time spent evaluating one value %f sec' % (time.perf_counter() - tic))
--- a/theano_tutorial/tutorial_derivates.py
+++ b/theano_tutorial/tutorial_derivates.py
@ -0,0 +1,94 @@
 import numpy as np
 import theano
 import theano.tensor as T
 # Plain symbolic gradient of a scalar expression of two scalar inputs.
 x, z = T.dscalar('x'), T.dscalar('z')
 expression = x ** 3 + z ** 2
 # Asking T.grad for a list of variables yields one derivative per variable,
 # here the derivatives with respect to x and to z.
 partials = T.grad(expression, [x, z])
 gradient_at = theano.function([x, z], partials)
 # Uncomment to pretty-print the compiled graph of each derivative:
 # print(theano.pp(gradient_at.maker.fgraph.outputs[0]))
 # print(theano.pp(gradient_at.maker.fgraph.outputs[1]))
 # Evaluate both derivatives at x=4, z=8 (3*x**2 = 48 and 2*z = 16).
 print(gradient_at(4, 8))
 # Gradient of the summed logistic function with respect to a matrix input.
 x = T.dmatrix('x')
 # The elementwise logistic values are summed into a single scalar, which is
 # then differentiated with respect to every entry of x.
 logistic_total = T.sum(1 / (1 + T.exp(-x)))
 logistic_grad = T.grad(logistic_total, x)
 f_lg = theano.function([x], logistic_grad)
 sample_points = [[0, 1], [-1, -2]]
 print(f_lg(sample_points))
 # Alternative input left over from experimentation:
 # np.matrix([[1, 2], [3, 4]])
 # Jacobian of y = x ** 2: first assembled row by row with scan, then
 # obtained from Theano's ready-made helper.
 print('jacobian matrix1')
 x = T.dvector('x')
 y = x ** 2
 def _gradient_of_entry(index, outputs, inputs):
     """Return the gradient of ``outputs[index]`` with respect to ``inputs``."""
     return T.grad(outputs[index], inputs)
 # scan walks over the indices of y and collects one gradient per index.
 entry_indices = T.arange(y.shape[0])
 manual_jacobian, scan_updates = theano.scan(
     _gradient_of_entry,
     sequences=entry_indices,
     non_sequences=[y, x],
 )
 manual_fn = theano.function([x], manual_jacobian, updates=scan_updates)
 evaluation_point = [1, 2, 3, 4, 5]
 print(manual_fn(evaluation_point))
 # The same Jacobian through the built-in implementation.
 # W, V = T.dmatrices('W', 'V')
 builtin_jacobian = theano.gradient.jacobian(y, x)
 builtin_fn = theano.function([x], builtin_jacobian)
 print(builtin_fn(evaluation_point))
 # Jacobian of the vector-matrix product y = dot(x, W) with respect to the
 # matrix W, followed by its product with a second matrix V.
 W, V = T.dmatrices('W', 'V')
 x = T.dvector('x')
 y = T.dot(x, W)
 J = theano.gradient.jacobian(y, W)
 # Concrete values shared by both evaluations below.
 w_value = np.array([[1, 1], [1, 1]])
 v_value = np.array([[2, 2], [2, 2]])
 x_value = np.array([0, 1])
 # First print: the Jacobian itself.
 jacobian_fn = theano.function([W, x], J)
 print(jacobian_fn(w_value, x_value))
 # Second print: the Jacobian multiplied by V inside the graph.
 JV2 = T.dot(J, V)
 jacobian_times_v_fn = theano.function([W, V, x], JV2)
 print(jacobian_times_v_fn(w_value, v_value, x_value))
 # Jacobians of y = x ** 2 + z ** 2 with respect to both inputs at once.
 print('jacobian matrix2')
 x, z = T.dvector('x'), T.dvector('z')
 y = x ** 2 + z ** 2
 def _gradients_of_entry(index, outputs, first, second):
     """Return the gradients of ``outputs[index]`` w.r.t. both inputs."""
     return T.grad(outputs[index], [first, second])
 # Because the step returns a list of two gradients, scan hands back one
 # stacked result per input.
 jacobians, scan_updates = theano.scan(
     _gradients_of_entry,
     sequences=T.arange(y.shape[0]),
     non_sequences=[y, x, z],
 )
 jacobians_fn = theano.function([x, z], jacobians, updates=scan_updates)
 # Debug aid: a second function exposing the index sequence scan iterates over.
 index_sequence = T.arange(y.shape[0])
 indices_fn = theano.function([x, z], index_sequence)
 x_value, z_value = [4, 4], [1, 1]
 print(jacobians_fn(x_value, z_value))
 print(indices_fn(x_value, z_value))
 # Hessian of cost = sum(x ** 3), built by differentiating each entry of the
 # gradient once more.
 x = T.dvector('x')
 cubes = x ** 3
 cost = cubes.sum()
 first_derivative = T.grad(cost, x)
 def _second_derivative_row(index, gradient, inputs):
     """Return the gradient of ``gradient[index]`` with respect to ``inputs``."""
     return T.grad(gradient[index], inputs)
 # One scan step per entry of the gradient.
 hessian, scan_updates = theano.scan(
     _second_derivative_row,
     sequences=T.arange(first_derivative.shape[0]),
     non_sequences=[first_derivative, x],
 )
 hessian_fn = theano.function([x], hessian, updates=scan_updates)
 print(hessian_fn([4, 4]))
 # Jacobian-times-vector product through the R-operator: the Jacobian of
 # y = dot(x, W) with respect to W is applied to V by T.Rop itself.
 W, V = T.dmatrix('W'), T.dmatrix('V')
 x = T.dvector('x')
 y = T.dot(x, W)
 JV = T.Rop(y, W, V)
 rop_fn = theano.function([W, V, x], JV)
 w_value = [[1, 1], [1, 1]]
 v_value = [[2, 2], [2, 2]]
 x_value = [0, 1]
 print(rop_fn(w_value, v_value, x_value))
 # L-operator: the vector v is applied from the left to the Jacobian of
 # y = dot(x, W) with respect to W.
 W = T.dmatrix('W')
 v, x = T.dvector('v'), T.dvector('x')
 y = T.dot(x, W)
 VJ = T.Lop(y, W, v)
 # Only v and x are inputs of the compiled function; W is not passed in.
 lop_fn = theano.function([v, x], VJ)
 v_value, x_value = [2, 2], [0, 1]
 print(lop_fn(v_value, x_value))
--- a/theano_tutorial/tutorial_loop.py
+++ b/theano_tutorial/tutorial_loop.py
@ -0,0 +1,100 @@
 import theano
 import theano.tensor as T
 # Elementwise power A ** k, computed by repeated multiplication inside scan.
 k = T.iscalar("k")
 A = T.vector("A")
 def _times_base(running_product, base):
     """Multiply the product accumulated so far by the base vector once more."""
     return running_product * base
 # Symbolic description of the result: start from a vector of ones and apply
 # the step k times.
 all_powers, updates = theano.scan(
     fn=_times_base,
     outputs_info=T.ones_like(A),
     non_sequences=A,
     n_steps=k,
 )
 # scan returns A**1 through A**k, but only the last one is wanted. Dropping
 # the others here lets scan avoid keeping them in memory.
 final_result = all_powers[-1]
 # Compiled function that returns A**k.
 power = theano.function(inputs=[A, k], outputs=final_result, updates=updates)
 # Squares and fourth powers of 0..9.
 for exponent in (2, 4):
     print(power(range(10), exponent))
 # Second example: a polynomial accumulated term by term with scan.
 print('P2:')
 import numpy
 coefficients = theano.tensor.vector("coefficients")
 x = T.scalar("x")
 # Upper bound on the length of the exponent sequence handed to scan.
 max_coefficients_supported = 10000
 def _add_term(coefficient, exponent, running_sum, free_variable):
     """Add ``coefficient * free_variable ** exponent`` to the running sum."""
     return running_sum + (coefficient * (free_variable ** exponent))
 # Each step adds one term to the previous partial sum, so `components`
 # holds the running sums, starting from a one-element zero vector.
 exponents = theano.tensor.arange(max_coefficients_supported)
 components, updates = theano.scan(
     fn=_add_term,
     outputs_info=T.zeros(1),
     sequences=[coefficients, exponents],
     non_sequences=x,
 )
 # Neither of these two expressions is used by the compiled function below.
 # Since `components` already contains running sums, `pol` (the last entry)
 # is the one that corresponds to the full polynomial.
 polynomial = components.sum()
 pol = components[-1]
 # Compile a function that returns every partial sum.
 calculate_polynomial = theano.function(
     inputs=[coefficients, x],
     outputs=components,
 )
 # Test: 1 + 0*x + 2*x**2 at x = 3, followed by the value computed by hand.
 test_coefficients = numpy.asarray([1, 0, 2], dtype=numpy.float32)
 test_value = 3
 print(calculate_polynomial(test_coefficients, test_value))
 print(1.0 * (3 ** 0) + 0.0 * (3 ** 1) + 2.0 * (3 ** 2))
 # Third example: running totals of an integer range, computed with scan.
 print('P3:')
 # theano and T are already imported at the top of this script; the numpy
 # alias `np` is introduced here.
 import numpy as np
 import theano
 import theano.tensor as T
 # Symbolic integer scalar: the exclusive upper bound of the range to sum.
 up_to = T.iscalar("up_to")
 # define a named function, rather than using lambda
 def accumulate_by_adding(arange_val, prior_result):
     """Return the previous total increased by the current sequence value.

     Written as a scan step: ``arange_val`` is the current element of the
     sequence and ``prior_result`` is the total produced by the previous step.
     """
     running_total = prior_result + arange_val
     return running_total
 # The values 0 .. up_to-1 that scan will add up one at a time.
 counter = T.arange(up_to)
 # The starting total is created with the same dtype as the sequence.
 # Using T.as_tensor_variable(0) instead would give an 'int8' value by
 # default, and scan would then hit an unauthorized implicit downcast from
 # the dtype of the sequence:
 # initial_total = T.as_tensor_variable(0)
 initial_total = T.as_tensor_variable(np.asarray(0, counter.dtype))
 scan_result, scan_updates = theano.scan(
     fn=accumulate_by_adding,
     outputs_info=initial_total,
     sequences=counter,
 )
 triangular_sequence = theano.function(inputs=[up_to], outputs=scan_result)
 # Test: print the scan output, then the same numbers computed in plain Python.
 some_num = 15
 print(triangular_sequence(some_num))
 expected = []
 total = 0
 for n in range(some_num):
     total += n
     expected.append(total)
 print(expected)
 # Fourth example: writing single values into otherwise empty matrices.
 print('P4:')
 # Symbolic inputs: one (row, column) pair per row of `location`, the value
 # to store for each pair, and a matrix acting as the template for the output.
 location, values, output_model = (
     T.imatrix("location"),
     T.vector("values"),
     T.matrix("output_model"),
 )
 def set_value_at_position(a_location, a_value, output_model):
     """Return a zero matrix like ``output_model`` with a single entry set.

     ``a_location`` provides the row index at position 0 and the column index
     at position 1; ``a_value`` is stored at that entry. The contents of
     ``output_model`` are not read, only ``zeros_like`` is applied to it.
     """
     blank = T.zeros_like(output_model)
     row = a_location[0]
     column = a_location[1]
     return T.set_subtensor(blank[row, column], a_value)
 # Apply set_value_at_position to every (location, value) pair; no state is
 # carried from one step to the next, hence outputs_info=None.
 result, updates = theano.scan(
     fn=set_value_at_position,
     outputs_info=None,
     sequences=[location, values],
     non_sequences=output_model,
 )
 assign_values_at_positions = theano.function(
     inputs=[location, values, output_model],
     outputs=result,
 )
 # Test: store 42 at (1, 1) and 50 at (2, 3), each in its own 5x5 matrix.
 test_locations = numpy.asarray([[1, 1], [2, 3]], dtype=numpy.int32)
 test_values = numpy.asarray([42, 50], dtype=numpy.float32)
 test_output_model = numpy.zeros((5, 5), dtype=numpy.float32)
 filled = assign_values_at_positions(test_locations, test_values, test_output_model)
 print(filled)