From caa463c8d2cfb4efd88efbb3dec9f2665032e749 Mon Sep 17 00:00:00 2001 From: Luka Date: Thu, 12 Apr 2018 10:52:12 +0200 Subject: [PATCH] Added some predictions and some learners --- .gitignore | 1 + .idea/workspace.xml | 365 +++++++++++++++++++++----------------------- prepare_data.py | 24 ++- 3 files changed, 198 insertions(+), 192 deletions(-) diff --git a/.gitignore b/.gitignore index f2b01ae..16a6d2e 100644 --- a/.gitignore +++ b/.gitignore @@ -20,6 +20,7 @@ lib64/ parts/ sdist/ var/ +.idea/ *.egg-info/ .installed.cfg *.egg diff --git a/.idea/workspace.xml b/.idea/workspace.xml index d7293b6..156233c 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -2,14 +2,14 @@ - - - + - - - + + + + + @@ -325,15 +223,15 @@ @@ -359,7 +257,6 @@ - @@ -372,8 +269,103 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -883,7 +875,7 @@ - + @@ -893,26 +885,18 @@ - - + + - - - - - - - - - - + + @@ -921,7 +905,6 @@ - @@ -929,7 +912,6 @@ - @@ -945,7 +927,7 @@ - + @@ -953,98 +935,103 @@ - + - + - - - - - - - - - - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - - + + - + - + - + - - + + - + - - + + + + + + + + + + - + + + + + + + diff --git a/prepare_data.py b/prepare_data.py index f6d23ac..fae903d 100644 --- a/prepare_data.py +++ b/prepare_data.py @@ -51,7 +51,8 @@ class Data: def generate_data(self, train_inputs_name, test_inputs_name, validate_inputs_name, test_and_validation_size=0.1, force_override=False, content_name='SlovarIJS_BESEDE_utf8.lex', content_shuffle_vector='content_shuffle_vector', shuffle_vector='shuffle_vector', - inputs_location='../../internal_representations/inputs/', content_location='../../../data/', test_set=False): + inputs_location='../../internal_representations/inputs/', content_location='../../../data/', + test_set=False, complete_set=False): content_path = '{}{}'.format(content_location, content_name) train_path = '{}{}.h5'.format(inputs_location, train_inputs_name) test_path = '{}{}.h5'.format(inputs_location, test_inputs_name) @@ -78,6 +79,16 @@ class Data: self.x_other_features_test = self.x_other_features_validate self.y_test = self.y_validate + if complete_set: + self.x_train = np.concatenate((self.x_train, self.x_test, self.x_validate), axis=0) + self.x_other_features_train = np.concatenate((self.x_other_features_train, self.x_other_features_test, self.x_other_features_validate), + axis=0) + self.y_train = np.concatenate((self.y_train, self.y_test, self.y_validate), axis=0) + + self.x_test = self.x_validate + self.x_other_features_test = self.x_other_features_validate + self.y_test = self.y_validate + def _generate_inputs(self, content_location, content_shuffle_vector_location, shuffle_vector_location, test_and_validation_size, train_path, test_path, validate_path): print('READING CONTENT...') @@ -956,12 +967,19 @@ class Data: i += 1 return res - def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None): + def test_accuracy(self, predictions, x, x_other_features, y, dictionary, feature_dictionary, vowels, syllable_dictionary=None, + threshold=0.4999955): errors = [] num_of_pred = len(predictions) num_of_correct_pred = 0 for i in range(predictions.shape[0]): - if (np.around(predictions[i]) == y[i]).all(): + correct_prediction = True + for j in range(len(y[i])): + if (predictions[i][j] < threshold and y[i][j] == 1.0) or (predictions[i][j] >= threshold and y[i][j] == 0.0): + correct_prediction = False + break + # if (np.around(predictions[i]) == y[i]).all(): + if correct_prediction: num_of_correct_pred += 1 else: if self._input_type == 'l':