"""Build a boolean feature table for the fillpred model.

Originally added as ``tools/fillpred_model/fpmodel.py`` in commit ad20f11
("fillpred_model in progress").
"""
from pathlib import Path
import numpy as np
import pandas as pd


def main(ssj_mate=None):
    """Read a tab-separated corpus and one-hot encode it into boolean features.

    Every input line is split on tabs into at most 15 fields. The resulting
    table has one row per input line and these columns, in order:

    * ``"biti"`` -- True when field 2 equals ``"biti"``.
    * one column per distinct ``|``-separated tag seen in field 6 (sorted) --
      True when the row carries that tag.
    * ``"fillpred"`` -- True when field 12 equals ``"Y"``.

    Args:
        ssj_mate: Path of the corpus file. Defaults to
            ``../../data/mate_train/sl.all.mate`` (relative to the current
            working directory).

    Returns:
        pandas.DataFrame of dtype ``bool`` with the columns described above.
    """
    if ssj_mate is None:
        ssj_mate = Path("../../data/mate_train/sl.all.mate")

    # Field positions inside a split line.
    lemma_col = 2
    feat_col = 6
    # NOTE(review): assumes the CoNLL-2009 column layout, where the 13th field
    # is FILLPRED and holds "Y" for predicates -- confirm against the corpus.
    fillpred_col = 12

    # '~' is used as a separator that is not expected to occur in the data, so
    # each line lands in a single column and is then split on tabs by hand;
    # n=14 keeps any extra trailing fields together in the last column.
    df = pd.read_csv(ssj_mate, sep='~', header=None)
    df = df.iloc[:, 0].str.split('\t', n=14, expand=True)
    print(df.head())

    # Tag set of every row, computed once and reused for both passes below.
    # Rows without a usable feature field contribute no tags.
    row_tags = [
        set(feats.split("|")) if isinstance(feats, str) else set()
        for feats in df[feat_col]
    ]

    msd_set = set()
    for tags in row_tags:
        msd_set.update(tags)

    msd_labels = sorted(msd_set)
    labels = ["biti"] + msd_labels + ["fillpred"]
    print("labels: \n", labels)

    # Collect plain Python rows first and build the frame in one go; growing a
    # DataFrame row by row copies the whole table on every insertion.
    rows = []
    for lemma, tags, fillpred in zip(df[lemma_col], row_tags, df[fillpred_col]):
        row = [lemma == "biti"]
        row.extend(label in tags for label in msd_labels)
        row.append(fillpred == "Y")
        rows.append(row)

    ndf = pd.DataFrame(rows, columns=labels, dtype=bool)
    return ndf


if __name__ == "__main__":
    main()