from pathlib import Path

import numpy as np
import pandas as pd


def main():
    """Build the label set for a boolean feature table from a tab-separated file.

    Reads ``../../data/mate_train/sl.all.mate``, splits every line on tabs
    into at most 15 columns, collects the distinct ``|``-separated values of
    column 6 and prints the resulting label list
    (``"biti"`` + sorted column-6 values + ``"fillpred"``).

    For every input row a list of boolean flags is then computed: whether
    column 2 equals ``"biti"``, followed by one flag per sorted column-6
    value telling whether the row carries it.

    NOTE(review): the file looks like CoNLL-2009 / mate-tools output, which
    would make column 2 the lemma and column 6 the morphological features --
    confirm against the data.
    """
    ssj_mate = Path("../../data/mate_train/sl.all.mate")

    # sep='~' is used so that each line lands in a single column, which is
    # then split on tabs explicitly below.
    # NOTE(review): this presumably relies on '~' never occurring in the
    # data -- verify, otherwise such lines are cut at the first '~'.
    df = pd.read_csv(ssj_mate, sep='~', header=None)
    df = df.iloc[:, 0].str.split('\t', n=14, expand=True)
    print(df.head())

    # Only column 6 is needed here, so iterate it directly rather than
    # paying for DataFrame.iterrows().
    msd_set = set()
    for feats in df[6]:
        msd_set.update(feats.split("|"))

    msd_labels = sorted(msd_set)
    labels = ["biti"] + msd_labels + ["fillpred"]
    print("labels: \n", labels)

    # Empty target frame; nothing is written into it yet (see TODO below).
    ndf = pd.DataFrame(columns=labels, dtype=bool)

    for lemma, feats in zip(df[2], df[6]):
        present = set(feats.split("|"))
        row = [lemma == "biti"]
        # One flag per feature label, in the same order as `labels`.
        # (The original called row.extend() with no argument, which raises
        # TypeError on the first row.)
        row.extend(label in present for label in msd_labels)
        # TODO: append the "fillpred" flag and store `row` in `ndf`.
        # NOTE(review): in CoNLL-2009 FILLPRED would be column index 12
        # ("Y" / "_") -- confirm before wiring it in.


if __name__ == "__main__":
    main()