You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
28 lines
599 B
28 lines
599 B
from pathlib import Path
|
|
import numpy as np
|
|
import pandas as pd
|
|
|
|
|
|
def main(ssj_mate=Path("../../data/mate_train/sl.all.mate")):
    """Build a boolean feature table from a tab-separated ``.mate`` file.

    Every input line becomes one output row with these columns, in order:

    * ``"biti"`` -- True when column 2 of the line equals ``"biti"``;
    * one column per distinct ``|``-separated value found in column 6
      (sorted alphabetically) -- True when the line carries that value;
    * ``"fillpred"`` -- True when column 12 equals ``"Y"``.

    Args:
        ssj_mate: Path of the input file. Defaults to the training file the
            script was written for.

    Returns:
        A ``pandas.DataFrame`` of dtype ``bool`` with one row per input line.
    """
    # NOTE(review): '~' is presumably chosen because it never occurs in the
    # data, so read_csv puts each whole line into a single column -- confirm.
    raw = pd.read_csv(ssj_mate, sep='~', header=None)
    # Split every line into at most 15 tab-separated fields (columns 0..14).
    df = raw.iloc[:, 0].str.split('\t', n=14, expand=True)
    print(df.head())

    # Split column 6 once per row; the sets are reused for fast membership
    # tests when the rows are encoded below.
    feature_sets = [set(feats.split("|")) for feats in df[6]]
    msd_labels = sorted(set().union(*feature_sets))

    labels = ["biti"] + msd_labels + ["fillpred"]
    print("labels: \n", labels)

    rows = []
    for lemma, present, fillpred in zip(df[2], feature_sets, df[12]):
        row = [lemma == "biti"]
        # One flag per label, in the same order as the header above.
        row.extend(msd in present for msd in msd_labels)
        # assumes column 12 is the CoNLL-2009 FILLPRED flag, where "Y" marks
        # a predicate -- TODO confirm against the corpus documentation.
        row.append(fillpred == "Y")
        rows.append(row)

    # Building the frame from all rows at once avoids growing it row by row.
    return pd.DataFrame(rows, columns=labels, dtype=bool)
|
|
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()