luscenje_struktur/issue1000/step1.py
2020-03-02 19:13:19 +01:00

44 lines
937 B
Python

import sqlite3
import sys
STRUCTURE_ID = '1'

# Joins every match word with the colocation its match belongs to, restricted
# to colocations of a single structure (bound as the only parameter).
_QUERY = """SELECT Matches.match_id, Matches.word_id, Colocations.colocation_id
    FROM Matches, Colocations, ColocationMatches
    WHERE Matches.match_id = ColocationMatches.mid_match_id
    AND Colocations.colocation_id = ColocationMatches.mid_colocation_id
    AND Colocations.structure_id = ?"""

# A progress counter is written to stderr after this many kept rows.
_PROGRESS_EVERY = 10000


def _collect_word_numbers(rows):
    """Group one word number per match by colocation id.

    ``rows`` is any iterable of ``(match_id, word_id, colocation_id)``
    tuples, e.g. an executed sqlite3 cursor.  Only the first row of each run
    of consecutive rows sharing a ``match_id`` is kept; for that row,
    characters 2..8 of ``word_id`` are parsed as an integer and appended to
    the list of its colocation.

    Returns a dict mapping colocation id -> list of ints, in first-seen order.
    Raises ``ValueError`` if the sliced part of a word id is not an integer.

    NOTE(review): the de-duplication only compares against the immediately
    preceding row, so it relies on all rows of one match arriving
    contiguously.  The query has no ORDER BY, so that ordering is up to
    SQLite -- confirm this is acceptable.
    """
    grouped = {}
    prev_mid = None
    kept = 0
    for mid, wid, cid in rows:
        if mid == prev_mid:
            # Same match as the previous row: only its first word is used.
            continue
        prev_mid = mid

        # presumably word ids carry a fixed-width number at positions 2..8;
        # verify against the data that produced the database.
        grouped.setdefault(cid, []).append(int(wid[2:9]))

        kept += 1
        if kept % _PROGRESS_EVERY == 0:
            print("\r{}".format(kept), end="", flush=True, file=sys.stderr)

    # Terminate the "\r"-rewritten progress line.
    print("", file=sys.stderr)
    return grouped


def main(argv=None):
    """Print, per colocation of ``STRUCTURE_ID``, its id followed by word numbers.

    ``argv`` defaults to ``sys.argv``; ``argv[1]`` is the path of the SQLite
    database to read.  Each output line on stdout is the colocation id
    followed by the space-separated word numbers collected for it.

    Returns 0 on success and 2 (after a usage message on stderr) when the
    database path is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "step1.py"
        print("usage: {} DATABASE".format(prog), file=sys.stderr)
        return 2

    con = sqlite3.connect(argv[1])
    try:
        cur = con.cursor()
        cur.execute(_QUERY, (STRUCTURE_ID, ))
        grouped = _collect_word_numbers(cur)
    finally:
        # Close even if the query or parsing fails; the results are already
        # fully materialised in ``grouped`` by the time we get here.
        con.close()

    for cid, wids in grouped.items():
        print(cid, *wids)
    return 0


if __name__ == "__main__":
    sys.exit(main())