# Deprecated!
from time import time

import requests
from bs4 import BeautifulSoup

from valency import k_utils

SSKJ_BASE = "http://bos.zrc-sazu.si/cgi/a03.exe?name=sskj_testa&expression="


class SskjScraper:
    """Scrape the text of the first SSKJ search result for a word.

    The search page is fetched from ``SSKJ_BASE`` with the query word
    appended, parsed with BeautifulSoup, and the text fragments of the first
    result entry are handed to ``k_utils.tokenize``.
    """

    def __init__(self, timeout=30):
        """Create a scraper.

        Args:
            timeout: Seconds passed to ``requests.get`` as its ``timeout``
                argument, so that an unresponsive server cannot block the
                caller forever. Pass ``None`` to wait indefinitely.
        """
        self.base_url = SSKJ_BASE
        self.timeout = timeout

    def scrape(self, word):
        """Fetch the SSKJ result page for ``word`` and tokenize its first entry.

        Args:
            word: Search expression appended verbatim to the base URL.

        Returns:
            An empty list when the page reports no hits or contains no result
            entry; otherwise whatever ``k_utils.tokenize`` returns for the
            collected text fragments (presumably a unique set of words, as the
            original comment stated -- confirm against ``k_utils``).

        Raises:
            requests.RequestException: Propagated from ``requests.get`` on
                connection errors or when the timeout expires.
        """
        response = requests.get(self.base_url + word, timeout=self.timeout)
        soup = BeautifulSoup(response.content, "html.parser")

        # Check for failure: a page with two or more <h2> headings is treated
        # as the "no hits" page (the extra heading reads
        # "Zadetkov ni bilo: ...").
        if len(soup.find_all("h2")) >= 2:
            return []

        li_elements = soup.find_all("li", class_="nounderline")
        if not li_elements:
            return []
        first_entry = li_elements[0]

        # The parsed entry has further <li> elements nested inside the first
        # one, so collect descendant texts only until the second <li> is
        # reached.
        txts = []
        for el in first_entry.find_all():
            if el.name == "li":
                break
            txts.append(el.get_text())

        print("sskj scraped {}.".format(word))
        return k_utils.tokenize(txts)


if __name__ == "__main__":
    sskjScr = SskjScraper()
    tp = sskjScr.scrape("čaj")
    print(tp)