"""Count adjacent word-class pairs in two texts and write the frequent ones.

The script downloads the A2 and B1 texts, lower-cases them, asks estnltk for
the ``postag_descriptions`` of every word, counts how often each pair of
neighbouring descriptions occurs, and writes the pairs that reach a minimum
frequency to one comma-separated file per text, most frequent first.
"""

import urllib.request

import nltk
from estnltk import Text

# Minimum frequency a pair needs in order to be written out.
# NOTE(review): the two thresholds differ; presumably they are scaled to the
# size of each text -- confirm with the author before changing them.
A2_MIN_SAGEDUS = 10
B1_MIN_SAGEDUS = 34


def _loe_tekst(aadress):
    """Download *aadress*, decode it as UTF-8 and return it lower-cased."""
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(aadress) as vastus:
        return vastus.read().decode("utf8").lower()


def _sonaliigipaarid(tekst):
    """Return ``(pair, frequency)`` items for neighbouring descriptions in *tekst*."""
    leidja = nltk.collocations.BigramCollocationFinder.from_words(
        Text(tekst).postag_descriptions
    )
    return leidja.ngram_fd.items()


def _kirjuta_paarid(failinimi, pais, paarid, min_sagedus):
    """Write *pais* and every pair with frequency >= *min_sagedus* to *failinimi*.

    Each data line has the form ``first-second,frequency``. Lines are ordered
    by descending frequency.
    """
    # Explicit UTF-8: the header contains non-ASCII letters, so relying on
    # the platform default encoding makes the output locale-dependent.
    with open(failinimi, "w", encoding="utf-8") as f:
        print(pais, file=f)
        # reversed(sorted(...)) is kept on purpose: sorted(..., reverse=True)
        # would order equal-frequency pairs differently and change the output.
        for paar, sagedus in reversed(sorted(paarid, key=lambda p: p[1])):
            if sagedus >= min_sagedus:
                print("-".join(paar), ",", sagedus, sep="", file=f)


# Both texts are fetched and analysed before anything is written, so a failed
# download does not leave a partial set of output files behind.
aadress1 = "http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/A2_2018_I.txt"
tekst1 = _loe_tekst(aadress1)
aadress2 = "http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/B1_2018_I.txt"
tekst2 = _loe_tekst(aadress2)

paarid1 = _sonaliigipaarid(tekst1)
paarid2 = _sonaliigipaarid(tekst2)

_kirjuta_paarid(
    "A2_sonaliigipaarid_seletused.txt",
    "Sõnaliigipaar,A2_sagedus",
    paarid1,
    A2_MIN_SAGEDUS,
)
_kirjuta_paarid(
    "B1_sonaliigipaarid_seletused.txt",
    "Sõnaliigipaar,B1_sagedus",
    paarid2,
    B1_MIN_SAGEDUS,
)