"""Count the most frequent syntactic-function tags in two learner corpora.

Downloads the A2 and B1 level texts, parses each with estnltk's VISLCG3
parser and writes the 30 most common ``@TAG`` labels of each text, one
``tag:count`` pair per line, to ``A2_lauseliikmed.txt`` and
``B1_lauseliikmed.txt``.
"""

import re
import urllib.request
from collections import Counter

from estnltk import Text
from estnltk.syntax.parsers import VISLCG3Parser

# A syntactic-function label in the parser output: "@" followed by uppercase
# letters and/or angle brackets. Raw string: the original non-raw pattern
# relied on the invalid escapes "\<" and "\>", which newer Python versions
# warn about. Inside a character class "<" and ">" need no escaping.
_TAG_PATTERN = re.compile(r"@[A-Z<>]+")

# How many of the most frequent tags are written to each report file.
_TOP_N = 30


def _fetch_lowercased(url):
    """Return the UTF-8 decoded, lower-cased body of the resource at *url*."""
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf8").lower()


def _syntactic_tags(lines):
    """Return the first ``@TAG`` label of every line in *lines* that has one.

    Lines without a label are skipped. Because the input text is lower-cased,
    a line can contain "@" (for example inside an e-mail address) without
    containing an uppercase tag; indexing the first ``findall`` result on
    such a line used to raise ``IndexError``.
    """
    matches = (_TAG_PATTERN.search(line) for line in lines)
    return [match.group(0) for match in matches if match]


def _write_most_common(counts, path):
    """Write the ``_TOP_N`` most common entries of *counts* to *path*.

    Each entry is written as ``tag:count`` on its own line.
    """
    # Explicit encoding keeps the output independent of the platform default.
    with open(path, "w", encoding="utf-8") as out:
        for tag, count in counts.most_common(_TOP_N):
            print(tag + ":", count, sep="", file=out)


aadress1 = "http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/A2_2018_I.txt"
A2 = _fetch_lowercased(aadress1)
aadress2 = "http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/B1_2018_I.txt"
B1 = _fetch_lowercased(aadress2)

parser = VISLCG3Parser()

andmed1 = parser.parse_text(Text(A2), return_type="vislcg3")
lauseliikmed1 = _syntactic_tags(andmed1)
kogused1 = Counter(lauseliikmed1)
_write_most_common(kogused1, "A2_lauseliikmed.txt")

andmed2 = parser.parse_text(Text(B1), return_type="vislcg3")
lauseliikmed2 = _syntactic_tags(andmed2)
kogused2 = Counter(lauseliikmed2)
_write_most_common(kogused2, "B1_lauseliikmed.txt")