import urllib.request from estnltk import Text import pandas as pd aadress1 = "http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/A2_2018_I.txt" A2 = urllib.request.urlopen(aadress1).read().decode("utf8").lower() sonadearv1 = len(A2.split()) aadress2 = "http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/B1_2018_I.txt" B1 = urllib.request.urlopen(aadress2).read().decode("utf8").lower() sonadearv2 = len(B1.split()) t1 = Text(A2) t2 = Text(B1) df1_1 = t1.get.postag_descriptions.as_dataframe.groupby("postag_descriptions").postag_descriptions.count().to_frame() df1_1["Sõnaliik"] = df1_1.index df1_1 = df1_1.rename(columns = {"postag_descriptions":"A2_sagedus"}) df1_2 = t2.get.postag_descriptions.as_dataframe.groupby("postag_descriptions").postag_descriptions.count().to_frame() df1_2["Sõnaliik"] = df1_2.index df1_2 = df1_2.rename(columns = {"postag_descriptions":"B1_sagedus"}) df1 = df1_1.merge(df1_2, left_on = "Sõnaliik", right_on = "Sõnaliik").fillna(0)[["Sõnaliik", "A2_sagedus", "B1_sagedus"]] df1 = df1.iloc[1:] df1 = df1.sort_values(by="Sõnaliik") df1["A2_osakaal"] = df1.A2_sagedus / df1.A2_sagedus.sum() * 100 df1["B1_osakaal"] = df1.B1_sagedus / df1.B1_sagedus.sum() * 100 df1["Osakaaluvahe"] = df1.A2_osakaal - df1.B1_osakaal df1["Osakaalusuhe"] = df1.A2_osakaal / df1.B1_osakaal df1 = df1.round(3) df1 = df1.rename(columns = {"A2_sagedus":"Sagedus A2-tasemel", "B1_sagedus":"Sagedus B1-tasemel", "A2_osakaal":"Osakaal A2-tasemel (%)", "B1_osakaal":"Osakaal B1-tasemel (%)"}) df2_1 = pd.read_csv("http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/A2_lauseliikmed.txt", sep = ":") df2_2 = pd.read_csv("http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/B1_lauseliikmed.txt", sep = ":") df2_3 = df2_1.merge(df2_2, left_on = "Lauseliige", right_on = "Lauseliige").fillna(0)[["Lauseliige", "A2_sagedus", "B1_sagedus"]] df2_4 = pd.read_csv("http://www.tlu.ee/~kais/Digihum_tehnoloogiad/Nadal6/Syntaksimargendid.txt", sep = ":") df2 = df2_3.merge(df2_4, how = "left", left_on = "Lauseliige", right_on = "Märgend").fillna(0)[["Selgitus", "A2_sagedus", "B1_sagedus"]] #df2 = df2.sort_values(by = "Selgitus") df2["A2_osakaal"] = df2.A2_sagedus / df2.A2_sagedus.sum() * 100 df2["B1_osakaal"] = df2.B1_sagedus / df2.B1_sagedus.sum() * 100 df2["Osakaaluvahe"] = df2.A2_osakaal - df2.B1_osakaal df2["Osakaalusuhe"] = df2.A2_osakaal / df2.B1_osakaal df2 = df2.round(3) df2 = df2.rename(columns = {"Selgitus":"Lauseliige", "A2_sagedus":"Sagedus A2-tasemel", "B1_sagedus":"Sagedus B1-tasemel", "A2_osakaal":"Osakaal A2-tasemel (%)", "B1_osakaal":"Osakaal B1-tasemel (%)"}) df2.index = df2.index + 1 f = open("oppijavordlus.html", "w") f.write("\n") f.write("\n") f.write(" \n") f.write("
\n") f.write("