From c83f9dd296827826c98ebea9c67d89dab8bda4a5 Mon Sep 17 00:00:00 2001 From: Joachim Bache-Mathiesen Date: Tue, 18 May 2021 13:56:12 +0200 Subject: [PATCH 1/6] Initial version of documentation generation --- __init__.py | 0 generate_documentation.py | 19 + unlabeled_preprocessing/prep_hdro_v2.ipynb | 2358 +++++++++++--------- 3 files changed, 1305 insertions(+), 1072 deletions(-) create mode 100644 __init__.py create mode 100644 generate_documentation.py diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/generate_documentation.py b/generate_documentation.py new file mode 100644 index 0000000..e7e5fdf --- /dev/null +++ b/generate_documentation.py @@ -0,0 +1,19 @@ +import pdoc +import os +context = pdoc.Context() + +modules = pdoc.Module(".", context=context, skip_errors=True) + +pdoc.link_inheritance(context) + +def recursive_htmls(mod): + yield mod.name, mod.html() + for submod in mod.submodules(): + yield from recursive_htmls(submod) + +for module_name, html in recursive_htmls(modules): + fname = f"documentation/{'/'.join(module_name.split('.'))}/index.html" + os.makedirs(os.path.dirname(fname), exist_ok=True) + with open(fname,"w", encoding="utf-8") as f: + f.writelines(html) + diff --git a/unlabeled_preprocessing/prep_hdro_v2.ipynb b/unlabeled_preprocessing/prep_hdro_v2.ipynb index 820f22f..6cf9b35 100644 --- a/unlabeled_preprocessing/prep_hdro_v2.ipynb +++ b/unlabeled_preprocessing/prep_hdro_v2.ipynb @@ -7,16 +7,9 @@ "outputs": [ { "output_type": "stream", - "name": "stdout", + "name": "stderr", "text": [ - "Something went wrong loading the Economic Fitness Dataset [Errno 2] File b'../data/unlabeled/raw/Economic_Fitness_CSV\\\\Country.csv' does not exist: b'../data/unlabeled/raw/Economic_Fitness_CSV\\\\Country.csv'\n", - "Something went wrong loading the Education Dataset [Errno 2] File b'../data/unlabeled/raw/Edstats_csv/EdStatsCountry.csv' does not exist: b'../data/unlabeled/raw/Edstats_csv/EdStatsCountry.csv'\n", 
- "../data/unlabeled/raw/__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", - " aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)\n", - "../data/unlabeled/raw/__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", - " aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)\n", - "../data/unlabeled/raw/__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", - " aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)\n" + "..\\data\\unlabeled\\raw\\__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)\n..\\data\\unlabeled\\raw\\__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)\n..\\data\\unlabeled\\raw\\__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)\n" ] } ], @@ -38,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "metadata": { "tags": [] }, @@ -58,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 6, "metadata": { "tags": [] }, @@ -316,10 +309,10 @@ "\n", "[195 rows x 98 columns]" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gross enrolment ratio, pre-primary (% of preschool-age children)Percentage of primary schools with access to the internetPercentage of secondary schools with access to the internetGross enrolment ratio, tertiary (% of tertiary school-age population)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, female (%)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, male (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are female (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are male (%)Primary school teachers trained to teach (%)Pupil-teacher ratio, primary school (pupils per teacher)
AFG26.08013.22036.9201.9486.00627.24468.95779.72638.04225.8...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AGO30.23223.13338.0564.0236.35930.000150.52665.99531.82566.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ALB93.17493.70092.4979.70210.61429.50819.64252.8522.88161.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AND72.32771.48473.32710.43910.56446.429NaNNaN0.07788.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ARG57.15859.16154.82811.12310.72939.87762.78221.80544.78192.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
WSM74.94279.12771.583NaNNaN10.00023.88629.9830.19718.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEM28.02019.92036.9182.8805.1460.97160.35245.62729.16237.3...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZAF75.47874.97778.20710.03110.29145.33367.90810.29858.55866.9...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZMB44.44038.48854.0686.2838.17617.964120.11278.13417.86144.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZWE64.93559.79270.7838.0668.92334.57186.13564.73914.64532.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n

195 rows × 98 columns

\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gross enrolment ratio, pre-primary (% of preschool-age children)Percentage of primary schools with access to the internetPercentage of secondary schools with access to the internetGross enrolment ratio, tertiary (% of tertiary school-age population)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, female (%)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, male (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are female (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are male (%)Primary school teachers trained to teach (%)Pupil-teacher ratio, primary school (pupils per teacher)
AFG26.08013.22036.9201.9486.00627.24468.95779.72638.04225.8...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AGO30.23223.13338.0564.0236.35930.000150.52665.99531.82566.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ALB93.17493.70092.4979.70210.61429.50819.64252.8522.88161.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AND72.32771.48473.32710.43910.56446.429NaNNaN0.07788.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ARG57.15859.16154.82811.12310.72939.87762.78221.80544.78192.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
WSM74.94279.12771.583NaNNaN10.00023.88629.9830.19718.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEM28.02019.92036.9182.8805.1460.97160.35245.62729.16237.3...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZAF75.47874.97778.20710.03110.29145.33367.90810.29858.55866.9...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZMB44.44038.48854.0686.2838.17617.964120.11278.13417.86144.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZWE64.93559.79270.7838.0668.92334.57186.13564.73914.64532.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n

195 rows × 98 columns

\n
" }, "metadata": {}, - "execution_count": 3 + "execution_count": 6 } ], "source": [ @@ -338,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -356,7 +349,7 @@ ] }, "metadata": {}, - "execution_count": 4 + "execution_count": 7 } ], "source": [ @@ -372,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -388,14 +381,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Max, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 10.616150019135096 %\n" + "Max, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 10.616150019135095 %\n" ] } ], @@ -405,7 +398,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -574,10 +567,10 @@ "\n", "[8 rows x 67 columns]" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
count175.000000167.000000167.000000174.000000174.000000193.000000185.000000180.000000195.000000195.000000...167.000000178.000000178.000000167.000000167.000000156.000000152.000000156.000000152.000000152.000000
mean61.06807461.73601265.8235878.4800179.13340222.98129548.30934638.25801139.39142659.257436...0.93899414440.99851124458.3313540.7026830.7421860.54506419.38914523.40138519.0025990.595250
std29.61079829.28405026.3953583.4213322.80298411.82878440.52883427.774415146.48585523.231038...0.07455915359.93598623943.4826280.1658670.1431940.1727899.9487189.7440049.7773240.190002
min0.0000001.7380009.0000001.0700002.2560000.1000000.2830000.1440000.01100013.200000...0.488000186.041000640.1050000.2700000.4320000.1760004.4440008.5000004.4240000.232000
25%37.29650036.92500045.3565005.8500006.73375014.76500013.17700012.5382502.08100041.200000...0.9085002925.6142506275.9337500.5770000.6215000.40475010.79225016.60300010.5940000.431500
50%64.82800068.06700070.6820009.1110009.25250021.09400040.53600032.5335008.77200060.000000...0.9650008399.44250016951.3570000.7300000.7600000.52750017.93450021.77950017.5260000.604000
75%89.14500087.99050090.91650011.23050011.55100030.00000070.50400063.34550028.56250078.000000...0.98600022583.77950035488.4760000.8315000.8485000.69175027.61525028.62500027.0125000.767250
max100.000000100.000000100.00000013.88200014.43100055.660000186.53800094.5810001433.784000100.000000...1.03600071387.276000107833.0290000.9490000.9650000.85800045.30700056.99600044.1670000.899000
\n

8 rows × 67 columns

\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
count175.000000167.000000167.000000174.000000174.000000193.000000185.000000180.000000195.000000195.000000...167.000000178.000000178.000000167.000000167.000000156.000000152.000000156.000000152.000000152.000000
mean61.06807461.73601265.8235878.4800179.13340222.98129548.30934638.25801139.39142659.257436...0.93899414440.99851124458.3313540.7026830.7421860.54506419.38914523.40138519.0025990.595250
std29.61079829.28405026.3953583.4213322.80298411.82878440.52883427.774415146.48585523.231038...0.07455915359.93598623943.4826280.1658670.1431940.1727899.9487189.7440049.7773240.190002
min0.0000001.7380009.0000001.0700002.2560000.1000000.2830000.1440000.01100013.200000...0.488000186.041000640.1050000.2700000.4320000.1760004.4440008.5000004.4240000.232000
25%37.29650036.92500045.3565005.8500006.73375014.76500013.17700012.5382502.08100041.200000...0.9085002925.6142506275.9337500.5770000.6215000.40475010.79225016.60300010.5940000.431500
50%64.82800068.06700070.6820009.1110009.25250021.09400040.53600032.5335008.77200060.000000...0.9650008399.44250016951.3570000.7300000.7600000.52750017.93450021.77950017.5260000.604000
75%89.14500087.99050090.91650011.23050011.55100030.00000070.50400063.34550028.56250078.000000...0.98600022583.77950035488.4760000.8315000.8485000.69175027.61525028.62500027.0125000.767250
max100.000000100.000000100.00000013.88200014.43100055.660000186.53800094.5810001433.784000100.000000...1.03600071387.276000107833.0290000.9490000.9650000.85800045.30700056.99600044.1670000.899000
\n

8 rows × 67 columns

\n
" }, "metadata": {}, - "execution_count": 7 + "execution_count": 10 } ], "source": [ @@ -596,7 +589,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -666,22 +659,13 @@ ] }, "metadata": {}, - "execution_count": 8 + "execution_count": 12 } ], "source": [ - "# Column values are shown\n", - "\n", "df_inicator_values.columns.values" ] }, - { - "source": [ - "## Initial Correlation Matrix" - ], - "cell_type": "markdown", - "metadata": {} - }, { "cell_type": "code", "execution_count": 13, @@ -716,23 +700,16 @@ "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size),cmap='mako')" ] }, - { - "source": [ - "## Division of the columns" - ], - "cell_type": "markdown", - "metadata": {} - }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 14, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "['Population with at least some secondary education (% ages 25 and older)', 'Population with at least some secondary education, female (% ages 25 and older)', 'Population with at least some secondary education, male (% ages 25 and older)', 'Share of seats in parliament (% held by women)', 'Vulnerable employment (% of total employment)', 'Urban population (%)', 'Labour force participation rate (% ages 15 and older), female', 'Labour force participation rate (% ages 15 and older), male', 'Remittances, inflows (% of GDP)', 'Foreign direct investment, net inflows (% of GDP)', 'Infants lacking immunization, measles (% of one-year-olds)', 'Infants lacking immunization, DTP (% of one-year-olds)', 'Gross fixed capital formation (% of GDP)', 'Inequality in education (%)', 'Inequality in life expectancy (%)', 'Unemployment, youth (% ages 15?24)', 'Private capital flows (% of GDP)', 'Exports and imports (% of GDP)', 'Unemployment, total (% of labour force)', 'Youth not in school or employment (% ages 15-24)', 'Labour force participation rate (% 
ages 15 and older)', 'Employment to population ratio (% ages 15 and older)', 'Employment in agriculture (% of total employment)', 'Employment in services (% of total employment)', 'Working poor at PPP$3.20 a day (% of total employment)', 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 'Gross capital formation (% of GDP)', 'Overall loss in HDI due to inequality (%)', 'Inequality in income (%)'] \n 29\n['Gross domestic product (GDP), total (2017 PPP $ billions)', 'GDP per capita (2017 PPP $)', 'Gross national income (GNI) per capita (constant 2017 PPP$)', 'Estimated gross national income per capita, female (2017 PPP $)', 'Estimated gross national income per capita, male (2017 PPP $)'] \n 5\n['Gender Inequality Index (GII)', 'Inequality-adjusted education index', 'Inequality-adjusted life expectancy index', 'Life expectancy index', 'Income index', 'Education index', 'Human Development Index (HDI)', 'Gender Development Index (GDI)', 'Human Development Index (HDI), female', 'Human Development Index (HDI), male', 'Inequality-adjusted income index'] \n 11\n['Mean years of schooling, female (years)', 'Mean years of schooling, male (years)', 'Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Life expectancy at birth, female (years)', 'Life expectancy at birth, male (years)', 'Expected years of schooling, female (years)', 'Expected years of schooling, male (years)'] \n 9\n['Total population (millions)', 'Population ages 15?64 (millions)', 'Population ages 65 and older (millions)', 'Population under age 5 (millions)'] \n 4\n['Adolescent birth rate (births per 1,000 women ages 15-19)', 'Sex ratio at birth (male to female births)', 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 'HDI rank', 'Total unemployment rate (female to male ratio)', 'Youth unemployment rate (female to male ratio)', 
'Coefficient of human inequality', 'Inequality-adjusted HDI (IHDI)'] \n 9\n" + "['Population with at least some secondary education (% ages 25 and older)', 'Population with at least some secondary education, female (% ages 25 and older)', 'Population with at least some secondary education, male (% ages 25 and older)', 'Share of seats in parliament (% held by women)', 'Vulnerable employment (% of total employment)', 'Urban population (%)', 'Labour force participation rate (% ages 15 and older), female', 'Labour force participation rate (% ages 15 and older), male', 'Remittances, inflows (% of GDP)', 'Foreign direct investment, net inflows (% of GDP)', 'Infants lacking immunization, measles (% of one-year-olds)', 'Infants lacking immunization, DTP (% of one-year-olds)', 'Gross fixed capital formation (% of GDP)', 'Inequality in education (%)', 'Inequality in life expectancy (%)', 'Unemployment, youth (% ages 15?24)', 'Private capital flows (% of GDP)', 'Exports and imports (% of GDP)', 'Unemployment, total (% of labour force)', 'Youth not in school or employment (% ages 15-24)', 'Labour force participation rate (% ages 15 and older)', 'Employment to population ratio (% ages 15 and older)', 'Employment in agriculture (% of total employment)', 'Employment in services (% of total employment)', 'Working poor at PPP$3.20 a day (% of total employment)', 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 'Gross capital formation (% of GDP)', 'Overall loss in HDI due to inequality (%)', 'Inequality in income (%)'] \n 29\n['Gross domestic product (GDP), total (2017 PPP $ billions)', 'GDP per capita (2017 PPP $)', 'Gross national income (GNI) per capita (constant 2017 PPP$)', 'Estimated gross national income per capita, female (2017 PPP $)', 'Estimated gross national income per capita, male (2017 PPP $)'] \n 5\n['Gender Inequality Index (GII)', 'Inequality-adjusted education index', 'Inequality-adjusted life expectancy index', 'Life 
expectancy index', 'Income index', 'Education index', 'Human Development Index (HDI)', 'Gender Development Index (GDI)', 'Human Development Index (HDI), female', 'Human Development Index (HDI), male', 'Inequality-adjusted income index'] \n 11\n['Mean years of schooling, female (years)', 'Mean years of schooling, male (years)', 'Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Life expectancy at birth, female (years)', 'Life expectancy at birth, male (years)', 'Expected years of schooling, female (years)', 'Expected years of schooling, male (years)'] \n 9\n['Adolescent birth rate (births per 1,000 women ages 15-19)', 'Total population (millions)', 'Sex ratio at birth (male to female births)', 'Population ages 15?64 (millions)', 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 'Population ages 65 and older (millions)', 'Population under age 5 (millions)', 'HDI rank', 'Total unemployment rate (female to male ratio)', 'Youth unemployment rate (female to male ratio)', 'Coefficient of human inequality', 'Inequality-adjusted HDI (IHDI)'] \n 13\n67\n" ] } ], @@ -741,7 +718,6 @@ "money_columns = []\n", "index_columns = []\n", "year_columns = []\n", - "millions_columns = []\n", "rest = []\n", "\n", "for column in df_inicator_values.columns.values:\n", @@ -753,8 +729,6 @@ " year_columns.append(column)\n", " elif 'index' in column.lower():\n", " index_columns.append(column)\n", - " elif 'millions' in column.lower():\n", - " millions_columns.append(column)\n", " else:\n", " rest.append(column)\n", "\n", @@ -762,8 +736,9 @@ "print(money_columns,'\\n',len(money_columns))\n", "print(index_columns,'\\n',len(index_columns))\n", "print(year_columns,'\\n',len(year_columns))\n", - "print(millions_columns,'\\n',len(millions_columns))\n", - "print(rest,'\\n',len(rest))" + "print(rest,'\\n',len(rest))\n", + "\n", + 
"print(len(percentage_columns)+len(money_columns)+len(index_columns)+len(year_columns)+len(rest))" ] }, { @@ -775,7 +750,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -783,20 +758,19 @@ "split_df2 = df_inicator_values[money_columns]\n", "split_df3 = df_inicator_values[index_columns]\n", "split_df4 = df_inicator_values[year_columns]\n", - "split_df5 = df_inicator_values[millions_columns]\n", - "split_df6 = df_inicator_values[rest]" + "split_df5 = df_inicator_values[rest]" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "- Dataframe 1 -\nMax, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 14.624226348364274 %\n- Dataframe 2 -\nMax, min and mean number of missing values for the columns\nMax: 8.717948717948717 %\nMin: 2.051282051282051 %\nMean: 6.153846153846153 %\n- Dataframe 3 -\nMax, min and mean number of missing values for the columns\nMax: 20.0 %\nMin: 2.051282051282051 %\nMean: 9.790209790209792 %\n- Dataframe 4 -\nMax, min and mean number of missing values for the columns\nMax: 10.76923076923077 %\nMin: 1.0256410256410255 %\nMean: 5.584045584045585 %\n- Dataframe 5 -\nMax, min and mean number of missing values for the columns\nMax: 5.128205128205129 %\nMin: 0.0 %\nMean: 3.8461538461538467 %\n- Dataframe 6 -\nMax, min and mean number of missing values for the columns\nMax: 22.05128205128205 %\nMin: 3.076923076923077 %\nMean: 9.230769230769232 %\n" + "- Dataframe 1 -\nMax, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 14.624226348364278 %\n- Dataframe 2 -\nMax, min and mean number of missing values for the columns\nMax: 8.717948717948717 %\nMin: 2.051282051282051 %\nMean: 6.153846153846153 %\n- Dataframe 3 -\nMax, min and mean number of missing values for the 
columns\nMax: 20.0 %\nMin: 2.051282051282051 %\nMean: 9.790209790209792 %\n- Dataframe 4 -\nMax, min and mean number of missing values for the columns\nMax: 10.76923076923077 %\nMin: 1.0256410256410255 %\nMean: 5.584045584045584 %\n- Dataframe 5 -\nMax, min and mean number of missing values for the columns\nMax: 22.05128205128205 %\nMin: 0.0 %\nMean: 7.57396449704142 %\n" ] } ], @@ -810,9 +784,7 @@ "print('- Dataframe 4 -')\n", "min4, max4 = print_missing_percentages(split_df4)\n", "print('- Dataframe 5 -')\n", - "min5, max5 = print_missing_percentages(split_df5)\n", - "print('- Dataframe 6 -')\n", - "min6, max6 = print_missing_percentages(split_df6)" + "min5, max5 = print_missing_percentages(split_df5)" ] }, { @@ -825,7 +797,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -833,125 +805,174 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 29)\n", - "[IterativeImputer] Ending imputation round 1/49, elapsed time 0.22\n", - "[IterativeImputer] Ending imputation round 2/49, elapsed time 0.30\n", - "[IterativeImputer] Ending imputation round 3/49, elapsed time 0.42\n", - "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.49\n", - "[IterativeImputer] Ending imputation round 5/49, elapsed time 0.56\n", - "[IterativeImputer] Ending imputation round 6/49, elapsed time 0.63\n", - "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.73\n", - "[IterativeImputer] Ending imputation round 8/49, elapsed time 0.81\n", - "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.92\n", - "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.97\n", - "[IterativeImputer] Ending imputation round 11/49, elapsed time 1.03\n", - "[IterativeImputer] Ending imputation round 12/49, elapsed time 1.08\n", - "[IterativeImputer] Ending imputation round 13/49, elapsed time 1.13\n", - "[IterativeImputer] Ending imputation round 14/49, elapsed time 
1.21\n", - "[IterativeImputer] Ending imputation round 15/49, elapsed time 1.30\n", - "[IterativeImputer] Ending imputation round 16/49, elapsed time 1.41\n", - "[IterativeImputer] Ending imputation round 17/49, elapsed time 1.50\n", - "[IterativeImputer] Ending imputation round 18/49, elapsed time 1.60\n", - "[IterativeImputer] Ending imputation round 19/49, elapsed time 1.69\n", - "[IterativeImputer] Ending imputation round 20/49, elapsed time 1.75\n", - "[IterativeImputer] Ending imputation round 21/49, elapsed time 1.81\n", - "[IterativeImputer] Ending imputation round 22/49, elapsed time 1.86\n", - "[IterativeImputer] Ending imputation round 23/49, elapsed time 1.92\n", - "[IterativeImputer] Ending imputation round 24/49, elapsed time 1.98\n", - "[IterativeImputer] Ending imputation round 25/49, elapsed time 2.06\n", - "[IterativeImputer] Ending imputation round 26/49, elapsed time 2.18\n", - "[IterativeImputer] Ending imputation round 27/49, elapsed time 2.26\n", - "[IterativeImputer] Ending imputation round 28/49, elapsed time 2.35\n", - "[IterativeImputer] Ending imputation round 29/49, elapsed time 2.43\n", - "[IterativeImputer] Ending imputation round 30/49, elapsed time 2.50\n", - "[IterativeImputer] Ending imputation round 31/49, elapsed time 2.57\n", - "[IterativeImputer] Ending imputation round 32/49, elapsed time 2.63\n", - "[IterativeImputer] Ending imputation round 33/49, elapsed time 2.71\n", - "[IterativeImputer] Ending imputation round 34/49, elapsed time 2.77\n", - "[IterativeImputer] Ending imputation round 35/49, elapsed time 2.90\n", - "[IterativeImputer] Ending imputation round 36/49, elapsed time 2.97\n", - "[IterativeImputer] Ending imputation round 37/49, elapsed time 3.09\n", - "[IterativeImputer] Ending imputation round 38/49, elapsed time 3.16\n", - "[IterativeImputer] Ending imputation round 39/49, elapsed time 3.22\n", - "[IterativeImputer] Ending imputation round 40/49, elapsed time 3.27\n", - "[IterativeImputer] Ending imputation 
round 41/49, elapsed time 3.32\n", - "[IterativeImputer] Ending imputation round 42/49, elapsed time 3.37\n", - "[IterativeImputer] Ending imputation round 43/49, elapsed time 3.44\n", - "[IterativeImputer] Ending imputation round 44/49, elapsed time 3.51\n", - "[IterativeImputer] Ending imputation round 45/49, elapsed time 3.61\n", - "[IterativeImputer] Ending imputation round 46/49, elapsed time 3.68\n", - "[IterativeImputer] Ending imputation round 47/49, elapsed time 3.75\n", - "[IterativeImputer] Ending imputation round 48/49, elapsed time 3.80\n", - "[IterativeImputer] Ending imputation round 49/49, elapsed time 3.86\n", + "[IterativeImputer] Ending imputation round 1/49, elapsed time 0.08\n", + "[IterativeImputer] Change: 228.09157262466636, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 2/49, elapsed time 0.15\n", + "[IterativeImputer] Change: 62.25538442972473, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 3/49, elapsed time 0.21\n", + "[IterativeImputer] Change: 42.97964424882681, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.27\n", + "[IterativeImputer] Change: 32.49599901077046, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 5/49, elapsed time 0.40\n", + "[IterativeImputer] Change: 24.095208515105927, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 6/49, elapsed time 0.46\n", + "[IterativeImputer] Change: 31.682426801495314, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.52\n", + "[IterativeImputer] Change: 140.975818147823, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 8/49, elapsed time 0.57\n", + "[IterativeImputer] Change: 39.46695219020222, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.62\n", + "[IterativeImputer] Change: 49.53774118145801, 
scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.67\n", + "[IterativeImputer] Change: 44.68154513428162, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 11/49, elapsed time 0.72\n", + "[IterativeImputer] Change: 46.60916297395449, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 12/49, elapsed time 0.78\n", + "[IterativeImputer] Change: 21.86554010415628, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 13/49, elapsed time 0.83\n", + "[IterativeImputer] Change: 25.92892647183729, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 14/49, elapsed time 0.88\n", + "[IterativeImputer] Change: 18.83927866098054, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 15/49, elapsed time 0.94\n", + "[IterativeImputer] Change: 38.405696393605794, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 16/49, elapsed time 0.99\n", + "[IterativeImputer] Change: 38.25297377918183, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 17/49, elapsed time 1.04\n", + "[IterativeImputer] Change: 41.19811600250341, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 18/49, elapsed time 1.11\n", + "[IterativeImputer] Change: 34.112333701182216, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 19/49, elapsed time 1.16\n", + "[IterativeImputer] Change: 5.287239305308968, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 20/49, elapsed time 1.21\n", + "[IterativeImputer] Change: 9.819564256199563, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 21/49, elapsed time 1.26\n", + "[IterativeImputer] Change: 19.412604471395234, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 22/49, elapsed time 1.31\n", + 
"[IterativeImputer] Change: 39.00936641411021, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 23/49, elapsed time 1.36\n", + "[IterativeImputer] Change: 21.32069071919966, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 24/49, elapsed time 1.42\n", + "[IterativeImputer] Change: 27.607026109939916, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 25/49, elapsed time 1.46\n", + "[IterativeImputer] Change: 12.932035632308278, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 26/49, elapsed time 1.52\n", + "[IterativeImputer] Change: 20.500731676727007, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 27/49, elapsed time 1.56\n", + "[IterativeImputer] Change: 37.90038155709468, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 28/49, elapsed time 1.62\n", + "[IterativeImputer] Change: 23.402932009174002, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 29/49, elapsed time 1.67\n", + "[IterativeImputer] Change: 37.483562456385734, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 30/49, elapsed time 1.73\n", + "[IterativeImputer] Change: 38.620880987628844, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 31/49, elapsed time 1.78\n", + "[IterativeImputer] Change: 34.235210757797404, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 32/49, elapsed time 1.83\n", + "[IterativeImputer] Change: 5.958748778584713, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 33/49, elapsed time 1.88\n", + "[IterativeImputer] Change: 12.587404647069434, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 34/49, elapsed time 1.93\n", + "[IterativeImputer] Change: 19.321006684253774, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation 
round 35/49, elapsed time 1.98\n", + "[IterativeImputer] Change: 23.179317411983668, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 36/49, elapsed time 2.03\n", + "[IterativeImputer] Change: 16.391282472541278, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 37/49, elapsed time 2.08\n", + "[IterativeImputer] Change: 16.142877556306736, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 38/49, elapsed time 2.13\n", + "[IterativeImputer] Change: 8.395072354352404, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 39/49, elapsed time 2.18\n", + "[IterativeImputer] Change: 6.103784872152183, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 40/49, elapsed time 2.23\n", + "[IterativeImputer] Change: 4.99694813675092, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 41/49, elapsed time 2.27\n", + "[IterativeImputer] Change: 4.166011730288048, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 42/49, elapsed time 2.32\n", + "[IterativeImputer] Change: 3.547339840851814, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 43/49, elapsed time 2.37\n", + "[IterativeImputer] Change: 3.076150838166326, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 44/49, elapsed time 2.43\n", + "[IterativeImputer] Change: 2.704813693700327, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 45/49, elapsed time 2.47\n", + "[IterativeImputer] Change: 2.4026926256614516, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 46/49, elapsed time 2.51\n", + "[IterativeImputer] Change: 2.1506562406006946, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 47/49, elapsed time 2.56\n", + "[IterativeImputer] Change: 1.93535948311116, scaled tolerance: 0.381517 \n", + 
"[IterativeImputer] Ending imputation round 48/49, elapsed time 2.61\n", + "[IterativeImputer] Change: 1.7485725296176398, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 49/49, elapsed time 2.66\n", + "[IterativeImputer] Change: 1.5846246034457572, scaled tolerance: 0.381517 \n", "[IterativeImputer] Completing matrix with shape (195, 29)\n", "[IterativeImputer] Ending imputation round 1/49, elapsed time 0.00\n", "[IterativeImputer] Ending imputation round 2/49, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 3/49, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 5/49, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 6/49, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.03\n", "[IterativeImputer] Ending imputation round 8/49, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.04\n", "[IterativeImputer] Ending imputation round 11/49, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 12/49, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 13/49, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 12/49, elapsed time 0.05\n", + "[IterativeImputer] Ending imputation round 13/49, elapsed time 0.05\n", "[IterativeImputer] Ending imputation round 14/49, elapsed time 0.05\n", - "[IterativeImputer] Ending imputation round 15/49, elapsed time 0.05\n", - "[IterativeImputer] Ending imputation round 16/49, elapsed time 0.05\n", - 
"[IterativeImputer] Ending imputation round 17/49, elapsed time 0.06\n", - "[IterativeImputer] Ending imputation round 18/49, elapsed time 0.06\n", + "[IterativeImputer] Ending imputation round 15/49, elapsed time 0.06\n", + "[IterativeImputer] Ending imputation round 16/49, elapsed time 0.06\n", + "[IterativeImputer] Ending imputation round 17/49, elapsed time 0.07\n", + "[IterativeImputer] Ending imputation round 18/49, elapsed time 0.07\n", "[IterativeImputer] Ending imputation round 19/49, elapsed time 0.07\n", - "[IterativeImputer] Ending imputation round 20/49, elapsed time 0.07\n", - "[IterativeImputer] Ending imputation round 21/49, elapsed time 0.07\n", - "[IterativeImputer] Ending imputation round 22/49, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 23/49, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 24/49, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 25/49, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 26/49, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 27/49, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 28/49, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 29/49, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 30/49, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 31/49, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 32/49, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 33/49, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 34/49, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 35/49, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 36/49, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 37/49, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 38/49, elapsed time 0.13\n", - "[IterativeImputer] Ending imputation round 39/49, 
elapsed time 0.13\n", - "[IterativeImputer] Ending imputation round 40/49, elapsed time 0.13\n", - "[IterativeImputer] Ending imputation round 41/49, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 42/49, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 43/49, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 44/49, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 45/49, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 46/49, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 47/49, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 48/49, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 49/49, elapsed time 0.16\n", - "/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " \" reached.\", ConvergenceWarning)\n" + "[IterativeImputer] Ending imputation round 20/49, elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 21/49, elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 22/49, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 23/49, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 24/49, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 25/49, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 26/49, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 27/49, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 28/49, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 29/49, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 30/49, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 31/49, elapsed time 0.12\n", + "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: 
[IterativeImputer] Early stopping criterion not reached.\n", + " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n", + "[IterativeImputer] Ending imputation round 32/49, elapsed time 0.13\n", + "[IterativeImputer] Ending imputation round 33/49, elapsed time 0.13\n", + "[IterativeImputer] Ending imputation round 34/49, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 35/49, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 36/49, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 37/49, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 38/49, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 39/49, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 40/49, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 41/49, elapsed time 0.17\n", + "[IterativeImputer] Ending imputation round 42/49, elapsed time 0.17\n", + "[IterativeImputer] Ending imputation round 43/49, elapsed time 0.18\n", + "[IterativeImputer] Ending imputation round 44/49, elapsed time 0.18\n", + "[IterativeImputer] Ending imputation round 45/49, elapsed time 0.19\n", + "[IterativeImputer] Ending imputation round 46/49, elapsed time 0.19\n", + "[IterativeImputer] Ending imputation round 47/49, elapsed time 0.20\n", + "[IterativeImputer] Ending imputation round 48/49, elapsed time 0.20\n", + "[IterativeImputer] Ending imputation round 49/49, elapsed time 0.21\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 12 + "execution_count": 17 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABCkAAARiCAYAAACJaa3IAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzde7TlZ1kn+O9Tpy4hqSQQEsIlgZAEFERbpaTttsehBSWCyijQcmkFBilHF6Ijq5E1KIK3MdjION00TaEiyMjd1oiIihjtFtRUY0yTBCQEMGWIkPu9KrX3O39U0assss8mqbP3+3L257NWLU7t3958n9TlnKpvvb9nV2stAAAAAL1t6T0AAAAAQKKkAAAAAAahpAAAAACGoKQAAAAAhqCkAAAAAIagpAAAAACGoKQAAAAAhrB13hOq6iuTPDXJQ5K0JFcnuaC1dvmCZwMAAABWyLonKarqJ5K8PUkl+eskFx3++G1V9bLFjwcAAACsimqtzb5Y9XdJvqq1dtdRj29Pcmlr7RELng8AAABYEfNu95gmeXCSzxz1+IMOX7tbVbU7ye4kecNP/+Bjdz/jW49lxi87bTLplv2E73llt+xTX/nWbtkvfv2Pdsl93jU3dclNkjMe8qRu2Vf9wx90y35YzfzUs3DHPfbVXXKnN36iS26SfPzvXt8t+zlvfEe37E9+bnaBv0gf/7/+TZfcJDnugY/vlv2vbuv3a/xdN3+6W/ZXfOWPdcn9x2s+0CU3SSbTu+Y/aUEe/KLzu2X/v+/f3S37Jd/xhi65N/3Cc7rkJslna3u37Cf/ypu6ZV/6mgu6ZX/gg6+sbuHLcOl7+vzBYNG+6mlD/rzNKyl+LMmfVNUnklx1+LGHJjk3yYtmvai1tifJniSb9ycUAAAA2FDrlhSttfdX1SOTPC6HFmdWkn1JLmqt9TsuAAAAAGw6c9/do7U2TfKXS5gFAAAAWGFzSwoAAABYVT13Di7SkAspMuctSAEAAACWRUkBAAAADEFJAQAAAAxBSQEAAAAMweJMAAAAmGVysPcEK8VJCgAAAGAISgoAAABgCEoKAAAAYAh2UgAAAMAMbbo5d1JU7wFmcJICAAAAGIKSAgAAABiCkgIAAAAYgp0UAAAAMMtk0nuCleIkBQAAADAEJykWoN15W+8RujjhuH7ZWzrVbVX9duK2tjm3DI9sctwJXXK3rPX7zdXatFv2yffp16Mft73Pv5hsr37/zWtr2/tld9wv3vNfayYHb+2Yvnp2bG/dsrft6Je9o99v7W6mrd+/eu/Y2u/z2XRyZ7ds2EhOUgAAAABDcJICAAAAZmgTJ5iXyUkKAAAAYAhKCgAAAGAISgoAAABgCEoKAAAAYAgWZwIAAMAsFmculZMUAAAAwBCUFAAAAMAQlBQAAADAEOykAAAAgBna1E6KZXKSAgAAABiCkgIAAAAYgpICAAAAGIKdFAAAADDLZNJ7gpXiJAUAAAAwBCUFAAAAMIR7XVJU1fM3chAAAABgtR3LTopXJXnT3V2oqt1JdifJG376B7P7Gd96DDEAAADQR5sc7D3CSlm3pKiqS2ZdSnL6rNe11vYk2ZMkufQ97d4OBwAAAKyOeScpTk/ypCQ3HPV4JfnQQiYCAAAAVtK8kuK9SXa21i4++kJVXbiQiQAAAICVtG5J0Vp7wTrXnr3x4wAAAACr6lgWZwIAAMDmZnHmUt3rtyAFAAAA2EhKCgAAAGAISgoAAABgCHZSAAAAwAxtOuk9wkpxkgIAAAAYgpICAAAAGIKSAgAAABiCnRQAAAAwQ5sc7D3CSnGSAgAAABiCkgIAAAAYgpICAAAAGIKdFAAAADCLnRRL5SQFAAAAMISFn6Rok8miI2Zn33lbl9wtJ5zUJTdJDrbWLfuz11W37P139enbpq3fr++1LTu6Za+qtRuu6pJ71/5ru+QmyZba1i37ymv7/f669qY+uaf95Ntz5c88rUv28fuv65K
bJJP0+9o1Sb+vXWvb7tstu5fW8evmjTf2+7e5W2/sd3j5+hv7/RrvZUutdcv+/C39Pp+tbT2hWzZspE17kqJXQQEA91avggIAYBSbtqQAAAAAvrxYnAkAAAAztGm/W9VWkZMUAAAAwBCUFAAAAMAQlBQAAADAEOykAAAAgFkmB3tPsFKcpAAAAACGoKQAAAAAhqCkAAAAAIZgJwUAAADM0OykWConKQAAAIAhKCkAAACAISgpAAAAgCHYSQEAAACz2EmxVE5SAAAAAENQUgAAAABDmFtSVNVXVtUTqmrnUY+ft7ixAAAAgFWzbklRVS9O8rtJfiTJR6vqqUdc/oVFDgYAAACslnmLM1+Y5LGttVur6qwk766qs1prv5KkZr2oqnYn2Z0k//mnXpjdT3/iBo0LAAAAy9Omk94jrJR5JcVaa+3WJGmtfbqqHp9DRcXDsk5J0Vrbk2RPkrRL3tk2aFYAAABgE5u3k+KaqvraL3zncGHxHUlOTfLVixwMAAAAWC3zSorvT3LNkQ+01g621r4/yTcvbCoAAABg5ax7u0drbd861/5i48cBAACAgUwO9p5gpcx9C1IAAACAZVBSAAAAAENQUgAAAABDmPcWpAAAALCy2mTSe4SV4iQFAAAAMAQlBQAAADAEJQUAAAAwBDspAAAAYIY2Odh7hJXiJAUAAAAwBCUFAAAAMAQlBQAAADAEJQUAAAAwBIszAQAAYJapxZnL5CQFAAAAMIRqrS004FvOffRiAwZ0cME/puv58wt+rlv2ru97W7fs0275eJfcbbXWJTdJLjlwc7fs+538qG7Zt962r1t2belz+Kx1bO/vc5/Tu2WvbdnWLXvrtp3dsq+99iNdclubdMk9pGd2v8/jjzj3+V1yb7ryLV1yk2Safn9GunX7/btlP3hyS7fsz+3o83n8gfs/1yU3ST417ffr7OSTzu2WfdMtV3bL/uQnP1rdwpfg1jf82035d9qdP/jWIX/enKQAgEH0KigAAEZhJwUAAADM0CY9T/ytHicpAAAAgCEoKQAAAIAhKCkAAACAIdhJAQAAALPYSbFUTlIAAAAAQ1BSAAAAAENQUgAAAABfpKrOq6qPV9UVVfWyu7n+0Kr606r6m6q6pKqefKyZdlIAAADADG1ysPcIXVTVWpLXJfnWJPuSXFRVF7TWLjviaT+Z5J2ttddX1aOTvC/JWceS6yQFAAAAcLTHJbmitXZla+1AkrcneepRz2lJTjr88clJrj7WUCUFAAAArJiq2l1Ve4/4tvuopzwkyVVHfH/f4ceO9Mok/7aq9uXQKYofOda53O4BAAAAK6a1tifJnnWeUnf3sqO+/6wkv9Fae01V/Yskv1lVj2mtTe/tXE5SAAAAAEfbl+TMI75/Rr74do4XJHlnkrTWPpzkuCSnHkuokxQAAAAwy2TSe4JeLkryiKp6eJJ/SPLMJM8+6jl/n+QJSX6jqh6VQyXF548l1EkKAAAA4J9orR1M8qIkf5jk8hx6F49Lq+pnquq7Dj/tJUleWFV/m+RtSZ7XWjv6lpB7xEkKAAAA4Iu01t6XQwsxj3zsFUd8fFmSb9rIzLklRVU97lB2u+jw+56el+Rjh4cFAAAA2BDrlhRV9dNJvj3J1qr64yT/PMmFSV5WVV/XWvv5xY8IAAAAfbTV3UnRxbyTFE9P8rVJdiS5JskZrbWbq+qXkvxVkrstKQ6/v+ruJPmK0x6YB590v42bGAAAANiU5i3OPNham7TWbk/yydbazUnSWrsjycz3PW2t7Wmt7Wqt7VJQAAAAAF+KeSXFgao6/vDHj/3Cg1V1ctYpKQAAAADuqXm3e3xza21/krTWjiwltiV57sKmAgAAgAG0qZ0Uy7RuSfGFguJuHr82ybULmQgAAABYSfNu9wAAAABYCiUFAAAAMIR5OykAAABgdU3spFgmJykAAACAISgpAAAAgCEoKQAAAIAhKCkAAACAIVicCQAAADM0izOXykkKAAAAYAhKCgA
AAGAISgoAAABgCHZSAAAAwAxtMu09wkpxkgIAAAAYgpICAAAAGMLCb/c49ZVvXXTE3TrhuC6xSZLPXlfdsnd93893y977m8/qlv3EC8/ukvuVr39xl9wkOXFtW7fsO/d/tlv25dtO7Jb9i8fv6JJ7x7Rfn/yq6z/RLfvC73tot+wdZz+oS+5T3/qcLrlJcr+rf69b9m3n/B/dsj+773e6ZZ+w77e75D7qhNO75CbJWvr9GenVr3lKt+zXHH9et+wf+vBPdsn9htfd1iU3SXZ0/HX2J9/T77/7RW+7T7ds2Eh2UgAAAMAsdlIslds9AAAAgCEoKQAAAIAhKCkAAACAIdhJAQAAADO0yaT3CCvFSQoAAABgCEoKAAAAYAhKCgAAAGAIdlIAAADADG3Seo+wUpykAAAAAIagpAAAAACGoKQAAAAAhqCkAAAAAIZgcSYAAADM0CbT3iOsFCcpAAAAgCEoKQAAAIAhKCkAAACAIdzjnRRV9ZbW2vcvYhgAAAAYiZ0Uy7VuSVFVFxz9UJJ/XVX3TZLW2nctajAAAABgtcw7SXFGksuS/GqSlkMlxa4kr1nvRVW1O8nuJHns81+es7/le459UgAAAGBTm7eTYleS/57k5Uluaq1dmOSO1tqftdb+bNaLWmt7Wmu7Wmu7FBQAAADAl2LdkxSttWmS11bVuw7/7z/Oew0AAABsFm3aeo+wUr6kwqG1ti/JM6rqKUluXuxIAAAAwCq6R6ciWmu/n+T3FzQLAAAAsMLm7aQAAAAAWAr7JQAAAGCGNrGTYpmcpAAAAACGoKQAAAAAhqCkAAAAAIagpAAAAACGYHEmAAAAzNAmvSdYLU5SAAAAAENQUgAAAABDUFIAAAAAQ7CTAgAAAGZok9Z7hJXiJAUAAAAwBCUFAAAAMAQlBQAAADCEhe+kePHrf3TREXdrS8f6Zf9d/cJffcv13bKfeOHZ3bI/8Pgru+R+1euqS26S3Ln/tm7ZZ57xlG7Zx1/1X7pl/9Ip39Yl98CBG7vkJsnaidNu2d990/ndsvd/aluX3O2feWaX3CT5SOv3+ewpn3lzt+xP3/IP3bKvP/M7u+Re9rm/6JLb21Wf6/M5PEnecNmLumV/x6n/sUvugfqeLrlJsmXHad2yn3rdq7tl37D20m7Zm9203x+HVpKTFAAAAMAQlBQAAADAEJQUAAAAwBAWvpMCAAAAvly1Se8JVouTFAAAAMAQlBQAAADAEJQUAAAAwBCUFAAAAMAQLM4EAACAGSzOXC4nKQAAAIAhKCkAAACAISgpAAAAgCHYSQEAAAAzTKe9J1gtTlIAAAAAQ1BSAAAAAENQUgAAAABDsJMCAAAAZmiT3hOslntUUlTVv0ryuCQfba390WJGAgAAAFbRurd7VNVfH/HxC5P8xyQnJvnpqnrZgmcDAAAAVsi8nRTbjvh4d5Jvba29Ksm3JXnOrBdV1e6q2ltVey+45poNGBMAAADY7Obd7rGlqu6XQ2VGtdY+nySttduq6uCsF7XW9iTZkyT/7Zv+l7ZRwwIAAMAyTafVe4SVMq+kODnJf09SSVpVPbC1dk1V7Tz8GAAAAMCGWLekaK2dNePSNMl3b/g0AAAAwMq6V29B2lq7PcmnNngWAAAAYIXNW5wJAAAAsBT36iQFAAAArILptPcEq8VJCgAAAGAISgoAAABgCEoKAAAAYAh2UgAAAMAMbdJ7gtXiJAUAAAAwBCUFAAAAMAQlBQAAADAEOykAAABghum0eo+wUpykAAAAAIagpAAAAACGoKQAAAAAhmAnBQAAAMwwnfSeYLVUa22hAeee8zWLDZihqt9yk2nr96v4q9b69U7nbD+xW/YfdVpmc+m7frRLbpL8/LP+U7fsN995Y7fs4w/0y76lHeyS2/Pr4vb0+1x6MF2+fCRJztp2QpfcHd/wmi65SfKpD/1Qt+xtHQ92nvk1P9Ut+6av+NouuX/yjW/rkpskk5uv75b90l++vFv2h9bu2y37MXd
9vkvu+d807ZKbJOf9cb9fZ1s6ft3smf2RT1+xqTdLXvYdj+v3h5IFevR7/3rInze3ewAAAABDUFIAAAAAQ1BSAAAAAEOwOBMAAABmmHbaf7eqnKQAAAAAhqCkAAAAAIagpAAAAACGYCcFAAAAzNDspFgqJykAAACAISgpAAAAgCEoKQAAAIAh2EkBAAAAM0ynvSdYLU5SAAAAAENQUgAAAABDUFIAAAAAQ7CTAgAAAGaYTqv3CCvFSQoAAABgCOuWFFX1z6vqpMMf36eqXlVVv1dV51fVycsZEQAAAFgF805S/HqS2w9//CtJTk5y/uHH3rTAuQAAAIAVM28nxZbW2sHDH+9qrX394Y//W1VdPOtFVbU7ye4kOe3Uh+Skk0459kkBAACATW1eSfHRqnp+a+1NSf62qna11vZW1SOT3DXrRa21PUn2JMm553xN27hxAQAAYHkszlyuebd7/ECS/7WqPpnk0Uk+XFVXJnnj4WsAAAAAG2LdkxSttZuSPK+qTkxy9uHn72ut/eMyhgMAAABWx7zbPZIkrbVbkvztgmcBAAAAVtiXVFIAAADAKprYSbFU83ZSAAAAACyFkgIAAAAYgpICAAAAGIKdFAAAADDD1E6KpXKSAgAAABiCkgIAAAAYgpICAAAAGIKdFAAAADDDtNlJsUxOUgAAAABDUFIAAAAAQ1BSAAAAAENQUgAAAABDWPjizDMe8qRFR8zU2sFu2WtbdnTJveQz7+ySmyQnrm3rln3n/tu65J7znT+V/33ng7pkv/xtP9wlN0ne+OR/1y37QFq37LW1E/rkJmmtz3/3znZXl9wkOWVrn8+jSXL7tM/Xjzv/6sey8+zndcl+6CN+MJ+74le7ZN+x/ZQuuUly8I5rumV//oIf6JL7NRckf/lvdnbJnh7o92ez6yb7u2WfcOIZ3bJPuuXGLrk//+Hkwulal+xH7Jh0yU2SWycHumX3+tq1CqbT3hOslk17kmIVCwqWr1dBwWrpVVCwfL0KiiTdCgqWr1dBwWrpVVAAX/42bUkBAAAAfHlRUgAAAABDWPhOCgAAAPhyNWnVe4SV4iQFAAAAMAQlBQAAADAEJQUAAAAwBDspAAAAYIbp1E6KZXKSAgAAABiCkgIAAAAYgpICAAAAGIKdFAAAADDDpNlJsUxOUgAAAABDUFIAAAAAQ1BSAAAAAENQUgAAAABDsDgTAAAAZphanLlU656kqKoXV9WZyxoGAAAAWF3zbvf42SR/VVX/tap+uKpOW8ZQAAAAwOqZV1JcmeSMHCorHpvksqp6f1U9t6pOnPWiqtpdVXurau/Vn/3bDRwXAAAA2Kzm7aRorbVpkj9K8kdVtS3Jtyd5VpJ/n+RuT1a01vYk2ZMkj//mf9c2blwAAABYnomdFEs1r6T4Jz8brbW7klyQ5IKqus/CpgIAAABWzrzbPb531oXW2h0bPAsAAACwwtYtKVprf7esQQAAAIDVNu92DwAAAFhZE1sWl2re7R4AAAAAS6GkAAAAAIagpAAAAACGYCcFAAAAzDBt1XuEleIkBQAAADAEJQUAAAAwBCUFAAAAMAQlBQAAADAEizMBAABghonFmUvlJAUAAAAwBCUFAAAAMAQlBQAAADAEOykAAABghknrPcFqqdYW+yN+zjmP8VO6RCftfFi37LP3f7Zb9nUPeEKX3Kuv+WCX3CQ5sP+Gbtmfft8vdct+5JNf0i17a/osTdpa/ZY1ben035wkx2/p16M/YNvx3bKv3H9Tl9zb13Z2yU2Slmm37Idk0i37m094YJfcd915a5fcJFlb29Et+5HT27plX3PyY7pl77z+I11yr7ir34/3A7Zs65Z9v639fo1//q47u2V/5NNXbOrNku/+uiduyr/TPv1vPjDkz5vbPQBgEL0KCgCAUSgpAAAAgCHYSQEAAAAzTDre/rqKnKQAAAAAhqCkAAAAAIagpAAAAACGYCcFAAAAzDDZlG9AOi4nKQAAAIAhKCkAAACAISg
pAAAAgCEoKQAAAIAhWJwJAAAAM0x6D7BinKQAAAAAhqCkAAAAAIagpAAAAACGYCcFAAAAzGAnxXI5SQEAAAAMYd2TFFW1Pckzk1zdWvtAVT07yb9McnmSPa21u5YwIwAAALAC5t3u8abDzzm+qp6bZGeS307yhCSPS/LcxY4HAAAArIp5JcVXt9a+pqq2JvmHJA9urU2q6q1J/nbWi6pqd5LdSXLqqQ/KSSedsmEDAwAAwLJMUr1HWCnzdlJsOXzLx4lJjk9y8uHHdyTZNutFrbU9rbVdrbVdCgoAAADgSzHvJMWvJflYkrUkL0/yrqq6Msk3Jnn7gmcDAAAAVsi6JUVr7bVV9Y7DH19dVW9J8sQkb2yt/fUyBgQAAABWw7yTFGmtXX3ExzcmefdCJwIAAIBBTFrrPcJKmbeTAgAAAGAplBQAAADAEJQUAAAAwBCUFAAAAMAQ5i7OBAAAgFU16T3AinGSAgAAABiCkgIAAAAYgpICAAAA+CJVdV5Vfbyqrqiql63zvKdXVauqXceaaScFAAAAzLCqOymqai3J65J8a5J9SS6qqgtaa5cd9bwTk7w4yV9tRK6TFAAAAMDRHpfkitbala21A0nenuSpd/O8n03y6iR3bkSokgIAAAA42kOSXHXE9/cdfux/qqqvS3Jma+29GxWqpAAAAIAVU1W7q2rvEd92H/2Uu3lZO+L1W5K8NslLNnIuOykAAABghs26k6K1tifJnnWesi/JmUd8/4wkVx/x/ROTPCbJhVWVJA9MckFVfVdrbe+9nctJCgAAAOBoFyV5RFU9vKq2J3lmkgu+cLG1dlNr7dTW2lmttbOS/GWSYyookiWcpHhYTRcdwRGuum1ft+zLt53YLfv4q/5Ln9zW5j9pQQ6kX/Yjn7yhJ7rukb9732u6ZT/qaZ2yt+3sk5vk9v3Xdcu+bXqwW/at20/rE7z9tNxy88e7RD/ohW/pkpskP/WeH++W/UOf+1S37Hfcfn2X3G133dwlN0mmHb92XXnSI7tlf/UtH+uWff0jX9Ql99TL+329vq62dcu+ecsJ3bL3Z3+3bDan1trBqnpRkj9Mspbk11trl1bVzyTZ21q7YP3/h3vH7R4AMIheBQUAwN1prb0vyfuOeuwVM577+I3IVFIAAADADJOOp8BWkZ0UAAAAwBCUFAAAAMAQlBQAAADAEJQUAAAAwBAszgQAAIAZJr0HWDFOUgAAAABDUFIAAAAAQ1BSAAAAAEOwkwIAAABmmLTWe4SV4iQFAAAAMAQlBQAAADAEJQUAAAAwBDspAAAAYIZJ7wFWjJMUAAAAwBCUFAAAAMAQ5t7uUVXnJPnuJGcmOZjkE0ne1lq7acGzAQAAACtk3ZKiql6c5DuT/FmSb0hycQ6VFR+uqh9urV248AkBAACgk0la7xFWyrzbPV6Y5LzW2s8leWKSR7fWXp7kvCSvnfWiqtpdVXurau/VN9+wcdMCAAAAm9aXspPiC6ctdiQ5MUlaa3+fZNusF7TW9rTWdrXWdj34pPsd+5QAAADApjdvJ8WvJrmoqv4yyTcnOT9Jquq0JNcveDYAAABghaxbUrTWfqWqPpDkUUl+ubX2scOPfz6HSgsAAACADTH33T1aa5cmuXQJswAAAMBQLM5cri9lJwUAAADAwikpAAAAgCEoKQAAAIAhzN1JAQAAAKtq0nuAFeMkBQAAADAEJQUAAAAwBCUFAAAAMAQ7KQAAAGCGSWu9R1gpTlIAAAAAQ1BSAAAAAENQUgAAAABDsJMCAAAAZpjEToplcpICAAAAGIKSAgAAABjCwm/3OO6xr150xN2aHHdCl9wkWbvhqm7Z9bFf6Zb9i8fv6Jb9S6d8W5fcq/f9fpfcJFlb6/drfOvkjm7Zj3raa7plX/6el3TJveOjf9ElN0ke9RO/0y379aed1S37zNNu6pL7HZetdclNkuz53m7RLz/pK7plH98tOXlYTbvkXl7VJfeQfr/Gf25rnx/vJPn1s364W/Y
PXf3WLrkvnk665CbJ1q397mh/y4P6Zb/hc/fvlg0byUkKAAAAYAgWZwIAAMAMFmcul5MUAAAAwBCUFAAAAMAQlBQAAADAEOykAAAAgBmmzU6KZXKSAgAAABiCkgIAAAAYgpICAAAAGIKdFAAAADDDJHZSLJOTFAAAAMAQlBQAAADAEJQUAAAAwBDspAAAAIAZ7KRYLicpAAAAgCEoKQAAAIAhKCkAAACAISgpAAAAgCFYnAkAAAAzTJrFmcu07kmKqjq5qn6xqj5WVdcd/nb54cfuu87rdlfV3qra+/effP/GTw0AAABsOvNu93hnkhuSPL61dv/W2v2T/OvDj71r1otaa3taa7taa7sees55GzctAAAAsGnNKynOaq2d31q75gsPtNauaa2dn+Shix0NAAAAWCXzdlJ8pqpemuTNrbV/TJKqOj3J85JcteDZAAAAoKtJ7KRYpnknKb43yf2T/FlVXV9V1ye5MMkpSZ6x4NkAAACAFbLuSYrW2g1JfuLwt3+iqp6f5E0LmgsAAABYMfNOUqznVRs2BQAAALDy1j1JUVWXzLqU5PSNHwcAAADGMW12UizTvMWZpyd5Ug695eiRKsmHFjIRAAAAsJLmlRTvTbKztXbx0Req6sKFTAQAAACspHmLM1+wzrVnb/w4AAAAwKqad5ICAAAAVtYkdlIs07G8uwcAAADAhlFSAAAAAENQUgAAAABDUFIAAAAAQ7A4EwAAAGawOHO5nKQAAAAAhqCkAAAAAIaw8Ns9pjd+YtERd2vL2nFdcpPkrv3Xdstu04Pdsu+Y9uu8Dhy4sUvupEvqIVtav2NnW6u6ZWfbzm7Rd3z0L7rk3ucx39QlN0nWtry/W/bVd/W7I/GBB/p8Pqvq99+8vfp9Dr9r6326ZR/ItFv2lm4/5qv5b1R3tH5fu9rkzm7Zt046/XzXWp/cJDt23Ldb9h0HukXnpC2r+XubzcdOCgAAAJhh2vEfB1eRug0AAAAYgpICAAAAGIKSAgAAABiCnRQAAAAwwyR2UiyTkxQAAADAEJQUAAAAwBCUFAAAAMAQ7KQAAACAGSbNToplcpICAAAAGIKSAgAAABiCkgIAAAAYgpICAAAAGILFmQAAAJ9QWoEAACAASURBVDDDNBZnLpOTFAAAAMAQlBQAAADAEJQUAAAAwBDspAAAAIAZJs1OimVykgIAAAAYwr0uKarqD9a5truq9lbV3qv2ffjeRgAAAAArZN3bParq62ddSvK1s17XWtuTZE+SnPek1zobAwAAAMw1byfFRUn+LIdKiaPdd+PHAQAAgHFM7aRYqnklxeVJfrC19omjL1TVVYsZCQAAAFhF83ZSvHKd5/zIxo4CAAAArLJ1T1K01t69zuX7bfAsAAAAwAqbd7vHel6V5E0bNQgAAACMZhI7KZZp3rt7XDLrUpLTN34cAAAAYFXNO0lxepInJbnhqMcryYcWMhEAAACwkuaVFO9NsrO1dvHRF6rqwoVMBAAAAKykeYszX7DOtWdv/DgAAADAqjqWxZkAAACwqU3btPcIK2VL7wEAAAAAEiUFAAAAMAglBQAAADAEOykAAABghmla7xFWipMUAAAAwBCUFAAAAMAQlBQAAADAEKq1xd5f8/CHP7LLDTyt43vZbqlt3bJ37jyzW/bBmz/RLXvtxLO75O6/5ZNdcpNkZ/VbKdPzvrzbt9+3W/aBAzd0yV3bclyX3CT55O/9bLfshz/5x7tld2vwt57YKzlbtvT72rWl0++tJDm4dWe37G0Hb+2Su736/RvVllS37DvbpFv2ZPv9umWfcNfNXXJvbAe75CbJx/6f53TL/sofe3u37G3bT+6W/YlP/E2/39xL8MRzv2pTLqX4wBWXDvnz5iQFAAAAMAQlBQAAADAEJQUAAAAwhH43tQMAAMDgeu5jW0VOUgAAAABDUFIAAAAAQ1BSAAAAAENQUgAAAABDsDgTAAAAZpg2izOXyUkKAAAAYAhKCgAAAGAISgoAAAB
gCHZSAAAAwAzT3gOsGCcpAAAAgCEoKQAAAIAhKCkAAACAIdhJAQAAADNMW+s9wkpxkgIAAAAYgpICAAAAGMK6JUVVnVRV/3dV/WZVPfuoa/9pndftrqq9VbX3lltu2qhZAQAAgE1s3kmKNyWpJO9J8syqek9V7Th87Rtnvai1tqe1tqu1tuvEE0/eoFEBAABguaZpm/LbqOaVFOe01l7WWvud1tp3JflIkg9W1f2XMBsAAACwQua9u8eOqtrSWpsmSWvt56tqX5I/T7Jz4dMBAAAAK2PeSYrfS/ItRz7QWntzkpckObCooQAAAIDVs+5JitbaS2c8/v6q+oXFjAQAAACsonm3e6znVTm0WBMAAAA2pWkbd8nkZrRuSVFVl8y6lOT0jR8HAAAAWFXzTlKcnuRJSW446vFK8qGFTAQAAACspHklxXuT7GytXXz0haq6cCETAQAAACtp3uLMF6xz7dkbPw4AAACMYxo7KZZp3luQAgAAACyFkgIAAAAYgpICAAAAGMK8xZkAAACwsuykWC4nKQAAAIAhKCkAAACAISgpAAAAgCEsfCfFc974jkVHzHTyffp0MFdeO+mSmyR/+Yqf6pZ94fc9tFv2d990frfsqy54WpfcU7bu6JKbJLdPD3bLvq1j9utPO6tL7tV39Vsf9PAn/3i37E+975e7Zd/8/rd0y/4Xv3ZDl9yzDt7UJTdJDpz7gm7ZV1z5m92yn3fyOd2yf+vmT3XJ3ZLqkpskH/y2U7tlP+vRv9ot+//76A90yf3f/uasLrlJ8s9+7Le6ZX/8dc/vlv2yl32gW/ZmN7WSYqk27UmKXgUFq6VXQQFsTr0KClZLr4ICAL4U/iYPAAAADEFJAQAAAAyh343OAAAAMLhpLKVYJicpAAAAgCEoKQAAAIAhKCkAAACAISgpAAAAgCFYnAkAAAAzWJy5XE5SAAAAAENQUgAAAABDUFIAAAAAQ7CTAgAAAGZoVlIslZMUAAAAwBCUFAAAAMAQlBQAAADAEOykAAAAgBmmsZRimZykAAAAAIagpAAAAACGsJCSoqp2V9Xeqtr7kd9/zyIiAAAAgE1m3ZKiqh5YVa+vqtdV1f2r6pVV9T+q6p1V9aBZr2ut7Wmt7Wqt7fr6pzxt46cGAACAJWib9Nuo5p2k+I0klyW5KsmfJrkjyVOS/Nck/3mhkwEAAAArZV5JcXpr7T+01n4xyX1ba+e31v6+tfYfkjxsCfMBAAAAK2JeSXHk9bccdW1tg2cBAAAAVti8kuJ3q2pnkrTWfvILD1bVuUk+vsjBAAAAgNWydb2LrbVXzHj8iqr6/cWMBAAAAGOYDr1mcvM5lrcgfdWGTQEAAACsvHVPUlTVJbMuJTl948cBAAAAVtW6JUUOFRFPSnLDUY9Xkg8tZCIAAABgJc0rKd6bZGdr7eKjL1TVhQuZCAAAAAZhI8VyzVuc+YJ1rj1748cBAAAAVtWxLM4EAAAA2DBKCgAAAGAI83ZSAAAAwMqyk2K5nKQAAAAAhqCkAAAAAIagpAAAAACGYCcFAAAAzDC1lWKpFl5SfPJzvX5CJzlue5/ka2/qk5skW7ft7Ja94+wHdcve/6ltXXLP2nZCl9wkuX16sFv2A7Yd3y371u2ndcs+87Q+v7kfeKDfobctN3SLzs3vf0u37JPO+/4uuZeelzz66a/tkn3clrUuuUky2XbfbtmZ7u8WPemUe8rajk7JyYE27ZZ9/Dn9vn6c8aB+/907bz+lS+61f3Jxl9xDql/0pN+fz85ZO65bNmykTXu7R6+CAgDurV4FBQDAKDZtSQEAAAB8eVFSAAAAAEOwOBMAAABmsDZzuZykAAAAAIagpAAAAACGoKQAAAAAhmAnBQAAAMxgJ8VyOUkBAAAADEFJAQAAAAxBSQEAAAAMwU4KAAAAmMFOiuVykgIAAAAYgpICAAAAGIKSAgAAABiCnRQAAAAwg50Uy+UkBQAAAPBFquq8qvp4VV1RVS+7m+s7quodh6/
/VVWddayZSgoAAADgn6iqtSSvS/LtSR6d5FlV9eijnvaCJDe01s5N8tok5x9r7j0uKarqAccaCgAAAAztcUmuaK1d2Vo7kOTtSZ561HOemuTNhz9+d5InVFUdS+i6JUVVnXLUt/sn+euqul9VnbLO63ZX1d6q2nvFB3/7WOYDAAAANtiRf28//G33UU95SJKrjvj+vsOP3e1zWmsHk9yU5P7HMte8xZnXJvnM3QzxkRzaH3L23b2otbYnyZ4kedZvfcSeEQAAABjIkX9vn+HuTkQc/ff7L+U598i82z1emuTjSb6rtfbw1trDk+w7/PHdFhQAAADAl719Sc484vtnJLl61nOqamuSk5Ncfyyh65YUrbV/n+QHkryiqn65qk6Md2ABAACAze6iJI+oqodX1fYkz0xywVHPuSDJcw9//PQkH2ytHVNnMO92j7TW9iV5RlV9Z5I/TnL8sQQCAAAAY2utHayqFyX5wyRrSX69tXZpVf1Mkr2ttQuS/FqS36yqK3LoBMUzjzV3bklxxIC/V1UfSHJOklTV81trbzrWAQAAAGBcx/RmFV/WWmvvS/K+ox57xREf35nkGRuZeY/egrS1dkdr7aOHv/uqjRwEAAAAWG3rnqSoqktmXUpy+saPAwAAAKyqebd7nJ7kSUluOOrxSvKhhUwEAAAArKR5JcV7k+xsrV189IWqunAhEwEAAMAwVncnRQ/rlhSttResc+3ZGz8OAAAAsKru0eJMAAAAgEVRUgAAAABDmLeTAgAAAFaYnRTL5CQFAAAAMAQlBQAAADAEJQUAAAAwBCUFAAAAMASLMwEAAGAWezOXqlprCw34+rPOXWzADNur3yGRW6Z3dcu+vfr1Tuec/Zxu2Qc+844uuSc87pe75CbJ5PMXd8u+5oo3dsu+pU26Zbda65JbHX9fb9myvVv2cTtO6Zbd2rRb9mXv/j+75D7yu3+hS26SHDhwU7fsK970om7Zf/zSC7rkvuaf/UyX3CTZ+g0ndMs++I7f7Zb91Z/7k27Zf77t1C65W9b6ff247rp+f0bq6Su29vsx/4NP/I9N/df4s846u8vfaRft05++csifN7d7AMAgehUUAACjUFIAAAAAQ7CTAgAAAGbyb/vL5EcbAAAAGIKSAgAAABiCkgIAAAAYgp0UAAAAMENlyHfq3LScpAAAAACGoKQAAAAAhqCkAAAAAIZgJwUAAADMUnZSLJOTFAAAAMAQlBQAAADAEJQUAAAAwBCUFAAAAMAQLM4EAACAGSoWZy6TkxQAAADAEJQUAAAAwBCUFAAAAMAQ1i0pquq8Iz4+uap+raouqarfqqrT13nd7qraW1V7r73l5o2cFwAAAJZoyyb9NqZ5k/3CER+/Jslnk3xnkouSvGHWi1pre1pru1pru0498aRjnxIAAADY9O7Ju3vsaq197eGPX1tVz13EQAAAAMBqmldSPKCqfjxJJTmpqqq11g5fG/d8CAAAAPBlZ15J8cYkJx7++M1JTk3y+ap6YJKLFzkYAAAA9FZVvUdYKeuWFK21V814/Jqq+tPFjAQAAACsomO5ZeNuCwwAAACAe2PdkxRVdcmsS0lmvgUpAAAAwD01byfF6UmelOSGox6vJB9ayEQAAAAwivKeEcs0r6R4b5KdrbUvWpJZVRcuZCIAAABgJc1bnPmCda49e+PHAQAAAFaVcysAAADAEJQUAAAAwBDm7aQAAACAlVX+bX+p/GgDAAAAQ1BSAAAAAENQUgAAAABDsJMCAAAAZqiq3iOsFCcpAAAAgCFUa22hAf/yG39gsQEzrP3/7d15uGRnWS/s37N3OoFMZIIwp5kCORoI0HqhIkTCKBHEgUmNA9KCEgKOeOCgOHACisinHqUFAUUBDWgYNIfIpB/I0EQCkTAIdEhDJkIgdBiSdL/fH7var227qhKya73Lrvu+rn117Vq79u9dq/d+d9VT73rW6oE9YpMkX//Gld2yr7jiA92yv+3mR3bLPu/ar3XJ3bD
zq11yk+Rmtdote8fqId2yb/XkP++WnS2P6xJ7YPWrJ39u5Wbdso+vXd2yb7bS5/frI7v6vVPzib/9n92yH/ziY7tlf/ptT+qWvWvXdV1yD+uSuqbnm5F3OLDfnm+/2W27Zb/pXld0yT35//1Kl9wk2bDh8G7Zd8213bI/9Y0vd8v+2EXb9uulBne76726vKZdtE/++/mj/H+zkgIAAAAYBT0pAAAAYJqOq1qXkaMNAAAAjIIiBQAAADAKihQAAADAKOhJAQAAAFOUnhSDcrQBAACAUVCkAAAAAEZBkQIAAAAYBUUKAAAAYBQ0zgQAAIApynv7g3K0AQAAgFFQpAAAAABGQZECAAAAGAU9KQAAAGCKKu/tD8nRBgAAAEZBkQIAAAAYBUUKAAAAYBRudE+Kqjq6tXblIgYDAAAAY1K12nsIS2XmSoqqOrOqjpnc3lRVn07yvqq6qKoeOONxm6tqa1Vtvezyj63zkAEAAID90bzTPR7ZWvvC5PbvJHlca+2uSR6S5EXTHtRa29Ja29Ra23Tsre6xTkMFAAAA9mfzihQbqmr3KSE3b619IElaa59IctBCRwYAAAAslXk9Kf4oyd9X1ZlJzqmq30/yhiSnJPnQogcHAAAAPVW53sSQZhYpWmt/UFUfSfLUJMdPvv74JH+X5DcXPzwAAABgWcy9ukdr7Z1J3rn3/VX1k0lesf5DAgAAAJbRTVm38rx1GwUAAACw9GaupKiqD0/blOTY9R8OAAAAsKzmne5xbJKHJblqr/sryXsWMiIAAAAYCY0zhzWvSPHmJIe21v7LlTyq6p0LGREAAACwlOZd3eNJM7Y9cf2HAwAAACwr61YAAACAUZh7CVIAAABYVlWrvYewVKykAAAAAEZBkQIAAAAYBUUKAAAAYBT0pAAAAIApqry3PyRHGwAAABgFRQoAAABgFBZ+usf9r/nkoiP2aTXVJTdJdqZ1y/7r7OyWfc1dntIt+5EXvapL7ju+8vUuuUnytQOP6pbddn6tW/b/ev3Pd8t+9uF375J73QE375KbJCtXvK9b9rV3fVK37J0bjuiSe+3HXtIlN0ke/OJju2X/4zMv65Z9l7f3u6zcnTc+pkvutm2v6ZKbJCv9niLl+o0/2i37sZee0y37ibfc0iX3zqs/1iU3SbZ/44pu2TtP+IVu2Qd9/u+7ZcN60pMCAAAApqjqV9BeRk73AAAAAEZBkQIAAAAYBUUKAAAAYBQUKQAAAIBR0DgTAAAAptA4c1hWUgAAAACjoEgBAAAAjIIiBQAAADAKelIAAADAFCvlvf0hOdoAAADAKChSAAAAAKOgSAEAAACMgp4UAAAAMEXVau8hLBUrKQAAAIBRUKQAAAAARkGRAgAAABgFPSkAAABgCj0phmUlBQAAADAKM4sUVXVeVT2nqu5yY75pVW2uqq1VtfX8L15y00YIAAAALIV5KymOTHJEkndU1fur6plVddt537S1tqW1tqm1tuleR91mXQYKAAAA7N/mFSmuaq39Ymvtjkl+IcndkpxXVe+oqs2LHx4AAACwLG5w48zW2j8n+eeqOj3JQ5I8LsmWRQ0MAAAAetM4c1jzihSf2PuO1trOJOdMPgAAAADWxczTPVprj5+2rap+cv2HAwAAACyrm3IJ0uet2ygAAACApTfzdI+q+vC0TUmOXf/hAAAAwHjUip4UQ5rXk+LYJA9LctVe91eS9yxkRAAAAMBSmlekeHOSQ1trH9p7Q1W9cyEjAgAAAJbSzCJFa+1JM7Y9cf2HAwAAACyreSspAAAAYGmtlJ4UQ7opV/cAAAAAWDeKFAAAAMAoKFIAAAAAo6AnBQAAAExRelIMykoKAAAAYBQUKQAAAIBRUKQAAAAARqFaawsNuNNxxy02YIqe1ZedqY7p/fb8yCNO6Jb9jS99tEvuxns+t0tuklz/tUu7ZX/9M3/eLfuz13+9W/bBnXKvza5OyUk74LBu2bt2XdstO7u+0SX2k698ZpfcJDn+p/6
4W3bPc30/9abf7Jb9nqf8fpfc37jj/+qSmyS77n/LbtlfeOmvdcu+/Vc/2y37guv6zGeHHbaxS26SXHPN57plH3D91d2yv+XmR3XLfv3Hzuv5Amjh7vftP9blNe2ivff9fzHK/zeNMwEAAGAKjTOH5XQPAAAAYBQUKQAAAIBRUKQAAAAARkFPCgAAAJiiysvmIVlJAQAAAIyCIgUAAAAwCooUAAAAwCg4uQYAAACmWKnV3kNYKlZSAAAAAKOgSAEAAACMgiIFAAAAMAp6UgAAAMAUtaInxZCspAAAAABGQZECAAAAGAVFCgAAAGAUFCkAAACAUdA4EwAAAKao8rJ5SFZSAAAAAKMws0hRVZuq6h1V9eqqukNVnVtVX66qD1TVvWc8bnNVba2qrV/ZsWP9Rw0AAADsd+atpPg/SV6Y5C1J3pPkpa21WyR51mTbPrXWtrTWNrXWNh126KHrNlgAAABg/zXv5JoNrbV/SJKqekFr7awkaa29rap+d+GjAwAAgI6qVnsPYanMW0nx9ap6aFX9cJJWVd+fJFX1wCQ7Fz46AAAAYGnMW0nxlKyd7rErycOSPLWqXpnkc0mevNihAQAAAMtk5kqK1tr5rbWHtdYe0Vr7WGvtjNbaEa21b0ly94HGCAAAACyBm3LB1+clecV6DQQAAADGpuqmvGzmxpp5tKvqw9M2JTl2/YcDAAAALKt5JaFjs9aL4qq97q+sXZIUAAAAYF3MK1K8OcmhrbUP7b2hqt65kBEBAAAAS2lmkaK19qQZ2564/sMBAACA8Vip1d5DWCozr+4BAAAAMBRFCgAAAGAUFCkAAACAUVCkAAAAAEZh3tU9AAAAYGnVipfNQ7KSAgAAABgFRQoAAABgFBa+buXu93jGoiOm2nn9ji65qxuO6JKbJDuv+1K37EO2v6Fb9hfv8H1dcr9895O65CbJFW/86W7Zjz3k1t2yX/fVL3bLPq52dcldqX715E9948vdsn/iFnfplr2zU+4LznhT7nNQ65K9a9d1XXKT5M4bH9Mt+z1P+f1u2d/5J52eI5252ic3yTWf6hado3b0C7/4kI3dsu9Zn++Se+3GJ3TJTZJPn//cbtnPPvJu3bI/urPfPA7rab89uaZXgQIAvlm9ChQAwHRV++3L5lFyugcAAAAwCooUAAAAwCgoUgAAAACj4OQaAAAAmKKqX8PhZWQlBQAAADAKihQAAADAKChSAAAAAKOgJwUAAABMUeVl85CspAAAAABGQZECAAAAGAVFCgAAAGAUFCkAAACAUdABBAAAAKaoFS+bh2QlBQAAADAKihQAAADAKChSAAAAAKPg5BoAAACYosrL5iFZSQEAAACMwkKKFFW1uaq2VtXWi7f/yyIiAAAAgP3MzCJFVR1aVb9RVf9WVV+uqiuq6r1V9ROzHtda29Ja29Ra23SH23/Hug4YAAAA2D/NO7nmL5P8bZKHJXlskkOSvDbJc6rq+Nba/1zw+AAAAKAfPSkGNe90j42ttVe21ra31n4vyaNaa59M8pNJfmDxwwMAAACWxbwixTVVdf8kqarvS/LFJGmt7UpSCx4bAAAAsETmrVt5SpKXVdXxSS5I8lNJUlW3TPJHCx4bAAAAsERmFilaax9O8u37uP+KqvrKwkYFAAAAI1ArelIM6aZcgvR56zYKAAAAYOnNLAlV1YenbUpy7PoPBwAAAFhW89atHJu1y49etdf9leQ9CxkRAAAAsJTmFSnenOTQ1tqH9t5QVe9cyIgAAACApTSvceaTZmx74voPBwAAAMajSuPMId2UxpkAAAAA60aRAgAAABgFRQoAAABgFJxcAwAAANOseNk8JCspAAAAgFFQpAAAAABGQZECAAAAGIWFn1xz2aX/uOgI9rDh6k90yz7hkGO7ZX/08nd3yf3w/W7WJTdJvnqzQ7tlf8df7+iWveG6q7tlX1jVKblfPfnw6pf9V1d/plv2UasHdck995rkgO/50y7Zh517WpfcJNm27TX
dsn/j/n2Od5LkzNUusec860tdcpNk1zWf7ZZ9+jlHdMv+4EFHdcu+484vd8k9dturuuQmyc4D+z1HeunVF3fLvr7t6pa936s+8/WyspICAEaiV4ECAODGqKqjqurcqvrk5N8jZ3zt4VX1uar6wxvyvRUpAAAAgBvjWUne1lq7W5K3TT6f5jeTvOuGfmNFCgAAAODGeHSS3ed1vSrJ9+/ri6rqvkmOTfLWG/qNXfAVAAAApqgVL5v34djW2iVJ0lq7pKputfcXVNVKkhcl+bEkp9zQb+xoAwAAwJKpqs1JNu9x15bW2pY9tv9jklvv46HPvoERP5vk71trF9eNaECvSAEAAABLZlKQ2DJj+4Onbauqy6rqNpNVFLdJcvk+vuw7knx3Vf1skkOTHFhVO1prs/pXKFIAAAAAN8obk/x4kjMn/5699xe01n5k9+2q+okkm+YVKBKNMwEAAIAb58wkD6mqTyZ5yOTzVNWmqnrZTfnGVlIAAADANOVl895aa1dmH80wW2tbk/z0Pu5/ZZJX3pDvbSUFAAAAMAqKFAAAAMAoKFIAAAAAo+DkGgAAAJiirXjZPCQrKQAAAIBRUKQAAAAARkGRAgAAABgFJ9cAAADANCurvUewVKykAAAAAEZhZpGiqm5RVWdW1ceq6srJx4WT+44YapAAAADA/m/eSoq/TnJVkpNba0e31o5O8j2T+/5m2oOqanNVba2qrV+48rPrN1oAAABgvzWvJ8XG1toL9ryjtXZpkhdU1U9Ne1BrbUuSLUly73s9st3kUQIAAEAPelIMat5Kiouq6per6tjdd1TVsVX1K0kuXuzQAAAAgGUyr0jxuCRHJ3lXVV1VVV9M8s4kRyV57ILHBgAAACyRmad7tNauqqpXJDk3yXtbazt2b6uqhyc5Z8HjAwAAAJbEvKt7PD3J2UmeluSCqnr0Hpufv8iBAQAAAMtlXuPMJye5b2ttR1VtTHJWVW1srb0kSS16cAAAANBT0zhzUPOKFKu7T/ForW2rqpOzVqg4LooUAAAAwDqa1zjz0qo6afcnk4LFqUmOSXLiIgcGAAAALJd5RYrTkly65x2ttetba6clecDCRgUAAAAsnXlX99g+Y9u71384AAAAMB56Ugxr3koKAAAAgEEoUgAAAACjoEgBAAAAjMK8S5ACAADA8tKTYlBWUgAAAACjoEgBAAAAjIIiBQAAADAKelIAAADAFG3Fe/tDWniRYueu6xYdMTqt7eyWvZrWMbu6Zfey8+ovdsvede313bJXVw/qlr2r4894snxNk1Y6/l73zL627eqT+/Yn5fBfeW2X7PrHLrFJkpWOv9a77n/LbtnXfKpP7q5rPtsnOMnKIYd3yz6w4wLiDRsO65b9tU7PS7+WPvNob1/d1e/52Y7WLxvWk5IQAIxErwIFAMBYKFIAAAAAo6BIAQAAAIyCxpkAAAAwRVtZvr5kPVlJAQAAAIyCIgUAAAAwCooUAAAAwCjoSQEAAABT7Fr13v6QHG0AAABgFBQpAAAAgFFQpAAAAABGQU8KAAAAmKKteG9/SI42AAAAMAqKFAAAAMAoKFIAAAAAo6AnBQAAAEyhJ8WwHG0AAABgFBQpAAAAgFH4posUVfUP6zkQAAAAYLnN7ElRVfeZtinJSTMetznJ5iS53W1PyFFH3f6bHiAAAAD0sktPikHNa5z5gSTvylpRYm9HTHtQa21Lki1Jcs8TH9q+6dEBAAAAS2NekeLCJD/TWvvk3huq6uLFDAkAAABYRvPWrfz6jK85fX2HAgAAACyzmUWK1tpZSaqqTqmqQ/fa/PXFDQsAAABYNvMaZz49yc9l7bSPl1fVGa21syebn5/knAWPDwAAALppqxpnDmleT4onJ7lva21HVW1MclZVbWytvST7bqYJAAAA8E2ZV6RYba3tSJLW2raqOjlrhYrjokgBAAAArKN561YuraqTdn8yKVicmuSYJCcucmAAAADAcpm3kuK0JNfveUdr7fokp1X
VSxc2KgAAABiBtuIkgiHNLFK01rbP2Pbu9R8OAAAAsKy0KQUAAABGQZEC5jxNlAAAIABJREFUAAAAGIV5PSkAAABgae1a1ZNiSFZSAAAAAKOgSAEAAACMgiIFAAAAMAp6UgAAAMAUbUVPiiFZSQEAAACMwsJXUtz2aS9YdMQ+HXRg65KbJF/6Ur/az2V/8jPdsl/4okd2y7748od2yf3lF5zeJTdJrtz5jW7Zx+/a2S3704cf3y37tw7Y1SX3a61f9f63r/pkt+y3P/SYbtkH3+WWXXJ/6HVnd8lNkjsceFi37Os3/mi37Cte+mvdso/a8akuuaefc0SX3CQ5sOP7Yy9+3TO6Zb929YRu2Q99zc91yb3PKy/qkpskh3d80/utjzy6W/bPva3f8zNYT1ZSAAAAAKOgSAEAAACMgsaZAAAAMIXGmcOykgIAAAAYBUUKAAAAYBQUKQAAAIBR0JMCAAAApmirvUewXKykAAAAAEZBkQIAAAAYBUUKAAAAYBT0pAAAAIAp2kr1HsJSsZICAAAAGAVFCgAAAGAUFCkAAACAUdCTAgAAAKbx1v6gHG4AAABgFBQpAAAAgFFQpAAAAABGYWaRoqoOr6r/XVV/UVVP3Gvb/1ns0AAAAIBlMm8lxSuSVJLXJ3l8Vb2+qg6abLvftAdV1eaq2lpVWy/+59ev01ABAABgYKv76cdIzStS3KW19qzW2t+11h6V5Lwkb6+qo2c9qLW2pbW2qbW26Q7f/YPrNlgAAABg/zXvEqQHVdVKa21XkrTWfruqtif5pySHLnx0AAAAwNKYt5LiTUketOcdrbVXJfmFJNcualAAAADA8pm5kqK19stVdY+qOiXJ+1prOyb3n1NVTx9khAAAANCLa2IOat7VPU5PcnaS05NcUFWP3mPzby9yYAAAAMBymdeTYnOS+7bWdlTVxiRnVdXG1tpLsnbVDwAAAIB1Ma9IsbrHKR7bqurkrBUqjosiBQAAALCO5p1dc2lVnbT7k0nB4tQkxyQ5cZEDAwAAgO5W9tOPkZo3tNOSXLrnHa2161trpyV5wMJGBQAAACydeVf32D5j27vXfzgAAADAshrxIg8AAABgmcxrnAkAAABLq7y1PyiHGwAAABgFRQoAAABgFBQpAAAAgFFQpAAAAABGQeNMAAAAmKJWWu8hLBUrKQAAAIBRWPhKiv/nnM2LjtinDQf1q3bt+FK/BSpn7PxKt+wXHfzwbtkv/ejTuuQ+ePWILrlJcshht++Wff3Or3bLPvErH+uW/Wcbf7ZLbtv59S65SbLzmi3dsp/wP17WLfv2t9nVJffEy8/okpskb7/ZbbtlP/bSc7plv+/rV3bLvviQjV1yP3jQUV1yk2TDhsO6Zb929YRu2Y/feWG37Cfc/UVdcm+98vguuUmy4+Dbdct+3F1f3C378rf1eZ4C681KCgAAAGAU9KQAAACAKcpb+4NyuAEAAIBRUKQAAAAARkGRAgAAABgFPSkAAABgipXV3iNYLlZSAAAAAKOgSAEAAACMgiIFAAAAMAp6UgAAAMAUK97aH5TDDQAAAIyCIgUAAAAwCooUAAAAwCgoUgAAAACjoHEmAAAATFErrfcQloqVFAAAAMAoKFIAAAAAo6BIAQAAAIzCzJ4UVXXrJL+WZFeS5yY5PckPJrkwyRmttUsWPkIAAADoZMVb+4Oad7hfmeSjSS5O8o4kX0vyyCT/nORPpj2oqjZX1daq2vq6bVes01ABAACA/dm8IsWxrbU/aK2dmeSI1toLWmufba39QZLjpj2otbaltbaptbbpcRtvua4DBgAAAPZP84oUe27/8722ra7zWAAAAIAlNrMnRZKzq+rQ1tqO1tpzdt9ZVXdN8vHFDg0AAAD60pNiWDMPd2vtuUluX1WnVNWhe9z/70letujBAQAAAMtjZpGiqk5PcnbWrupxQVU9eo/Nz1/kwAAAAIDlMu90j81J7tta21FVG5OcVVUbW2svSVKLHhwAAACwPOY
VKVZbazuSpLW2rapOzlqh4rgoUgAAALCf05NiWPMO96VVddLuTyYFi1OTHJPkxEUODAAAAFgu84oUpyW5dM87WmvXt9ZOS/KAhY0KAAAAWDozT/dorW2fse3d6z8cAAAAYFk5uwYAAAAYhXmNMwEAAGBpaZw5LIcbAAAAGAVFCgAAAGAUFCkAAACAUdCTAgAAAKbQk2JYDjcAAAAwCooUAAAAwCgs/HSPXzj1pYuO2KeDDuwSmyT54peqW/blf/Lz3bKf+i/P6ZZ96jF/2CX3W6/b3CU3SQ7/ype6ZX/86/2yv3j807plP/Xzr+6Su2Nnv3ryC6+7ulv2X17w092yD/3qUV1yT9lwTJfcJHnTt17cLfuJt9zSLfvzf/sj3bLvWZ/vknvHnV/ukpskX2s7u2U/9DU/1y37CXd/Ubfs19zrM11y79Ral9wkOXjHtm7Zf3PRz3TLfvaq95/ZP+hJAQAAAFOsrvQrui0j5TYAAABgFBQpAAAAgFFQpAAAAABGQU8KAAAAmGLFW/uDcrgBAACAUVCkAAAAAEZBkQIAAAAYBUUKAAAAYBQ0zgQAAIApNM4clsMNAAAAjIIiBQAAADAKihQAAADAKOhJAQAAAFOsemt/UA43AAAAMAqKFAAAAMAoKFIAAAAAo3Cje1JU1a1aa5cvYjAAAAAwJivVewTLZeZKiqo6aq+Po5O8v6qOrKqjZjxuc1VtraqtF73rDes+aAAAAGD/M28lxReSXLTXfbdLcl6SluTO+3pQa21Lki1J8n0vP6/dxDECAAAAS2BeT4pfTvLxJI9qrd2ptXanJNsnt/dZoAAAAAD4ZsxcSdFa+92qem2SF1fVxUl+LWsrKAAAAGC/t+pyE4Oae7hba9tbaz+c5B1Jzk1y8MJHBQAAACyduUWKqrpHVZ2StSLF9yR58OT+hy94bAAAAMASmXd1j6cnOTvJ6UkuSPLQ1toFk83PX/DYAAAAgCUy7+oeT05y39bajqramOSsqtrYWntJEleLBQAAANbNvCLFamttR5K01rZV1clZK1QcF0UKAAAA9nMrGmcOat7hvrSqTtr9yaRgcWqSY5KcuMiBAQAAAMtlXpHitCSX7nlHa+361tppSR6wsFEBAAAAS2fm6R6tte0ztr17/YcDAAAALKt5PSkAAABgaa3qSTEohxsAAAAYBUUKAAAAYBQUKQAAAIBR0JMCAAAAptCTYlgONwAAADAKihQAAADAKFRrbaEBD7jLCYsN4D85sPrVnT573TXdsq/ttN/vechRXXKT5CHvPrhb9jXXfLZb9jEr/c5S+8KunX2Ca7VPbpK067pFH3P0t3XL/sIXP9Qt+5bH3LdL7pVX9tvnO6/2+72+4uA7dsu+3cYndMn9rm2v6pKbJF/Lrm7Zr/7yRd2yb93xb9dlC36uP81n3nJml9wkudujf6Nbdjo+F1+59qpu2R+/aFt1Cx/AT5513n75mvYVP3SfUf6/6UkBACPRq0ABAEynJ8WwHG4AAABgFBQpAAAAgFFQpAAAAABGQZECAAAAGAWNMwEAAGCKFW/tD8rhBgAAAEZBkQIAAAAYBUUKAAAAYBT0pAAAAIApVqv3CJaLlRQAAADAKChSAAAAAKOgSAEAAACMgp4UAAAAMMWqt/YH5XADAAAAo6BIAQAAAIyCIgUAAAAwCnpSAAAAwBR6UgzL4QYAAABGYWaRoqoevsftW1TVy6vqw1X1V1V17IzHba6qrVW19ZKrv7Se4wUAAAD2U/NWUjx/j9svSnJJku9L8oEkL532oNbaltbaptbaptscfsRNHyUAAACw37sxPSk2tdZOmtx+cVX9+CIGBAAAACyneUWKW1XVzyepJIdXVbXW2mSbfhYAAADs1w5Yqd5DWCrzCg1/muSwJIcmeVWSY5Kkqm6d5EOLHRoAAACwTGaupGitPa+q7pHkdkne11rbMbn/0qr6qyEGCAAAAIxHVR2V5HVJNibZluSxrbWr9vF1L0zyyKwtkDg
3yRl7nJ2xT/Ou7nF6krOTnJ7kgqp69B6bn7/vRwEAAAD7sWcleVtr7W5J3jb5/D+pqu9M8l1J7pnkW5N8W5IHzvvG83pSbE5y39bajqramOSsqtrYWntJ1vpUAAAAwH5rVTfGfXl0kpMnt1+V5J1JfmWvr2lJbpbkwKzVDzYkuWzeN55XpFjd4xSPbVV1ctYKFcdFkQIAAAD+W6qqzVlbmLDbltbalhv48GNba5ckSWvtkqq61d5f0Fr7l6p6R5JLslY/+MPW2oXzvvG8IsWlVXVSa+1Dk5AdVXVqkj9LcuINHDwAAAAwIpOCxNSiRFX9Y5Jb72PTs2/I96+quyY5IcntJ3edW1UPaK3906zHzStSnJbk+j3vaK1dn+S0qnrpDRkYAAAA8N9La+3B07ZV1WVVdZvJKorbJLl8H1/2mCTv3X12RlX9Q5L7JZlZpJh5dk1rbXtr7dIp294967EAAADw391q7Z8fN9Ebk/z45PaPZ+2CG3v7bJIHVtUBVbUha00z557uoQUIAAAAcGOcmeQhVfXJJA+ZfJ6q2lRVL5t8zVlJPpXkI0nOT3J+a+1N877xvNM9AAAAAP5Da+3KJKfs4/6tSX56cntnkp+5sd/bSgoAAABgFKykAAAAgClWvbU/KIcbAAAAGAVFCgAAAGAUFCkAAACAUVh4T4pL6sBFR+zTrrazS26SrNRqt+xdu67rln1QbvrFdr9ZKwfdskvuw8+9oktuktztoH4/459f2dAt+8rql33AAX3a+Bx00BFdcpPkg799/27Z93rGX3XLTqf57Ior3p+VlT5/NzdsOLxLbpJs/0a/ufT6az7XLfvT5z+3S+7OAw/tktvb4f2epmTHwbfrln3wjm1dcu/26N/okpsknzy7z+9Wkmx85C91yz700Dt1y4b1pHEmAIxErwIFADCdxpnDcrgBAACAUVCkAAAAAEZBkQIAAAAYBT0pAAAAYIrVlY6dd5eQlRQAAADAKChSAAAAAKOgSAEAAACMgp4UAAAAMMWqt/YH5XADAAAAo6BIAQAAAIyCIgUAAAAwCnpSAAAAwBSr1XsEy8VKCgAAAGAUFCkAAACAUVCkAAAAAEZBkQIAAAAYhRvdOLOqjm6tXbmIwQAAAMCYrK7onDmkmSspqurMqjpmcntTVX06yfuq6qKqeuAgIwQAAACWwrzTPR7ZWvvC5PbvJHlca+2uSR6S5EXTHlRVm6tqa1Vt/fLVFl0AAAAA880rUmyoqt2nhNy8tfaBJGmtfSLJQdMe1Frb0lrb1FrbdIvDj16noQIAAAD7s3k9Kf4oyd9X1ZlJzqmq30/yhiSnJPnQogcHAAAAPa263MSgZhYpWmt/UFUfSfLUJMdPvv74JH+X5LcWPzwAAABgWdyQq3tcmmRLkve11nbsvrOqHp7knEUNDAAAAFgu867u8fQkZyc5PckFVfXoPTY/f5EDAwAAAJbLvJUUT05y39bajqramOSsqtrYWntJEheLBQAAYL+2uuKl75DmFSlWd5/i0VrbVlUnZ61QcVwUKQAAAIB1NK9P6aVVddLuTyYFi1OTHJPkxEUODAAAAFgu84oUp2WtceZ/aK1d31o7LckDFjYqAAAAYOnMuwTp9hnb3r3+wwEAAIDxWJ331j7ryuEGAAAARkGRAgAAABgFRQoAAABgFBQpAAAAgFGY2TgTAAAAltlKVe8hLBUrKQAAAIBRUKQAAAAARkGRAgAAABiFhfek+N6XvGLREft00AH9zhu64iutW/b5v/Xr3bLf9gPXdMt+9JUv7JJ79dmP7ZKbJDt2Xtst+8gDDuqWffXKId2y//w2fdr4fK3ff3Xu8YzXdsv++B/9ZLfs7Ly+W/QJz/jrLrl3Tb8ftJ0n/EK37G0Xvqhb9rOPvFuX3JdefXGX3CT56q5+v1tvfeTR3bIfd9cXd8v+m4t+pkvud72l35yy8ZG/1C1721t+p1v2Z37zf3fL3t+temt/UA43AIxErwIFAMBYKFIAAAA
Ao6BIAQAAAIxCnxOsAQAA4L+B1ZV+/Q6XkZUUAAAAwCgoUgAAAACjoEgBAAAAjIKeFAAAADDFqrf2B+VwAwAAAKOgSAEAAACMgiIFAAAAMAqKFAAAAMAoaJwJAAAAU6yuVO8hLBUrKQAAAIBRUKQAAAAARkGRAgAAABgFPSkAAABgCj0phmUlBQAAADAKM4sUVXVeVT2nqu4y1IAAAACA5TRvJcWRSY5I8o6qen9VPbOqbjvvm1bV5qraWlVbLzjnDesyUAAAAGD/Nq8nxVWttV9M8otV9d1JnpDkvKq6MMlrWmtb9vWgyf1bkuSMt5zX1nPAAAAAMJRVTRIGdYMPd2vtn1trP5vkdklekOQ7FjYqAAAAYOnMW0nxib3vaK3tTHLO5AMAAABgXcxcSdFae3xV3aOqTqmqQ/fcVlUPX+zQAAAAgGUycyVFVZ2e5GlJLkzy8qo6o7V29mTz82M1BQAAAPuxlZXqPYSlMu90j81J7tta21FVG5OcVVUbW2svSeJ/CgAAAFg384oUq621HUnSWttWVSdnrVBxXBQpAAAAgHU07+oel1bVSbs/mRQsTk1yTJITFzkwAAAAYLnMK1KcluTSPe9orV3fWjstyQMWNioAAABg6cw83aO1tn3Gtnev/3AAAABgPFY1zhzUvJUUAAAAAINQpAAAAABGQZECAAAAGIV5lyAFAACApbXqrf1BOdwAAADAKChSAAAAAKOgSAEAAACMgp4UAAAAMMXqSvUewlKp1tpCAx78oF9fbMAUu3Z+vUdskmT1gEO6ZX962+u6Zd/vwJt3y/7w6uFdcjfsuKhLbpLcbGW1W/bXd+3sln1V+v2ReNDBR3fJPXxlQ5fcJHnzN3Z0yz71oEO7Zd9l9WZdct/y1Uu75CbJZ679Srfsg25xQrfsu157Wbfs4zf0+dv1jh2f75KbJF/cdW237BNvflS37I+mz5ySJPdf7fJUPP+045IuuUmy4dA7dct+04n95tI7/a9f7Zadb/nB/fpV/Ju2fbzPL9KCfd/Gu4/y/83pHgAAAMAoKFIAAAAAo6AnBQAAAEyxoifFoKykAAAAAEZBkQIAAAAYBUUKAAAAYBQUKQAAAIBR0DgTAAAAplj11v6gHG4AAABgFBQpAAAAgFFQpAAAAABGQU8KAAAAmGJ1pXoPYalYSQEAAACMgiIFAAAAMAqKFAAAAMAo6EkBAAAAU+hJMSwrKQAAAIBRUKQAAAAARmFmkaKqNlXVO6rq1VV1h6o6t6q+XFUfqKp7DzVIAAAAYAm01qZ+JHl/kkckeUKSi5P80OT+U5L8y4zHbU6ydfKxeVbGvI+b+vj/brmylyt7Gfd5WbOXcZ9lL0+u7OXKXsZ9XtbsZdxn2X2yffjY86Naa1MLGFX1r621e09uf7a1dsd9bVukqtraWtu06Jyx5Mperuxl3OdlzV7GfZbt50z2/pm9jPu8rNnLuM+y+2TDnub1pPh6VT20qn44Sauq70+Sqnpgkp0LHx0AAACwNOZdgvQpSV6YZFeShyV5alW9Msnnkjx5sUMDAAAAlsnMlRSttfOTPCPJ7ybZ3lo7o7V2RGvtW5IcPsQAk2wZKGcsubKXK3sZ93lZs5dxn2UvT67s5cpexn1e1uxl3GfZ0Nm8nhRPT/KzST6W5KQkZ7TWzp5sO6+1dp9BRgkAAADs9+ad7vHkJJtaazuqamOSs6pqY2vtJUlq0YMDAAAAlse8xpmrrbUdSdJa25bk5CSPqKrfy4KLFFX18Kr6eFX9e1U9a5FZe+X+WVVdXlUXDJW5R/YdquodVXVhVf1bVZ0xYPbNqur9VXX+JPt5Q2VP8ler6l+r6s0D526rqo9U1YeqauvA2UdU1VlV9bHJ//l3DJR798n+7v64uqqeMVD2Myc/XxdU1Wuq6mZD5E6yz5jk/tui93df80hVHVVV51bVJyf/Hjlg9g9P9ntXVS2sa/eU7N+Z/Ix/uKr+tqqOGDD7Nye5H6qqt1bVbYfI3WP
bL1ZVq6pj1jt3WnZV/XpVfW6P3+/vHSp7cv/pk7/d/1ZVLxwqu6pet8c+b6uqDw2YfVJVvXf335Gq+vaBcu9VVf8y+Rv2pqpayGm4056bLHpOm5G78PlsRvbC57MZ2UPMZzOfhy5yTpux3wud02bt86Lnsxn7vPD5bEb2wuczuEFmXZ80yduTnLTXfQck+fMkOxd1XdQkq0k+leTOSQ5Mcn6S/zHENVmTPCDJfZJcMOS1YCfZt0lyn8ntw5J8YsD9riSHTm5vSPK+JPcbcN9/PslfJXnzwMd8W5Jjhv6/nmS/KslPT24fmOSIDmNYTXJpkuMGyLpdks8kufnk879O8hMD7ee3JrkgycGTOewfk9xtgXn/ZR7JWhPiZ01uPyvJCwbMPiHJ3ZO8M2ur44bc74cmOWBy+wUD7/fhe9x+epI/GSJ3cv8dkvzfJBctao6Zss+/nuQXF/V/PCf7eya/WwdNPr/VUNl7bX9RkucOuN9vTfKIye3vTfLOgXI/kOSBk9s/leQ3F7TP+3xusug5bUbuwuezGdkLn89mZA8xn019HrroOW3Gfi90TpuRu/D5bNbx3uNrFjKfzdjvhc9nPnzckI95KylOy9oLmP/QWru+tXZa1v5gLsq3J/n31tqnW2vXJnltkkcvMO8/tNb+KckXh8jaR/YlrbXzJre/kuTCrL2wGyK7tcmqmawVKTYkmd6wZB1V1e2TPDLJy4bIG4PJO14PSPLyJGmtXdta+1KHoZyS5FOttYsGyjsgyc2r6oCsFQw+P1DuCUne21r7amvt+iTvSvKYRYVNmUcenbXCVCb/fv9Q2a21C1trH19E3g3IfuvkmCfJe5PcfsDsq/f49JAsYE6b8TfjxUl+eRGZNyB74aZkPzXJma21b0y+5vIBs5MkVVVJHpvkNQNmt/z/zcRvkQXMa1Ny757knya3z03yg+udO8me9txkoXPatNwh5rMZ2Qufz2ZkDzGfzXoeutA5rddz4Bm5C5/P5u3zIuezGdkLn8/ghph3dY/trbVLp2x792KGlGTtl+TiPT7fnoFerI9FrfUAuXfWVjQMlbk6WVJ2eZJzW2tDZf9+1v7w7Roob08tyVur6oNVtXnA3DsnuSLJK2rtNJeXVdUhA+bv9vgs6Mn83lprn8valYI+m+SSJF9urb11iOysraJ4QFUdXVUHZ+3dgTsMlL3bsa21S5K1JwdJbjVw/hj8VJJ/GDKwqn67qi5O8iNJnjtQ5qOSfK6tXSGrh6dNloX/2XovwZ/j+CTfXVXvq6p3VdW3DZi923cnuay19skBM5+R5HcmP2e/m+RXB8q9IMmjJrd/OAPMaXs9NxlsTuvxnOgGZC98Pts7e8j5bM/soee0fRzzQea0vXIHnc+m/JwNMp/tld1rPoP/ZN5Kil721e9ikHf1x6CqDk3y+iTP2KtyvlCttZ2ttZOy9s7At1fVty46s6pOTXJ5a+2Di86a4rva2lVqHpHk56pqkSuE9nRA1pbu/nFr7d5JrsnactnBVNWBWXuC+zcD5R2ZtXfe7pTktkkOqaofHSK7tXZh1pbmnpvknKydQnb9zAexrqrq2Vk75n85ZG5r7dmttTtMcp+26LxJEezZGaggsg9/nOQuWbsi1yVZWyo8lAOSHJnkfkl+KclfT94JHNITMlDhdQ9PTfLMyc/ZMzNZITeAn8ra360PZm259rWLDOv13KRX7qzsIeazfWUPNZ/tmZ21/RxsTtvHfg8yp+0jd7D5bMbP+MLns31k95rP4D8Za5Fie/7zOwK3z5IsN6qqDVmbLP6ytfaGHmOYnHbwziQPHyDuu5I8qqq2Ze20ngdV1asHyE2StNY+P/n38iR/m7VTjYawPcn2PVarnJW1osWQHpHkvNbaZQPlPTjJZ1prV7TWrkvyhiTfOVB2Wmsvb63dp7X2gKwtmx7yndYkuayqbpMkk38XshR+jKrqx5OcmuRHWmu9Cs5/lQUth9/LXbJWiDt/Mq/dPsl5VXX
rAbLTWrtsUnDeleRPM9yclqzNa2+YnD74/qytjltI09B9mZxG9gNJXjdU5sSPZ20+S9aKvoMc89bax1prD22t3TdrL2Q+taisKc9NFj6n9XxONC17iPnsBuz3wuazfWQPNqfta7+HmNOmHO9B5rMZP2cLn8+mZHeZz2BvYy1SfCDJ3arqTpN3ex+f5I2dx7Rwkwrty5Nc2Fr7vYGzb1mTLtVVdfOsvaD82KJzW2u/2lq7fWttY9b+n9/eWhvk3fWqOqSqDtt9O2sNsQa5qsvkNKqLq+ruk7tOSfLRIbL3MPQ7jp9Ncr+qOnjys35K1s6BHERV3Wry7x2z9od/6Hdb35i1P/6Z/Hv2wPldVNXDk/xKkke11r46cPbd9vj0URlmTvtIa+1WrbWNk3lte9aak+3z1Mn1tvtF48RjMtCcNvF3SR40GcfxWWsI/IUB8x+c5GOtte0DZiZrb6I8cHL7QRmoALrHnLaS5DlJ/mRBOdOemyx0Tuv8nGif2UPMZzOyFz6f7St7qDltxn4vdE6b8XO28Plszs/4QuezGdld5jP4L9oIunfu6yNr54x/ImvvDDx7wNzXZG052XVZm4ifNGD2/bN2WsuHk3xo8vG9A2XfM8m/TrIvyII6o88Zw8kZ8OoeWesLcf7k49+G/Dmb5J+UZOvkmP9dkiMHzD44yZVJbjHwPj8va0+sLkjyF5l0zR4o+5+zVgg6P8kpC876L/NIkqOTvC1rf/DfluSoAbMfM7n9jSSXJfm/A2b/e9Z6DO2e09a9I/2M7NdPftY+nORNWWs+t/DcvbZvy+Ku7rGvff6LJB+Z7PMbk9xmwOwDk7x6cszPS/KgobIn978yyVMWkTlnv++f5IOTueV9Se47UO4ZWXue9IkkZyapBe3zPp+bLHpOm5G78PlsRvbC57MZ2UPMZ3Ofhy5qTpux3wud02bkLnw+m3W8Fz2fzdjvhc9nPnzckI9qbWlaPQAAAAAjNtbTPQAAAIDb6gWbAAAAOklEQVQlo0gBAAAAjIIiBQAAADAKihQAAADAKChSAAAAAKOgSAEAAACMgiIFAAAAMAqKFAAAAMAo/H9IHhsrwalSCAAAAABJRU5ErkJggg==\n" + "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:25.208277\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -959,9 +980,6 @@ } ], "source": [ - "if max1 < 10:\n", - " max1 = 10\n", - "\n", "idf1 = impute_df(split_df1, max_iter= int(max1), verbose=2)\n", "size = idf1.shape[1]\n", "corr = idf1.corr()\n", @@ -971,7 +989,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -979,47 +997,51 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 5)\n", - "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 5/10, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.17\n", - "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.18\n", - "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.21\n", - "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.22\n", + "[IterativeImputer] Ending imputation round 1/8, elapsed time 0.01\n", + "[IterativeImputer] Change: 
457858.9943114285, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 2/8, elapsed time 0.02\n", + "[IterativeImputer] Change: 123566.60351871609, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 3/8, elapsed time 0.03\n", + "[IterativeImputer] Change: 66082.79555085694, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 4/8, elapsed time 0.05\n", + "[IterativeImputer] Change: 31356.06832177031, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 5/8, elapsed time 0.06\n", + "[IterativeImputer] Change: 13275.540026738847, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 6/8, elapsed time 0.07\n", + "[IterativeImputer] Change: 5607.778352352506, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 7/8, elapsed time 0.08\n", + "[IterativeImputer] Change: 4637.325683098177, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 8/8, elapsed time 0.09\n", + "[IterativeImputer] Change: 4069.896702134858, scaled tolerance: 131.03159 \n", "[IterativeImputer] Completing matrix with shape (195, 5)\n", - "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 5/10, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.02\n", - 
"/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " \" reached.\", ConvergenceWarning)\n" + "[IterativeImputer] Ending imputation round 1/8, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 2/8, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 3/8, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 4/8, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 5/8, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 6/8, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 7/8, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 8/8, elapsed time 0.01\n", + "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 13 + "execution_count": 18 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" + "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:25.919090\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1027,9 +1049,6 @@ } ], "source": [ - "if max2 < 10:\n", - " max2 = 10\n", - "\n", "idf2 = impute_df(split_df2, max_iter= int(max2), verbose=2)\n", "size = idf2.shape[1]\n", "corr = idf2.corr()\n", @@ -1039,7 +1058,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1047,35 +1066,55 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 11)\n", - "[IterativeImputer] Ending imputation round 1/20, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.18\n", - "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.20\n", - "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.22\n", - "[IterativeImputer] Ending imputation round 5/20, elapsed time 0.24\n", - "[IterativeImputer] Ending imputation round 6/20, elapsed time 0.26\n", - 
"[IterativeImputer] Ending imputation round 7/20, elapsed time 0.29\n", - "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.37\n", - "[IterativeImputer] Ending imputation round 9/20, elapsed time 0.39\n", - "[IterativeImputer] Ending imputation round 10/20, elapsed time 0.41\n", - "[IterativeImputer] Ending imputation round 11/20, elapsed time 0.43\n", - "[IterativeImputer] Ending imputation round 12/20, elapsed time 0.45\n", - "[IterativeImputer] Ending imputation round 13/20, elapsed time 0.47\n", - "[IterativeImputer] Ending imputation round 14/20, elapsed time 0.49\n", - "[IterativeImputer] Ending imputation round 15/20, elapsed time 0.54\n", - "[IterativeImputer] Ending imputation round 16/20, elapsed time 0.59\n", - "[IterativeImputer] Ending imputation round 17/20, elapsed time 0.62\n", - "[IterativeImputer] Ending imputation round 18/20, elapsed time 0.64\n", - "[IterativeImputer] Ending imputation round 19/20, elapsed time 0.66\n", - "[IterativeImputer] Ending imputation round 20/20, elapsed time 0.68\n", + "[IterativeImputer] Ending imputation round 1/20, elapsed time 0.02\n", + "[IterativeImputer] Change: 1.3057475071151101, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.04\n", + "[IterativeImputer] Change: 0.2431361335536576, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.06\n", + "[IterativeImputer] Change: 0.050910966788451084, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.08\n", + "[IterativeImputer] Change: 0.024727577272021295, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 5/20, elapsed time 0.10\n", + "[IterativeImputer] Change: 0.01669938231479373, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 6/20, elapsed time 0.12\n", + "[IterativeImputer] Change: 0.014436526598200827, scaled tolerance: 0.001036 \n", + 
"[IterativeImputer] Ending imputation round 7/20, elapsed time 0.14\n", + "[IterativeImputer] Change: 0.013328878116682485, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.16\n", + "[IterativeImputer] Change: 0.012628774076243077, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 9/20, elapsed time 0.17\n", + "[IterativeImputer] Change: 0.012053990511959045, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 10/20, elapsed time 0.19\n", + "[IterativeImputer] Change: 0.011528761105839724, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 11/20, elapsed time 0.21\n", + "[IterativeImputer] Change: 0.011028826842162887, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 12/20, elapsed time 0.23\n", + "[IterativeImputer] Change: 0.010551696384536902, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 13/20, elapsed time 0.25\n", + "[IterativeImputer] Change: 0.010132923628465185, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 14/20, elapsed time 0.26\n", + "[IterativeImputer] Change: 0.009943115736268826, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 15/20, elapsed time 0.28\n", + "[IterativeImputer] Change: 0.009758788813657215, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 16/20, elapsed time 0.30\n", + "[IterativeImputer] Change: 0.00957549042259187, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 17/20, elapsed time 0.31\n", + "[IterativeImputer] Change: 0.009397405580395923, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 18/20, elapsed time 0.34\n", + "[IterativeImputer] Change: 0.009222955401797739, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 19/20, elapsed time 0.36\n", + "[IterativeImputer] 
Change: 0.009052194265333768, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 20/20, elapsed time 0.38\n", + "[IterativeImputer] Change: 0.008884981830621363, scaled tolerance: 0.001036 \n", "[IterativeImputer] Completing matrix with shape (195, 11)\n", "[IterativeImputer] Ending imputation round 1/20, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.00\n", "[IterativeImputer] Ending imputation round 5/20, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 6/20, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 7/20, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 9/20, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 10/20, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 11/20, elapsed time 0.02\n", @@ -1085,29 +1124,29 @@ "[IterativeImputer] Ending imputation round 15/20, elapsed time 0.03\n", "[IterativeImputer] Ending imputation round 16/20, elapsed time 0.03\n", "[IterativeImputer] Ending imputation round 17/20, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 18/20, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 18/20, elapsed time 0.04\n", "[IterativeImputer] Ending imputation round 19/20, elapsed time 0.04\n", "[IterativeImputer] Ending imputation round 20/20, elapsed time 0.04\n", - 
"/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " \" reached.\", ConvergenceWarning)\n" + "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 14 + "execution_count": 19 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" + "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:26.720961\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1115,9 +1154,6 @@ } ], "source": [ - "if max3 < 10:\n", - " max3 = 10\n", - "\n", "idf3 = impute_df(split_df3, max_iter= int(max3), verbose=2)\n", "size = idf3.shape[1]\n", "corr = idf3.corr()\n", @@ -1127,7 +1163,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1135,16 +1171,26 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 9)\n", - "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.06\n", - "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.11\n", - "[IterativeImputer] Ending 
imputation round 5/10, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.13\n", - "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.18\n", + "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.02\n", + "[IterativeImputer] Change: 24.70630011052294, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.04\n", + "[IterativeImputer] Change: 3.081030825722686, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.05\n", + "[IterativeImputer] Change: 1.7974084139622217, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.07\n", + "[IterativeImputer] Change: 1.2013063098503434, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 5/10, elapsed time 0.08\n", + "[IterativeImputer] Change: 0.8709381898134296, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.09\n", + "[IterativeImputer] Change: 0.6848017337664789, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.10\n", + "[IterativeImputer] Change: 0.5789561706855952, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.11\n", + "[IterativeImputer] Change: 0.5398675991534301, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.12\n", + "[IterativeImputer] Change: 0.5228675076225393, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.13\n", + "[IterativeImputer] Change: 0.5054232450128762, scaled tolerance: 0.087747 \n", "[IterativeImputer] 
Completing matrix with shape (195, 9)\n", "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00\n", "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.00\n", @@ -1156,26 +1202,26 @@ "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.01\n", - "/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " \" reached.\", ConvergenceWarning)\n" + "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 15 + "execution_count": 20 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" + "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:27.310512\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAzUAAANSCAYAAABcHeAOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/Z1A+gAAAACXBIWXMAAAsTAAALEwEAmpwYAAAwh0lEQVR4nO3df7Std10f+Pfn3PBDfkiCVIwJA7GmVVpmBU2DLR1/hIBxVEJbxMBYgwO9rlniz1VrGGakorRhpi22S+twDMEglCCxYKhRBIJaR8FcFAwEkRCpSQwESAKBMIR7zmf+uDusw+2950fuc559viev11p7Ze9nP3vvj4+I553P5/t9qrsDAAAwqpVlFwAAAHAihBoAAGBoQg0AADA0oQYAABiaUAMAAAxNqAEAAIYm1AAAADtSVZdV1W1V9b7jvF9V9R+q6oaq+rOq+oYN711UVR9aPC6aoh6hBgAA2KlfSXL+Ju9/R5IzF4+DSX4pSarqkUlenORJSc5J8uKqOuVEixFqAACAHenu309y+yanXJDk1X3EO5OcXFWnJvn2JG/t7tu7+44kb83m4WhbTjrRL9jS+3+9d/039om1T2/2nwvuddc1b1h2CUNYeejDll3CMB5+7oXLLmEId731tcsuYRj3fOwTyy5hCA/+m2csu4Qh3HPTTcsuYRiP/Lnfq2XXsC17/O/j+rvP/MEc6a7ca7W7V3f4Nacl2fgf3psXx453/ITsfqgBAACGsQgwOw0xS2X8DAAAmNotSR6z4fXpi2PHO35ChBoAAGBqVyX5/sUuaN+U5FPdfWuStyR5WlWdstgg4GmLYyfE+BkAAMyo19aWXcKmtrMwqapel+Rbkzyqqm7OkR3NHpAk3f3/JLk6yf+c5IYkdyf5gcV7t1fVzya5dvFVL+nuE15YLtQAAAA70t3P3uL9TvJDx3nvsiSXTVmP8TMAAGBoOjUAADCntcPLrmDf0akBAACGJtQAAABDM34GAAAz6vW9PX62nd3P9hqdGgAAYGhCDQAAMDTjZwAAMKc9fvPNEenUAAAAQxNqAACAoRk/AwCAGbWbb05OpwYAABiaUAMAAAxNqAEAAIZmTQ0AAMzJmprJ6dQAAABDE2oAAIChGT8DAIAZ9brxs6np1AAAAEMTagAAgKEZPwMAgDmtrS27gn1HpwYAABiaUAMAAAzN+BkAAMyo3Xxzcjo1AADA0IQaAABgaMbPAABgTsbPJqdTAwAADE2oAQAAhmb8DAAAZtTrbr45NZ0aAABgaEINAAAwNKEGAAAYmjU1AAAwo7al8+R0agAAgKEJNQAAwNCMnwEAwJyMn01OpwYAABjalp2aqvq6JBckOW1x6JYkV3X3B3azMAAAgO3YNNRU1U8leXaSK5L88eLw6UleV1VXdPclu1wfAADsK72+tuwS9p2txs+el+Tvdfcl3f2axeOSJOcs3jumqjpYVYeq6tDqG946Zb0AAABfYqvxs/UkX53kvx11/NTFe8fU3atJVpMk7//1PoH6AAAANrVVqPmxJG+vqg8luWlx7H9I8rVJXrCLdQEAwP5k97PJbRpquvu3q+pv5ci42caNAq7tbsOAA
ADA0m25+1l3ryd55wy1AAAA7JibbwIAwIza+Nnk3HwTAAAYmlADAAAMTagBAACGZk0NAADMyZqayenUAAAAQxNqAACAoRk/AwCAGfW6e9hPTacGAAAYmlADAAAMzfgZAADMye5nk9OpAQAAhibUAAAAQzN+BgAAM+o1u59NTacGAAAYmlADAAAMzfgZAADMqO1+NjmdGgAAYGhCDQAAMDTjZwAAMKd142dT06kBAACGJtQAAABDE2oAAIChWVMDAAAz6rW1ZZew7+jUAAAAQxNqAACAoRk/AwCAORk/m5xODQAAMDShBgAAGJrxMwAAmFGvHV52CfuOTg0AADA0oQYAABia8TMAAJiT3c8mp1MDAAAMTagBAACGZvwMAABm1MbPJqdTAwAADE2oAQAAhibUAAAAQ7OmBgAAZtTr1tRMbddDzdqnb9/tn9g3Dnz5I5ddwhDWPnP3sksYwsqDv2zZJbDPHL7jzmWXMIzDn71n2SUMYf0zdy27hCGsPOwhyy4B9jzjZwAAwNCMnwEAwJxs6Tw5nRoAAGBoQg0AADA042cAADCjNn42OZ0aAABgaEINAAAwNONnAAAwo15bX3YJJ6yqzk/y75McSHJpd19y1PsvT/Jti5cPSfKV3X3y4r21JNct3vur7n76idYj1AAAANtWVQeS/GKSpya5Ocm1VXVVd19/7znd/eMbzv/hJE/c8BWf6+6zpqzJ+BkAALAT5yS5obtv7O57klyR5IJNzn92ktftZkE6NQAAMKc9Pn5WVQeTHNxwaLW7Vze8Pi3JTRte35zkScf5rscmOSPJNRsOP7iqDiU5nOSS7n7TidYs1AAAAF+0CDCrW564PRcmubK7N+5j/djuvqWqvibJNVV1XXd/+ER+xPgZAACwE7ckecyG16cvjh3LhTlq9Ky7b1n888Ykv5svXW9znwg1AADATlyb5MyqOqOqHpgjweWqo0+qqq9LckqSP9pw7JSqetDi+aOSPDnJ9Ud/dqeMnwEAwIx6bW3rk/aw7j5cVS9I8pYc2dL5su5+f1W9JMmh7r434FyY5Iru7g0f//okr6iq9RxpsFyycde0+0qoAQAAdqS7r05y9VHHfvqo1//yGJ/7wyRPmLoe42cAAMDQdGoAAGBGvdZbn8SO6NQAAABDE2oAAIChGT8DAIAZ9dr6skvYd3RqAACAoQk1AADA0IyfAQDAjIyfTU+nBgAAGJpQAwAADM34GQAAzKjX3Xxzajo1AADA0IQaAABgaMbPAABgRr1m/GxqOjUAAMDQhBoAAGBoQg0AADA0a2oAAGBGvbbsCvYfnRoAAGBoQg0AADA042cAADAjWzpPT6cGAAAYmlADAAAMzfgZAADMaH192RXsPzo1AADA0IQaAABgaMbPAABgRm6+Ob373Kmpqh+YshAAAID74kTGz37meG9U1cGqOlRVh375Tb9/Aj8BAACwuU3Hz6rqz473VpJHH+9z3b2aZDVJ1v7ol91dCAAAFoyfTW+rNTWPTvLtSe446ngl+cNdqQgAAGAHtgo1/yXJw7r7PUe/UVW/uxsFAQAA7MSmoaa7n7fJe8+ZvhwAAICdsaUzAADMaH192RXsP26+CQAADE2oAQAAhmb8DAAAZmRL5+np1AAAAEMTagAAgKEZPwMAgBmtr9eyS9h3dGoAAIChCTUAAMDQjJ8BAMCM3Hxzejo1AADA0IQaAABgaMbPAABgRm6+OT2dGgAAYGhCDQAAMDTjZwAAMCM335yeTg0AADA0oQYAABiaUAMAAAzNmhoAAJjRui2dJ6dTAwAADE2oAQAAhmb8DAAAZmRL5+np1AAAAEMTagAAgKEZPwMAgBm18bPJ6dQAAABDE2oAAIChGT8DAIAZra8vu4L9R6cGAAAYmlADAAAMzfgZAADMyM03p6dTAwAADE2oAQAAhibUAAAAQ7OmBgAAZmRNzfR0agAAgKEJNQAAwNCMnwEAwIzWjJ9NTqcGAAAYmlADAAAMbdfHz+665
g27/RP7xtpn7l52CUP4iu/78WWXMIQ733zpsksYxp1vesWySxjCKc/6oWWXwD5z1zVXLLuEIax94q5ll8DE7H42PZ0aAABgaEINAAAwNLufAQDAjNbb+NnUdGoAAIChCTUAAMDQjJ8BAMCM1teXXcH+o1MDAAAMTagBAACGJtQAAABDs6YGAABmtGZL58np1AAAAEMTagAAgKEZPwMAgBmtrxs/m5pODQAAMDShBgAAGJrxMwAAmJHdz6anUwMAAAxNqAEAAIYm1AAAwIzWu/b0Yzuq6vyq+mBV3VBVFx/j/edW1cer6j2Lx/M3vHdRVX1o8bhoimtqTQ0AALBtVXUgyS8meWqSm5NcW1VXdff1R536+u5+wVGffWSSFyc5O0kneffis3ecSE06NQAAwE6ck+SG7r6xu+9JckWSC7b52W9P8tbuvn0RZN6a5PwTLUinBgAAZrTXdz+rqoNJDm44tNrdqxten5bkpg2vb07ypGN81T+pqm9O8hdJfry7bzrOZ0870ZqFGgAA4IsWAWZ1yxM39+Ykr+vuz1fVDya5PMm5J1zccRg/AwAAduKWJI/Z8Pr0xbEv6u5PdvfnFy8vTfKN2/3sfaFTAwAAM1rrZVdwwq5NcmZVnZEjgeTCJM/ZeEJVndrdty5ePj3JBxbP35LkX1XVKYvXT0vywhMtSKgBAAC2rbsPV9ULciSgHEhyWXe/v6pekuRQd1+V5Eeq6ulJDie5PclzF5+9vap+NkeCUZK8pLtvP9GahBoAAGBHuvvqJFcfdeynNzx/YY7Tgenuy5JcNmU91tQAAABD06kBAIAZre/xLZ1HpFMDAAAMTagBAACGZvwMAABmtGb8bHI6NQAAwNCEGgAAYGjGzwAAYEZrvewK9h+dGgAAYGhCDQAAMDTjZwAAMKO12P1sajo1AADA0IQaAABgaMbPAABgRnY/m55ODQAAMLQtQ01VfV1VPaWqHnbU8fN3rywAAIDt2TTUVNWPJPmNJD+c5H1VdcGGt//VbhYGAACwHVt1av5Zkm/s7mck+dYk/2dV/ejivePuRVdVB6vqUFUd+pVrb56kUAAA2A/W9vhjRFttFLDS3Z9Jku7+SFV9a5Irq+qx2STUdPdqktUkufOlT7MUCgAA2DVbdWo+VlVn3ftiEXC+K8mjkjxhF+sCAADYlq06Nd+f5PDGA919OMn3V9Urdq0qAADYp0Yd8drLNg013X3cBTHd/f9OXw4AAMDOuE8NAAAwtK3GzwAAgAmtHX+/Le4jnRoAAGBoQg0AADA042cAADCjtXYbx6np1AAAAEMTagAAgKEZPwMAgBm5+eb0dGoAAIChCTUAAMDQjJ8BAMCMjJ9NT6cGAAAYmlADAAAMTagBAACGZk0NAADMyJqa6enUAAAAQxNqAACAoRk/AwCAGa2ll13CvqNTAwAADE2oAQAAhmb8DAAAZmT3s+np1AAAAEMTagAAgKEZPwMAgBmttd3PpqZTAwAADE2oAQAAhmb8DAAAZmT3s+np1AAAAEMTagAAgKEJNQAAwNCsqQEAgBmtxZbOU9OpAQAAhibUAAAAQzN+BgAAMzJ+Nj2dGgAAYGhCDQAAMDTjZwAAMKO1ZRewD+nUAAAAQxNqAACAoRk/AwCAGa213c+mtuuhZuWhD9vtn9g3Vh78ZcsuYQh3vvnSZZcwhJO/+/nLLmEYd13z+mWXMIRP+b89JvYI/z21PWuHl10B7HnGzwAAgKEZPwMAgBm5+eb0dGoAAIChCTUAAMDQhBoAAGBo1tQAAMCMrKmZnk4NAAAwNKEGAAAYmvEzAACY0XobP5uaTg0AADA0oQYAABia8TMAAJiR3c+mp1MDAAAMTagBAACGZvwMAABmZPxsejo1AADA0IQaAABgaMbPAABgRmtuvjk5nRoAAGBoQg0AADA042cAADAju59NT6cGAAAYmlADAAAMTagBAACGZk0NAADMaN2WzpPTqQEAAIYm1AAAAEMzfgYAADOypfP0d
GoAAIChCTUAAMDQhBoAAJjRWnpPP7ajqs6vqg9W1Q1VdfEx3v+Jqrq+qv6sqt5eVY/d8N5aVb1n8bhqimtqTQ0AALBtVXUgyS8meWqSm5NcW1VXdff1G0770yRnd/fdVfW/Jfm/knzv4r3PdfdZU9akUwMAAOzEOUlu6O4bu/ueJFckuWDjCd39ju6+e/HynUlO382CdGoAAGBGe/3mm1V1MMnBDYdWu3t1w+vTkty04fXNSZ60yVc+L8lvbXj94Ko6lORwkku6+00nVrFQAwAAbLAIMKtbnrgNVfV9Sc5O8i0bDj+2u2+pqq9Jck1VXdfdHz6R3zF+BgAA7MQtSR6z4fXpi2NfoqrOS/KiJE/v7s/fe7y7b1n888Ykv5vkiSdakE4NAADMaB/cfPPaJGdW1Rk5EmYuTPKcjSdU1ROTvCLJ+d1924bjpyS5u7s/X1WPSvLkHNlE4IQINQAAwLZ19+GqekGStyQ5kOSy7n5/Vb0kyaHuvirJ/53kYUneUFVJ8lfd/fQkX5/kFVW1niNTY5cctWvafSLUAAAAO9LdVye5+qhjP73h+XnH+dwfJnnC1PVYUwMAAAxNpwYAAGa0tse3dB6RTg0AADA0oQYAABjaluNnVXVOku7ua6vq8UnOT/Lni8VBAADADqyPv6XznrNpp6aqXpzkPyT5par610l+IclDk1xcVS/a5HMHq+pQVR161R/+5aQFAwAAbLRVp+aZSc5K8qAkH01yend/uqr+TZJ3JXnpsT7U3atJVpPk0z//j0VRAABg12wVag5391qSu6vqw9396STp7s8tbpgDAADsgN3PprfVRgH3VNVDFs+/8d6DVfWIJEINAACwdFt1ar65uz+fJN29McQ8IMlFu1YVAADANm0aau4NNMc4/okkn9iVigAAYB9bN342OfepAQAAhibUAAAAQ9vy5psAAMB01tx8c3I6NQAAwNCEGgAAYGjGzwAAYEbr7XaPU9OpAQAAhibUAAAAQxNqAACAoVlTAwAAM1q3pfPkdGoAAIChCTUAAMDQjJ8BAMCM1tr42dR0agAAgKEJNQAAwNCMnwEAwIzsfjY9nRoAAGBoQg0AADA042cAADCjdbufTU6nBgAAGJpQAwAADM34GQAAzGh92QXsQzo1AADA0IQaAABgaEINAAAwNGtqAABgRrZ0np5ODQAAMDShBgAAGJrxMwAAmNF6jJ9NTacGAAAYmlADAAAMzfgZAADMyO5n09OpAQAAhibUAAAAQzN+BgAAM7L72fR0agAAgKEJNQAAwNCMnwEAwIyMn01PpwYAABiaUAMAAAxt18fPHn7uhbv9E9zP3PmmVyy7hCHcdc3rl13CMB5+7vcuu4Qh3PnmS5ddwjC+8PE7l13CED7zB29cdglDWLvjk8suYRgnP/E5yy6BJbGmBgAAZrRuSc3kjJ8BAABDE2oAAIChGT8DAIAZ2dJ5ejo1AADA0IQaAABgaMbPAABgRsbPpqdTAwAADE2oAQAAhmb8DAAAZtSmzyanUwMAAAxNqAEAAIZm/AwAAGZk97Pp6dQAAABDE2oAAIChGT8DAIAZGT6bnk4NAAAwNKEGAAAYmlADAAAMzZoaAACYkS2dp6dTAwAADE2oAQAAhmb8DAAAZmT4bHo6NQAAwNCEGgAAYGjGzwAAYEbGz6anUwMAAAxNqAEAAIZm/AwAAGbk5pvT06kBAACGJtQAAABDM34GAAAzMnw2PZ0aAABgaEINAAAwNKEGAAAYmjU1AAAwI2tqpqdTAwAADE2oAQAAhmb8DAAAZmT8bHo6NQAAwNCEGgAAYGhCDQAAzKj3+GM7qur8qvpgVd1QVRcf4/0HVdXrF++/q6oet+G9Fy6Of7Cqvn2bP7mpHYeaqnr1FD8MAACMp6oOJPnFJN+R5PFJnl1Vjz/qtOcluaO7vzbJy5O8bPHZxye5MMnfSXJ+kv+4+L4TsulGAVV11dGHknxbVZ2cJN399BMtAAAAGMo5SW7o7huTp
KquSHJBkus3nHNBkn+5eH5lkl+oqlocv6K7P5/kL6vqhsX3/dGJFLRVp+b0JJ9O8u+S/NvF464Nz4+pqg5W1aGqOrR65dtOpD4AAGBGG/+WXzwOHnXKaUlu2vD65sWxY57T3YeTfCrJV2zzszu21ZbOZyf50SQvSvKT3f2eqvpcd//eZh/q7tUkq0nSf/Zrdq0DAIBBbPxbfhSbhpruXk/y8qp6w+KfH9vqMwAAwL52S5LHbHh9+uLYsc65uapOSvKIJJ/c5md3bFsbBXT3zd39PUl+K8lrTvRHAQDg/qv2+GNL1yY5s6rOqKoH5sjC/6PX4l+V5KLF82cmuaa7e3H8wsXuaGckOTPJH2/nRzezo65Ld/9mkt880R8FAADG1N2Hq+oFSd6S5ECSy7r7/VX1kiSHuvuqJK9M8quLjQBuz5Hgk8V5v5YjmwocTvJD3b12ojUZJQMAAHaku69OcvVRx356w/P/L8n3HOezL03y0inrEWoAAGBW2xrxYgd2fPNNAACAvUSoAQAAhibUAAAAQ7OmBgAAZmVNzdR0agAAgKEJNQAAwNCMnwEAwJxMn01OpwYAABiaUAMAAAzN+BkAAMxKX2FqrigAADA0oQYAABia8TMAAJhR2f5scjo1AADA0IQaAABgaMbPAABgTmX8bGo6NQAAwNCEGgAAYGhCDQAAMDRragAAYEa2dJ6eTg0AADA0oQYAABia8TMAAJiVvsLUXFEAAGBoQg0AADA042cAADCjKrufTU2nBgAAGJpQAwAADM34GQAAzKn0FabmigIAAEMTagAAgKEZPwMAgBmVvsLkXFEAAGBoQg0AADA0oQYAABiaNTUAADCjqlp2CfuOTg0AADA0oQYAABjaro+f3fXW1+72T+wbh++4c9klDOGUZ/3QsksYwqfefOmySxjGna7Vtpz83c9fdgnDWLvjtmWXMITPvuu3l13CEL5w+13LLoGplb7C1FxRAABgaEINAAAwNLufAQDAjMr42eRcUQAAYGhCDQAAMDTjZwAAMKPSV5icKwoAAAxNqAEAAIZm/AwAAGZk97PpuaIAAMDQhBoAAGBoxs8AAGBGVQeWXcK+o1MDAAAMTagBAACGJtQAAABDs6YGAABmZEvn6bmiAADA0IQaAABgaMbPAABgRsbPpueKAgAAQxNqAACAoRk/AwCAGVUdWHYJ+45ODQAAMDShBgAAGJrxMwAAmJHdz6bnigIAAEMTagAAgKEZPwMAgBnZ/Wx6OjUAAMDQhBoAAGBoQg0AADA0a2oAAGBG1tRMT6cGAAAYmlADAAAMzfgZAADMaKX0FabmigIAAEMTagAAgKEZPwMAgBnZ/Wx6OjUAAMDQhBoAAGBoxs8AAGBGxs+mp1MDAAAMTagBAACGtqPxs6r6h0nOSfK+7v6d3SkJAAD2L+Nn09u0U1NVf7zh+T9L8gtJHp7kxVV18S7XBgAAsKWtxs8esOH5wSRP7e6fSfK0JP/L8T5UVQer6lBVHXrVOz9y4lUCAAAcx1bjZytVdUqOhJ/q7o8nSXd/tqoOH+9D3b2aZDVJPv1vL+ipigUAgNHVivGzqW0Vah6R5N1JKklX1andfWtVPWxxDAAAYKk2DTXd/bjjvLWe5B9NXg0AAMAO3aebb3b33Un+cuJaAAAAduw+hRoAAOC+WbGl8+TcfBMAABiaUAMAAAzN+BkAAMyo9vn4WVU9MsnrkzwuyUeSPKu77zjqnLOS/FKSL0+yluSl3f36xXu/kuRbknxqcfpzu/s9m/2mTg0AADCli5O8vbvPTPL2xeuj3Z3k+7v77yQ5P8nPV9XJG97/ye4+a/F4z1Y/KNQAAABTuiDJ5Yvnlyd5xtEndPdfdPeHFs//OsltSf7Gff1B42cAADCjvT5+VlUHkxzccGi1u1d38BWP7u5bF88/muTRW/zeOUkemOTDGw6/tKp+OotOT3d/frPvEGoAAIAvWgSYTUNMVb0tyVcd460XHfVdXVW9yfecmuRXk
1zU3euLwy/MkTD0wEUdP5XkJZvVI9QAAAA70t3nHe+9qvpYVZ3a3bcuQsttxznvy5P8ZpIXdfc7N3z3vV2ez1fVq5L8863qEWoAAGBGVfv+T/CrklyU5JLFP3/j6BOq6oFJ3pjk1d195VHv3RuIKkfW47xvqx+0UQAAADClS5I8tao+lOS8xetU1dlVdeninGcl+eYkz62q9yweZy3ee21VXZfkuiSPSvJzW/3gvo+JAADAfLr7k0mecozjh5I8f/H8NUlec5zPn7vT3xRqAABgRit7fPezERk/AwAAhibUAAAAQxNqAACAoVlTAwAAM6oVa2qmplMDAAAMTagBAACGZvwMAABmVOVP8Knp1AAAAEMTagAAgKHpfQEAwIyq7H42NZ0aAABgaEINAAAwNONnAAAwI7ufTU+nBgAAGJpQAwAADE3vCwAAZrRi97PJ6dQAAABDE2oAAIChCTUAAMDQrKkBAIAZ1Yo/waemUwMAAAxNqAEAAIam9wUAADOq8if41HRqAACAoQk1AADA0PS+AABgRlUHll3CvqNTAwAADG3XOzX3fOwTu/0T+8bhz96z7BLgfukLH79z2SUMYe2O25ZdwjAOnPKVyy5hCL22tuwSgH3C+BkAAMzI7mfTM34GAAAMTagBAACGpvcFAAAzqhV/gk9NpwYAABiaUAMAAAxN7wsAAGZk97Pp6dQAAABDE2oAAIChCTUAAMDQDPQBAMCcrKmZnE4NAAAwNKEGAAAYmt4XAADMqFb8CT41nRoAAGBoQg0AADA0vS8AAJhR2f1scjo1AADA0IQaAABgaHpfAAAwJ7ufTU6nBgAAGJpQAwAADE3vCwAA5lQHll3BvqNTAwAADE2oAQAAhibUAAAAQ7OmBgAAZlS2dJ6cTg0AADA0oQYAABia3hcAAMyp/Ak+NZ0aAABgaEINAAAwNL0vAACYUdv9bHI6NQAAwNCEGgAAYGh6XwAAMKeVA8uuYN/RqQEAAIYm1AAAAEMzfgYAAHMyfjY5nRoAAGBoQg0AADA042cAADCjNn42uU07NVX1pKr68sXzL6uqn6mqN1fVy6rqEfOUCAAAcHxbjZ9dluTuxfN/n+QRSV62OPaqXawLAABgW7YKNSvdfXjx/Ozu/rHu/oPu/pkkX3O8D1XVwao6VFWHXv3ej05WLAAAwNG2WlPzvqr6ge5+VZL3VtXZ3X2oqv5Wki8c70PdvZpkNUk+8S+e3NOVCwAAY7OmZnpbdWqen+RbqurDSR6f5I+q6sYkv7x4DwAAYKk27dR096eSPHexWcAZi/Nv7u6PzVEcAADAVra1pXN3fzrJe3e5FgAA2P+Mn03OzTcBAIChCTUAAMDQtjV+BgAATKNX9BWm5ooCAABDE2oAAIChGT8DAIAZufnm9HRqAACAoQk1AADA0IyfAQDAjNYP6CtMzRUFAACGJtQAAABDE2oAAIChWVMDAAAz6hV9ham5ogAAwNCEGgAAYGjGzwAAYEb7ffysqh6Z5PVJHpfkI0me1d13HOO8tSTXLV7+VXc/fXH8jCRXJPmKJO9O8k+7+57NfnN/X1EAAGBuFyd5e3efmeTti9fH8rnuPmvxePqG4y9L8vLu/tokdyR53lY/KNQAAABTuiDJ5Yvnlyd5xnY/WFWV5NwkV+7k88bPAABgRut7fPysqg4mObjh0Gp3r+7gKx7d3bcunn80yaOPc96Dq+pQksNJLunuN+XIyNmd3X14cc7NSU7b6geFGgAA4IsWAWbTEFNVb0vyVcd460VHfVdXVR/nax7b3bdU1dckuaaqrkvyqftSs1ADAADsSHefd7z3qupjVXVqd99aVacmue0433HL4p83VtXvJnlikl9PcnJVnbTo1pye5Jat6tnbvS8AANhn+sDKnn5M4KokFy2eX5TkN44+oapOqaoHLZ4/KsmTk1zf3Z3kHUmeudnnjybUAAAAU7okyVOr6kNJzlu8TlWdXVWXLs75+iSHquq9ORJiLunu6xfv/
VSSn6iqG3Jkjc0rt/pB42cAAMBkuvuTSZ5yjOOHkjx/8fwPkzzhOJ+/Mck5O/lNoQYAAGbUK7XsEvYd42cAAMDQhBoAAGBoQg0AADA0a2oAAGBG6wesqZmaTg0AADA0oQYAABia8TMAAJiRLZ2np1MDAAAMTagBAACGZvwMAABmZPxsejo1AADA0IQaAABgaMbPAABgRn1g2RXsPzo1AADA0IQaAABgaMbPAABgRnY/m96uh5oH/80zdvsn9o31z9y17BKGcNc1Vyy7hCE84rufv+wShvGZP3jjsksYwmff9dvLLmEYvba27BKG8Ijv/IFllzCEtTtuW3YJsOcZPwMAAIZm/AwAAOakrTA5lxQAABiaUAMAAAxNqAEAAIZmTQ0AAMzpwLIL2H90agAAgKEJNQAAwNCMnwEAwJy0FSbnkgIAAEMTagAAgKEZPwMAgDlpK0zOJQUAAIYm1AAAAEMzfgYAADMqbYXJuaQAAMDQhBoAAGBoxs8AAGBGtdLLLmHf0akBAACGJtQAAABDE2oAAIChWVMDAAAzsqXz9FxSAABgaEINAAAwNONnAAAwo5UDy65g/9GpAQAAhibUAAAAQzN+BgAAM1rRVpicSwoAAAxNqAEAAIZm/AwAAGZUK73sEvYdnRoAAGBoQg0AADA042cAADAju59NzyUFAACGJtQAAABDM34GAAAzMn42PZcUAAAYmlADAAAMTagBAACGZk0NAADMyJqa6bmkAADA0DYNNVX1I1X1mLmKAQAA2Kmtxs9+NsnFVfXhJK9L8obu/vjulwUAAPuT8bPpbXVJb0xyeo6Em29Mcn1V/XZVXVRVDz/eh6rqYFUdqqpDl/3XD01YLgAAwJfaKtR0d6939+909/OSfHWS/5jk/BwJPMf70Gp3n93dZ/+v/9OZE5YLAADwpbYaP6uNL7r7C0muSnJVVT1k16oCAIB9yvjZ9La6pN97vDe6++6JawEAANixTUNNd//FXIUAAADcF26+CQAAMzqw0ssuYd8x0QcAAAxNqAEAAIZm/AwAAGZk97PpuaQAAMDQhBoAAGBoQg0AADA0a2oAAGBG1tRMzyUFAACGJtQAAABDM34GAAAzOqCtMDmXFAAAGJpQAwAADM34GQAAzGilll3B/qNTAwAADE2oAQAAhmb8DAAAZmT3s+m5pAAAwNCEGgAAYGjGzwAAYEYr2gqTc0kBAIChCTUAAMDQhBoAAGBo1tQAAMCMbOk8PZcUAAAYmlADAAAMzfgZAADMaL+Pn1XVI5O8PsnjknwkybO6+46jzvm2JC/fcOjrklzY3W+qql9J8i1JPrV477nd/Z7NfnOfX1IAAGBmFyd5e3efmeTti9dforvf0d1ndfdZSc5NcneS39lwyk/e+/5WgSYRagAAgGldkOTyxfPLkzxji/OfmeS3uvvu+/qDQg0AAMzowMreflTVwao6tOFxcIf/Iz66u29dPP9okkdvcf6FSV531LGXVtWfVdXLq+pBW/2gNTUAAMAXdfdqktXNzqmqtyX5qmO89aKjvqurqjf5nlOTPCHJWzYcfmGOhKEHLur4qSQv2aweoQYAANiR7j7veO9V1ceq6tTuvnURWm7b5KueleSN3f2FDd99b5fn81X1qiT/fKt6jJ8BAMCMVlb29mMCVyW5aPH8oiS/scm5z85Ro2eLIJSqqhxZj/O+rX5QqAEAAKZ0SZKnVtWHkpy3eJ2qOruqLr33pKp6XJLHJPm9oz7/2qq6Lsl1SR6V5Oe2+kHjZwAAwGS6+5NJnnKM44eSPH/D648kOe0Y5527098UagAAYEYHatkV7D/GzwAAgKEJNQAAwNCMnwEAwIwOaCtMziUFAACGtuudmntuumm3f2LfWHnYQ5ZdwhDWPnHXsksYw9rhZVcwjLU7PrnsEobwhdv93x7TWrtjs/vxca8Dp3zlskuAPU+nBgAAGJo1NQAAMCNraqbnkgIAAEMTagAAgKEZPwMAgBmdtFLLLmHf0akBA
ACGJtQAAABDM34GAAAzsvvZ9FxSAABgaEINAAAwNONnAAAwowM2P5ucTg0AADA0oQYAABia8TMAAJiR3c+m55ICAABDE2oAAIChCTUAAMDQrKkBAIAZWVMzPZcUAAAYmlADAAAMzfgZAADM6MBKLbuEfUenBgAAGJpQAwAADM34GQAAzMjuZ9NzSQEAgKEJNQAAwNCMnwEAwIwO2Pxscjo1AADA0IQaAABgaMbPAABgRm6+OT2dGgAAYGhCDQAAMDTjZwAAMCM335yeSwoAAAxNqAEAAIYm1AAAAEOzpgYAAGZkS+fp6dQAAABDE2oAAIChGT8DAIAZ2dJ5ei4pAAAwNKEGAAAYmvEzAACY0UrZ/Wxqm4aaqnpgkguT/HV3v62qnpPkHyT5QJLV7v7CDDUCAAAc11bjZ69K8p1JfrSqfjXJ9yR5V5K/l+TS432oqg5W1aGqOnT5n9w6WbEAAABH22r87And/T9W1UlJbkny1d29VlWvSfLe432ou1eTrCbJ7f/Ht/Rk1QIAwODsfja9rS7pymIE7eFJHpLkEYvjD0rygN0sDAAAYDu26tS8MsmfJzmQ5EVJ3lBVNyb5piRX7HJtAAAAW9o01HT3y6vq9Yvnf11Vr05yXpJf7u4/nqNAAADYTw6s2P1saltu6dzdf73h+Z1JrtzNggAAAHbCMiUAAGBoQg0AADC0LcfPAACA6djSeXouKQAAMDShBgAAGJrxMwAAmJEtnaenUwMAAAxNqAEAAIZm/AwAAGZk/Gx6OjUAAMDQhBoAAGBoxs8AAGBGbr45PZcUAAAYmlADAAAMzfgZAADMaMXuZ5PTqQEAAIYm1AAAAEMTagAAgKFZUwMAADM6YE3N5HRqAACAoQk1AADA0IyfAQDAjA5oK0zOJQUAAIYm1AAAAEMzfgYAADOy+9n0dGoAAIChCTUAAMDQjJ8BAMCMVoyfTU6nBgAAGJpQAwAADM34GQAAzMjNN6fnkgIAAEMTagAAgKEZPwMAgBm5+eb0dGoAAIChCTUAAMDQhBoAAGBoQg0AAMzowErt6ceJqqrvqar3V9V6VZ29yXnnV9UHq+qGqrp4w/Ezqupdi+Ovr6oHbvWbQg0AADCl9yX5x0l+/3gnVNWBJL+Y5DuSPD7Js6vq8Yu3X5bk5d39tUnuSPK8rX5QqAEAACbT3R/o7g9ucdo5SW7o7hu7+54kVyS5oKoqyblJrlycd3mSZ2z1m7u+pfMjf+739tyedVV1sLtXl13HCFyr7XGdtm8vXquTn/icZZfw39mL12kvcp22z7XaHtdp+1yr++7cr/raPff38UZVdTDJwQ2HVnfhf9enJblpw+ubkzwpyVckubO7D284ftpWX3Z/7dQc3PoUFlyr7XGdts+12h7XaXtcp+1zrbbHddo+12qf6u7V7j57w+O/CzRV9baqet8xHhcso2Y33wQAAHaku887wa+4JcljNrw+fXHsk0lOrqqTFt2ae49v6v7aqQEAAJbn2iRnLnY6e2CSC5Nc1d2d5B1Jnrk476Ikv7HVl91fQ435z+1zrbbHddo+12p7XKftcZ22z7XaHtdp+1wrjqmq/lFV3Zzk7yf5zap6y+L4V1fV1Umy6MK8IMlbknwgya919/sXX/FTSX6iqm7IkTU2r9zyN4+EIQAAgDHdXzs1AADAPiHUAAAAQ7vfhZqqOr+qPlhVN1TVxcuuZ6+qqsuq6raqet+ya9nLquoxVfWOqrq+qt5fVT+67Jr2oqp6cFX9cVW9d3GdfmbZNe1lVXWgqv60qv7LsmvZy6rqI1V1XVW9p6oOLbuevaqqTq6qK6vqz6vqA1X195dd015UVX978Z+lex+frqofW3Zde1FV/fjiv8vfV1Wvq6oHL7smuF+tqamqA0n+IslTc+RGPtcmeXZ3X7/UwvagqvrmJJ9J8uru/rvLrmevqqpTk5za3X9SVQ9P8u4kz/CfqS+1uDvwQ7v7M1X1gCR/kORHu/udSy5tT6qqn0hyd
pIv7+7vWnY9e1VVfSTJ2d39iWXXspdV1eVJ/mt3X7rYYegh3X3nksva0xZ/L9yS5End/d+WXc9eUlWn5ch/hz++uz9XVb+W5Oru/pXlVsb93f2tU3NOkhu6+8buvifJFUmWcoOgva67fz/J7cuuY6/r7lu7+08Wz+/Kkd07trzr7f1NH/GZxcsHLB73n3+jsgNVdXqS70xy6bJrYXxV9Ygk35zFzkHdfY9Asy1PSfJhgea4TkryZVV1UpKHJPnrJdcD97tQc1qSmza8vjn+AGUiVfW4JE9M8q4ll7InLUaq3pPktiRv7W7X6dh+Psm/SLK+5DpG0El+p6reXVXubH5sZyT5eJJXLUYaL62qhy67qAFcmOR1yy5iL+ruW5L8myR/leTWJJ/q7t9ZblVw/ws1sCuq6mFJfj3Jj3X3p5ddz17U3WvdfVaO3Bn4nKoy1niUqvquJLd197uXXcsg/mF3f0OS70jyQ4uxWb7USUm+IckvdfcTk3w2ifWkm1iM6D09yRuWXcteVFWn5MiUyxlJvjrJQ6vq+5ZbFdz/Qs0tSR6z4fXpi2Nwny3WiPx6ktd2939edj173WL05R1Jzl9yKXvRk5M8fbFW5Iok51bVa5Zb0t61+DfG6e7bkrwxR0aM+VI3J7l5Q2f0yhwJORzfdyT5k+7+2LIL2aPOS/KX3f3x7v5Ckv+c5B8suSa434Waa5OcWVVnLP5NzIVJrlpyTQxssQD+lUk+0N3/btn17FVV9Teq6uTF8y/Lkc06/nypRe1B3f3C7j69ux+XI//9dE13+zegx1BVD11szpHFONXTktit8Sjd/dEkN1XV314cekoSG5ls7tkxeraZv0ryTVX1kMX/D3xKjqwnhaU6adkFzKm7D1fVC5K8JcmBJJd19/uXXNaeVFWvS/KtSR5VVTcneXF3v3K5Ve1JT07yT5Nct1gvkiT/e3dfvbyS9qRTk1y+2FFoJcmvdbftijkRj07yxiN/U+WkJP+pu397uSXtWT+c5LWLf5l3Y5IfWHI9e9YiID81yQ8uu5a9qrvfVVVXJvmTJIeT/GmS1eVWBfezLZ0BAID95/42fgYAAOwzQg0AADA0oQYAABiaUAMAAAxNqAEAAIYm1AAAAEMTagAAgKH9/7a8qjjBImDKAAAAAElFTkSuQmCC\n" }, "metadata": { "needs_background": "light" @@ -1183,9 +1229,6 @@ } ], "source": [ - "if max4 < 10:\n", - " max4 = 10\n", - "\n", "idf4 = impute_df(split_df4, max_iter= int(max4), verbose=2)\n", "size = idf4.shape[1]\n", "corr = idf4.corr()\n", @@ -1195,32 +1238,101 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "[IterativeImputer] Completing matrix with shape (195, 4)\n[IterativeImputer] Ending imputation round 1/10, elapsed time 0.03\n[IterativeImputer] Ending imputation round 2/10, elapsed time 0.04\n[IterativeImputer] Ending imputation round 3/10, elapsed time 0.04\n[IterativeImputer] Ending imputation round 4/10, elapsed time 0.04\n[IterativeImputer] Ending imputation round 5/10, elapsed time 
0.05\n[IterativeImputer] Ending imputation round 6/10, elapsed time 0.05\n[IterativeImputer] Ending imputation round 7/10, elapsed time 0.06\n[IterativeImputer] Ending imputation round 8/10, elapsed time 0.08\n[IterativeImputer] Early stopping criterion reached.\n[IterativeImputer] Completing matrix with shape (195, 4)\n[IterativeImputer] Ending imputation round 1/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 2/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 3/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 4/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 5/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 6/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 7/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 8/8, elapsed time 0.00\n" + "[IterativeImputer] Completing matrix with shape (195, 13)\n", + "[IterativeImputer] Ending imputation round 1/22, elapsed time 0.03\n", + "[IterativeImputer] Change: 87.8605447830152, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 2/22, elapsed time 0.05\n", + "[IterativeImputer] Change: 28.586268171421256, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 3/22, elapsed time 0.07\n", + "[IterativeImputer] Change: 11.593324359484521, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 4/22, elapsed time 0.10\n", + "[IterativeImputer] Change: 5.237507958945714, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 5/22, elapsed time 0.12\n", + "[IterativeImputer] Change: 3.9889516059898855, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 6/22, elapsed time 0.13\n", + "[IterativeImputer] Change: 4.3327704567826295, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 7/22, elapsed time 0.15\n", + 
"[IterativeImputer] Change: 4.743884302961482, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 8/22, elapsed time 0.16\n", + "[IterativeImputer] Change: 4.9041296773902205, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 9/22, elapsed time 0.18\n", + "[IterativeImputer] Change: 4.8867183949047215, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 10/22, elapsed time 0.20\n", + "[IterativeImputer] Change: 4.74618090912474, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 11/22, elapsed time 0.22\n", + "[IterativeImputer] Change: 4.597447396969357, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 12/22, elapsed time 0.24\n", + "[IterativeImputer] Change: 4.383379000147137, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 13/22, elapsed time 0.25\n", + "[IterativeImputer] Change: 4.120519036033551, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 14/22, elapsed time 0.27\n", + "[IterativeImputer] Change: 3.829399094168947, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 15/22, elapsed time 0.28\n", + "[IterativeImputer] Change: 3.5249997877409696, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 16/22, elapsed time 0.30\n", + "[IterativeImputer] Change: 3.218127903389302, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 17/22, elapsed time 0.32\n", + "[IterativeImputer] Change: 2.9164496052546856, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 18/22, elapsed time 0.34\n", + "[IterativeImputer] Change: 2.625268955987952, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 19/22, elapsed time 0.36\n", + 
"[IterativeImputer] Change: 2.3497527779462066, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 20/22, elapsed time 0.38\n", + "[IterativeImputer] Change: 2.0912457219816023, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 21/22, elapsed time 0.39\n", + "[IterativeImputer] Change: 1.8498070813863305, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Ending imputation round 22/22, elapsed time 0.41\n", + "[IterativeImputer] Change: 1.6257610843838977, scaled tolerance: 1.4337840000000002 \n", + "[IterativeImputer] Completing matrix with shape (195, 13)\n", + "[IterativeImputer] Ending imputation round 1/22, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 2/22, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 3/22, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 4/22, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 5/22, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 6/22, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 7/22, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 8/22, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 9/22, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 10/22, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 11/22, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 12/22, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 13/22, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 14/22, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 15/22, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 16/22, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 17/22, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 18/22, elapsed time 0.04\n", + 
"[IterativeImputer] Ending imputation round 19/22, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 20/22, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 21/22, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 22/22, elapsed time 0.05\n", + "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 16 + "execution_count": 21 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" + "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:28.158122\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1228,9 +1340,6 @@ } ], "source": [ - "if max5 < 10:\n", - " max5 = 10\n", - "\n", "idf5 = impute_df(split_df5, max_iter= int(max5), verbose=2)\n", "size = idf5.shape[1]\n", "corr = idf5.corr()\n", @@ -1238,132 +1347,171 @@ "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))" ] }, + { + "source": [ + "# Imputation of the whole dataset " + ], + "cell_type": "markdown", + "metadata": {} + }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "[IterativeImputer] Completing matrix with shape (195, 9)\n", - "[IterativeImputer] Ending imputation round 1/22, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 2/22, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 3/22, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 4/22, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 5/22, elapsed time 0.17\n", - "[IterativeImputer] Ending imputation round 6/22, elapsed time 0.18\n", - "[IterativeImputer] Ending imputation round 7/22, 
elapsed time 0.20\n", - "[IterativeImputer] Early stopping criterion reached.\n", - "[IterativeImputer] Completing matrix with shape (195, 9)\n", - "[IterativeImputer] Ending imputation round 1/7, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 2/7, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 3/7, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 4/7, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 5/7, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 6/7, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 7/7, elapsed time 0.02\n" + "24.0\n" ] - }, - { - "output_type": "execute_result", - "data": { - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 17 - }, - { - "output_type": "display_data", - "data": { - "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } } ], "source": [ - "if max6 < 10:\n", - " max6 = 10\n", - "\n", - "idf6 = impute_df(split_df6, max_iter= int(max6), verbose=2)\n", - "size = idf6.shape[1]\n", - "corr = idf6.corr()\n", - "plt.subplots(figsize=(20,20))\n", - "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))" + "iter_number = (max_missing + min_missing) // 2\n", + "print(iter_number)" ] }, - { - "source": [ - "## Dataframe is merged and displayed below" - ], - 
"cell_type": "markdown", - "metadata": {} - }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "(195, 34)\n(195, 45)\n(195, 54)\n(195, 58)\n(195, 67)\n" + "[IterativeImputer] Completing matrix with shape (195, 67)\n", + "[IterativeImputer] Ending imputation round 1/24, elapsed time 0.62\n", + "[IterativeImputer] Change: 373997.8709645349, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 2/24, elapsed time 1.24\n", + "[IterativeImputer] Change: 29511.1061871246, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 3/24, elapsed time 1.85\n", + "[IterativeImputer] Change: 17873.188347295738, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 4/24, elapsed time 2.47\n", + "[IterativeImputer] Change: 11459.888077535514, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 5/24, elapsed time 3.09\n", + "[IterativeImputer] Change: 5390.9739547168065, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 6/24, elapsed time 3.71\n", + "[IterativeImputer] Change: 3757.0867210105503, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 7/24, elapsed time 4.34\n", + "[IterativeImputer] Change: 3514.840674151647, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 8/24, elapsed time 4.96\n", + "[IterativeImputer] Change: 3272.008783191113, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 9/24, elapsed time 5.59\n", + "[IterativeImputer] Change: 3038.1248408404294, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 10/24, elapsed time 6.21\n", + "[IterativeImputer] Change: 2816.9578324964855, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 11/24, elapsed time 6.86\n", + "[IterativeImputer] Change: 
2609.6887758823086, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 12/24, elapsed time 7.53\n", + "[IterativeImputer] Change: 2416.3188696420248, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 13/24, elapsed time 8.14\n", + "[IterativeImputer] Change: 2236.3250712345057, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 14/24, elapsed time 8.77\n", + "[IterativeImputer] Change: 2069.007986775061, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 15/24, elapsed time 9.40\n", + "[IterativeImputer] Change: 2002.417721664179, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 16/24, elapsed time 10.03\n", + "[IterativeImputer] Change: 1982.0451872189394, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 17/24, elapsed time 10.66\n", + "[IterativeImputer] Change: 1961.971235914504, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 18/24, elapsed time 11.30\n", + "[IterativeImputer] Change: 1942.2299581335687, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 19/24, elapsed time 11.93\n", + "[IterativeImputer] Change: 1922.8595087315427, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 20/24, elapsed time 12.56\n", + "[IterativeImputer] Change: 1903.8819537004383, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 21/24, elapsed time 13.19\n", + "[IterativeImputer] Change: 1885.3076371898812, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 22/24, elapsed time 13.83\n", + "[IterativeImputer] Change: 1867.1381917331032, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 23/24, elapsed time 14.46\n", + "[IterativeImputer] Change: 1849.3691382747977, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 
24/24, elapsed time 15.09\n", + "[IterativeImputer] Change: 1831.9916662386954, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Completing matrix with shape (195, 67)\n", + "[IterativeImputer] Ending imputation round 1/24, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 2/24, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 3/24, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 4/24, elapsed time 0.05\n", + "[IterativeImputer] Ending imputation round 5/24, elapsed time 0.06\n", + "[IterativeImputer] Ending imputation round 6/24, elapsed time 0.07\n", + "[IterativeImputer] Ending imputation round 7/24, elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 8/24, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 9/24, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 10/24, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 11/24, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 12/24, elapsed time 0.13\n", + "[IterativeImputer] Ending imputation round 13/24, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 14/24, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 15/24, elapsed time 0.17\n", + "[IterativeImputer] Ending imputation round 16/24, elapsed time 0.18\n", + "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n", + "[IterativeImputer] Ending imputation round 17/24, elapsed time 0.19\n", + "[IterativeImputer] Ending imputation round 18/24, elapsed time 0.21\n", + "[IterativeImputer] Ending imputation round 19/24, elapsed time 0.22\n", + "[IterativeImputer] Ending imputation round 20/24, elapsed time 0.23\n", + "[IterativeImputer] Ending imputation round 21/24, elapsed time 0.24\n", + 
"[IterativeImputer] Ending imputation round 22/24, elapsed time 0.25\n", + "[IterativeImputer] Ending imputation round 23/24, elapsed time 0.26\n", + "[IterativeImputer] Ending imputation round 24/24, elapsed time 0.27\n" ] - }, + } + ], + "source": [ + "imputed_df = impute_df(df_inicator_values, max_iter=int(iter_number), verbose=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ { "output_type": "display_data", "data": { - "text/plain": " Population with at least some secondary education (% ages 25 and older) \\\nAFG 26.080 \nAGO 30.232 \nALB 93.174 \nAND 72.327 \nARG 57.158 \n.. ... \nWSM 74.942 \nYEM 28.020 \nZAF 75.478 \nZMB 44.440 \nZWE 64.935 \n\n Population with at least some secondary education, female (% ages 25 and older) \\\nAFG 13.220 \nAGO 23.133 \nALB 93.700 \nAND 71.484 \nARG 59.161 \n.. ... \nWSM 79.127 \nYEM 19.920 \nZAF 74.977 \nZMB 38.488 \nZWE 59.792 \n\n Population with at least some secondary education, male (% ages 25 and older) \\\nAFG 36.920 \nAGO 38.056 \nALB 92.497 \nAND 73.327 \nARG 54.828 \n.. ... \nWSM 71.583 \nYEM 36.918 \nZAF 78.207 \nZMB 54.068 \nZWE 70.783 \n\n Share of seats in parliament (% held by women) \\\nAFG 27.244 \nAGO 30.000 \nALB 29.508 \nAND 46.429 \nARG 39.877 \n.. ... \nWSM 10.000 \nYEM 0.971 \nZAF 45.333 \nZMB 17.964 \nZWE 34.571 \n\n Vulnerable employment (% of total employment) Urban population (%) \\\nAFG 79.72600 25.8 \nAGO 65.99500 66.2 \nALB 52.85200 61.2 \nAND 15.42884 88.0 \nARG 21.80500 92.0 \n.. ... ... \nWSM 29.98300 18.1 \nYEM 45.62700 37.3 \nZAF 10.29800 66.9 \nZMB 78.13400 44.1 \nZWE 64.73900 32.2 \n\n Labour force participation rate (% ages 15 and older), female \\\nAFG 21.595000 \nAGO 76.136000 \nALB 46.712000 \nAND 54.351364 \nARG 50.721000 \n.. ... 
\nWSM 31.104000 \nYEM 5.834000 \nZAF 49.610000 \nZMB 70.370000 \nZWE 78.106000 \n\n Labour force participation rate (% ages 15 and older), male \\\nAFG 74.658000 \nAGO 78.913000 \nALB 64.568000 \nAND 72.232983 \nARG 72.730000 \n.. ... \nWSM 55.456000 \nYEM 70.183000 \nZAF 62.749000 \nZMB 79.076000 \nZWE 88.993000 \n\n Remittances, inflows (% of GDP) \\\nAFG 4.542000 \nAGO 0.002000 \nALB 9.640000 \nAND 1.703573 \nARG 0.119000 \n.. ... \nWSM 17.254000 \nYEM 7.999177 \nZAF 0.253000 \nZMB 0.551000 \nZWE 8.068000 \n\n Foreign direct investment, net inflows (% of GDP) ... \\\nAFG 0.123000 ... \nAGO -4.331000 ... \nALB 7.912000 ... \nAND 3.286625 ... \nARG 1.389000 ... \n.. ... ... \nWSM 3.114609 ... \nYEM -4.382862 ... \nZAF 1.316000 ... \nZMB 2.087000 ... \nZWE 4.005312 ... \n\n Population under age 5 (millions) \\\nAFG 5.639000 \nAGO 5.670000 \nALB 0.169000 \nAND -7.122259 \nARG 3.742000 \n.. ... \nWSM 0.027000 \nYEM 4.099000 \nZAF 5.786000 \nZMB 2.902000 \nZWE 2.138000 \n\n Adolescent birth rate (births per 1,000 women ages 15-19) \\\nAFG 68.957000 \nAGO 150.526000 \nALB 19.642000 \nAND 14.976928 \nARG 62.782000 \n.. ... \nWSM 23.886000 \nYEM 60.352000 \nZAF 67.908000 \nZMB 120.112000 \nZWE 86.135000 \n\n Sex ratio at birth (male to female births) \\\nAFG 1.060000 \nAGO 1.030000 \nALB 1.090000 \nAND 1.058547 \nARG 1.040000 \n.. ... \nWSM 1.080000 \nYEM 1.050000 \nZAF 1.030000 \nZMB 1.030000 \nZWE 1.020000 \n\n Young age (0-14) dependency ratio (per 100 people ages 15-64) \\\nAFG 77.346000 \nAGO 91.097000 \nALB 25.439000 \nAND 25.017928 \nARG 38.334000 \n.. ... \nWSM 66.194000 \nYEM 67.773000 \nZAF 44.148000 \nZMB 83.229000 \nZWE 76.845000 \n\n Old-age (65 and older) dependency ratio (per 100 people ages 15-64) \\\nAFG 4.764000 \nAGO 4.297000 \nALB 20.764000 \nAND 21.992987 \nARG 17.523000 \n.. ... 
\nWSM 8.623000 \nYEM 5.015000 \nZAF 8.253000 \nZMB 3.960000 \nZWE 5.433000 \n\n HDI rank Total unemployment rate (female to male ratio) \\\nAFG 169.0 1.356000 \nAGO 148.0 1.016000 \nALB 69.0 0.903000 \nAND 36.0 1.788414 \nARG 46.0 1.222000 \n.. ... ... \nWSM 111.0 1.297000 \nYEM 179.0 2.088000 \nZAF 114.0 1.149000 \nZMB 146.0 1.147000 \nZWE 150.0 1.231000 \n\n Youth unemployment rate (female to male ratio) \\\nAFG 1.308000 \nAGO 0.906000 \nALB 0.799000 \nAND 1.412073 \nARG 1.291000 \n.. ... \nWSM 1.492000 \nYEM 1.467000 \nZAF 1.161000 \nZMB 1.079000 \nZWE 1.269000 \n\n Coefficient of human inequality Inequality-adjusted HDI (IHDI) \nAFG 29.772732 0.360280 \nAGO 31.733000 0.397000 \nALB 10.893000 0.708000 \nAND 10.066536 0.786988 \nARG 13.238000 0.729000 \n.. ... ... \nWSM 21.104700 0.551524 \nYEM 30.867000 0.321000 \nZAF 31.163000 0.468000 \nZMB 30.592000 0.401000 \nZWE 22.525000 0.441000 \n\n[195 rows x 67 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Share of seats in parliament (% held by women)Vulnerable employment (% of total employment)Urban population (%)Labour force participation rate (% ages 15 and older), femaleLabour force participation rate (% ages 15 and older), maleRemittances, inflows (% of GDP)Foreign direct investment, net inflows (% of GDP)...Population under age 5 (millions)Adolescent birth rate (births per 1,000 women ages 15-19)Sex ratio at birth (male to female births)Young age (0-14) dependency ratio (per 100 people ages 15-64)Old-age (65 and older) dependency ratio (per 100 people ages 15-64)HDI rankTotal unemployment rate (female to male ratio)Youth unemployment rate (female to male ratio)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
AFG26.08013.22036.92027.24479.7260025.821.59500074.6580004.5420000.123000...5.63900068.9570001.06000077.3460004.764000169.01.3560001.30800029.7727320.360280
AGO30.23223.13338.05630.00065.9950066.276.13600078.9130000.002000-4.331000...5.670000150.5260001.03000091.0970004.297000148.01.0160000.90600031.7330000.397000
ALB93.17493.70092.49729.50852.8520061.246.71200064.5680009.6400007.912000...0.16900019.6420001.09000025.43900020.76400069.00.9030000.79900010.8930000.708000
AND72.32771.48473.32746.42915.4288488.054.35136472.2329831.7035733.286625...-7.12225914.9769281.05854725.01792821.99298736.01.7884141.41207310.0665360.786988
ARG57.15859.16154.82839.87721.8050092.050.72100072.7300000.1190001.389000...3.74200062.7820001.04000038.33400017.52300046.01.2220001.29100013.2380000.729000
..................................................................
WSM74.94279.12771.58310.00029.9830018.131.10400055.45600017.2540003.114609...0.02700023.8860001.08000066.1940008.623000111.01.2970001.49200021.1047000.551524
YEM28.02019.92036.9180.97145.6270037.35.83400070.1830007.999177-4.382862...4.09900060.3520001.05000067.7730005.015000179.02.0880001.46700030.8670000.321000
ZAF75.47874.97778.20745.33310.2980066.949.61000062.7490000.2530001.316000...5.78600067.9080001.03000044.1480008.253000114.01.1490001.16100031.1630000.468000
ZMB44.44038.48854.06817.96478.1340044.170.37000079.0760000.5510002.087000...2.902000120.1120001.03000083.2290003.960000146.01.1470001.07900030.5920000.401000
ZWE64.93559.79270.78334.57164.7390032.278.10600088.9930008.0680004.005312...2.13800086.1350001.02000076.8450005.433000150.01.2310001.26900022.5250000.441000
\n

195 rows × 67 columns

\n
" + "text/plain": " Population with at least some secondary education (% ages 25 and older) \\\nAFG 26.080 \nAGO 30.232 \nALB 93.174 \nAND 72.327 \nARG 57.158 \n.. ... \nWSM 74.942 \nYEM 28.020 \nZAF 75.478 \nZMB 44.440 \nZWE 64.935 \n\n Population with at least some secondary education, female (% ages 25 and older) \\\nAFG 13.220 \nAGO 23.133 \nALB 93.700 \nAND 71.484 \nARG 59.161 \n.. ... \nWSM 79.127 \nYEM 19.920 \nZAF 74.977 \nZMB 38.488 \nZWE 59.792 \n\n Population with at least some secondary education, male (% ages 25 and older) \\\nAFG 36.920 \nAGO 38.056 \nALB 92.497 \nAND 73.327 \nARG 54.828 \n.. ... \nWSM 71.583 \nYEM 36.918 \nZAF 78.207 \nZMB 54.068 \nZWE 70.783 \n\n Mean years of schooling, female (years) \\\nAFG 1.94800 \nAGO 4.02300 \nALB 9.70200 \nAND 10.43900 \nARG 11.12300 \n.. ... \nWSM 11.16287 \nYEM 2.88000 \nZAF 10.03100 \nZMB 6.28300 \nZWE 8.06600 \n\n Mean years of schooling, male (years) \\\nAFG 6.006000 \nAGO 6.359000 \nALB 10.614000 \nAND 10.564000 \nARG 10.729000 \n.. ... \nWSM 10.415249 \nYEM 5.146000 \nZAF 10.291000 \nZMB 8.176000 \nZWE 8.923000 \n\n Share of seats in parliament (% held by women) \\\nAFG 27.244 \nAGO 30.000 \nALB 29.508 \nAND 46.429 \nARG 39.877 \n.. ... \nWSM 10.000 \nYEM 0.971 \nZAF 45.333 \nZMB 17.964 \nZWE 34.571 \n\n Adolescent birth rate (births per 1,000 women ages 15-19) \\\nAFG 68.957000 \nAGO 150.526000 \nALB 19.642000 \nAND 18.266334 \nARG 62.782000 \n.. ... \nWSM 23.886000 \nYEM 60.352000 \nZAF 67.908000 \nZMB 120.112000 \nZWE 86.135000 \n\n Vulnerable employment (% of total employment) \\\nAFG 79.726000 \nAGO 65.995000 \nALB 52.852000 \nAND 4.461035 \nARG 21.805000 \n.. ... \nWSM 29.983000 \nYEM 45.627000 \nZAF 10.298000 \nZMB 78.134000 \nZWE 64.739000 \n\n Total population (millions) Urban population (%) ... \\\nAFG 38.042 25.8 ... \nAGO 31.825 66.2 ... \nALB 2.881 61.2 ... \nAND 0.077 88.0 ... \nARG 44.781 92.0 ... \n.. ... ... ... \nWSM 0.197 18.1 ... \nYEM 29.162 37.3 ... \nZAF 58.558 66.9 ... 
\nZMB 17.861 44.1 ... \nZWE 14.645 32.2 ... \n\n Gender Development Index (GDI) \\\nAFG 0.659000 \nAGO 0.903000 \nALB 0.967000 \nAND 1.058332 \nARG 0.993000 \n.. ... \nWSM 0.949613 \nYEM 0.488000 \nZAF 0.986000 \nZMB 0.958000 \nZWE 0.931000 \n\n Estimated gross national income per capita, female (2017 PPP $) \\\nAFG 819.385000 \nAGO 5205.049000 \nALB 11004.455000 \nAND 43647.171247 \nARG 14872.167000 \n.. ... \nWSM 4054.375000 \nYEM 186.041000 \nZAF 9247.751000 \nZMB 3379.549000 \nZWE 2374.612000 \n\n Estimated gross national income per capita, male (2017 PPP $) \\\nAFG 3565.86500 \nAGO 7022.23100 \nALB 16884.66700 \nAND 66636.70046 \nARG 27825.75700 \n.. ... \nWSM 8410.10900 \nYEM 2980.03500 \nZAF 15094.54600 \nZMB 3270.42200 \nZWE 2984.89600 \n\n Human Development Index (HDI), female \\\nAFG 0.391000 \nAGO 0.552000 \nALB 0.780000 \nAND 0.888833 \nARG 0.835000 \n.. ... \nWSM 0.689133 \nYEM 0.270000 \nZAF 0.702000 \nZMB 0.569000 \nZWE 0.550000 \n\n Human Development Index (HDI), male Inequality-adjusted income index \\\nAFG 0.593000 0.590646 \nAGO 0.611000 0.442000 \nALB 0.807000 0.648000 \nAND 0.848604 0.657202 \nARG 0.840000 0.606000 \n.. ... ... \nWSM 0.723468 0.388513 \nYEM 0.553000 0.327000 \nZAF 0.712000 0.312000 \nZMB 0.593000 0.292000 \nZWE 0.590000 0.353000 \n\n Overall loss in HDI due to inequality (%) Inequality in income (%) \\\nAFG 19.599171 -14.312061 \nAGO 31.670000 28.900000 \nALB 10.943000 13.179000 \nAND 17.410515 36.120212 \nARG 13.728000 25.159000 \n.. ... ... \nWSM 18.988401 40.384439 \nYEM 31.702000 21.800000 \nZAF 33.992000 56.996000 \nZMB 31.336000 44.840000 \nZWE 22.767000 28.769000 \n\n Coefficient of human inequality Inequality-adjusted HDI (IHDI) \nAFG 19.795336 0.423465 \nAGO 31.733000 0.397000 \nALB 10.893000 0.708000 \nAND 16.938070 0.735618 \nARG 13.238000 0.729000 \n.. ... ... 
\nWSM 18.427962 0.575112 \nYEM 30.867000 0.321000 \nZAF 31.163000 0.468000 \nZMB 30.592000 0.401000 \nZWE 22.525000 0.441000 \n\n[195 rows x 67 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
AFG26.08013.22036.9201.948006.00600027.24468.95700079.72600038.04225.8...0.659000819.3850003565.865000.3910000.5930000.59064619.599171-14.31206119.7953360.423465
AGO30.23223.13338.0564.023006.35900030.000150.52600065.99500031.82566.2...0.9030005205.0490007022.231000.5520000.6110000.44200031.67000028.90000031.7330000.397000
ALB93.17493.70092.4979.7020010.61400029.50819.64200052.8520002.88161.2...0.96700011004.45500016884.667000.7800000.8070000.64800010.94300013.17900010.8930000.708000
AND72.32771.48473.32710.4390010.56400046.42918.2663344.4610350.07788.0...1.05833243647.17124766636.700460.8888330.8486040.65720217.41051536.12021216.9380700.735618
ARG57.15859.16154.82811.1230010.72900039.87762.78200021.80500044.78192.0...0.99300014872.16700027825.757000.8350000.8400000.60600013.72800025.15900013.2380000.729000
..................................................................
WSM74.94279.12771.58311.1628710.41524910.00023.88600029.9830000.19718.1...0.9496134054.3750008410.109000.6891330.7234680.38851318.98840140.38443918.4279620.575112
YEM28.02019.92036.9182.880005.1460000.97160.35200045.62700029.16237.3...0.488000186.0410002980.035000.2700000.5530000.32700031.70200021.80000030.8670000.321000
ZAF75.47874.97778.20710.0310010.29100045.33367.90800010.29800058.55866.9...0.9860009247.75100015094.546000.7020000.7120000.31200033.99200056.99600031.1630000.468000
ZMB44.44038.48854.0686.283008.17600017.964120.11200078.13400017.86144.1...0.9580003379.5490003270.422000.5690000.5930000.29200031.33600044.84000030.5920000.401000
ZWE64.93559.79270.7838.066008.92300034.57186.13500064.73900014.64532.2...0.9310002374.6120002984.896000.5500000.5900000.35300022.76700028.76900022.5250000.441000
\n

195 rows × 67 columns

\n
" }, "metadata": {} } ], "source": [ - "# 29 - 5 - 11 - 9 - 4 - 9\n", - "final_df = idf1.merge(idf2, left_index=True, right_index=True)\n", - "print(final_df.shape)\n", - "final_df = final_df.merge(idf3, left_index=True, right_index=True)\n", - "print(final_df.shape)\n", - "final_df = final_df.merge(idf4, left_index=True, right_index=True)\n", - "print(final_df.shape)\n", - "final_df = final_df.merge(idf5, left_index=True, right_index=True)\n", - "print(final_df.shape)\n", - "final_df = final_df.merge(idf6, left_index=True, right_index=True)\n", - "print(final_df.shape)\n", - "\n", - "display(final_df)" + "display(imputed_df)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#scatter_matrix(imputed_df, figsize=(size, size)) Takes a lot of time to work, visualization is not that great. But it can stay, just in case" + ] + }, + { + "cell_type": "code", + "execution_count": 26, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 19 + "execution_count": 26 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", - "image/png": "\n" + "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:44.813663\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1371,64 +1519,130 @@ } ], "source": [ - "size = final_df.shape[1]\n", - "corr = final_df.corr()\n", + "#sns.heatmap(corr, annot = True, vmin=-1, vmax=1, center= 0, fmt='.1g', cmap= 'coolwarm', linewidths=1, linecolor='black', square=True, yticklabels=False, xticklabels=False)\n", + "size = imputed_df.shape[1]\n", + "corr = imputed_df.corr()\n", "plt.subplots(figsize=(20,20))\n", - "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))" + "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size),cmap='mako')" ] }, { - "source": [ - "## PCA of the Final Dataset" + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{0: 'Population with at least some secondary education (% ages 25 and older)', 1: 'Population with at least some secondary education, female (% ages 25 and older)', 2: 'Population with at least some secondary education, male (% ages 25 and older)', 3: 'Mean years of schooling, female (years)', 4: 'Mean years of schooling, male (years)', 5: 'Share of seats in parliament (% held by women)', 6: 'Adolescent birth rate (births per 1,000 women ages 15-19)', 7: 
'Vulnerable employment (% of total employment)', 8: 'Total population (millions)', 9: 'Urban population (%)', 10: 'Labour force participation rate (% ages 15 and older), female', 11: 'Labour force participation rate (% ages 15 and older), male', 12: 'Sex ratio at birth (male to female births)', 13: 'Remittances, inflows (% of GDP)', 14: 'Foreign direct investment, net inflows (% of GDP)', 15: 'Population ages 15?64 (millions)', 16: 'Infants lacking immunization, measles (% of one-year-olds)', 17: 'Infants lacking immunization, DTP (% of one-year-olds)', 18: 'Gross fixed capital formation (% of GDP)', 19: 'Gender Inequality Index (GII)', 20: 'Life expectancy at birth (years)', 21: 'Expected years of schooling (years)', 22: 'Inequality-adjusted education index', 23: 'Inequality-adjusted life expectancy index', 24: 'Inequality in education (%)', 25: 'Inequality in life expectancy (%)', 26: 'Mean years of schooling (years)', 27: 'Life expectancy index', 28: 'Income index', 29: 'Education index', 30: 'Unemployment, youth (% ages 15?24)', 31: 'Private capital flows (% of GDP)', 32: 'Life expectancy at birth, female (years)', 33: 'Life expectancy at birth, male (years)', 34: 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 35: 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 36: 'Expected years of schooling, female (years)', 37: 'Expected years of schooling, male (years)', 38: 'Population ages 65 and older (millions)', 39: 'Population under age 5 (millions)', 40: 'Exports and imports (% of GDP)', 41: 'Human Development Index (HDI)', 42: 'Unemployment, total (% of labour force)', 43: 'HDI rank', 44: 'Youth not in school or employment (% ages 15-24)', 45: 'Labour force participation rate (% ages 15 and older)', 46: 'Employment to population ratio (% ages 15 and older)', 47: 'Employment in agriculture (% of total employment)', 48: 'Employment in services (% of total employment)', 49: 'Working poor at PPP$3.20 a day (% of total 
employment)', 50: 'Total unemployment rate (female to male ratio)', 51: 'Youth unemployment rate (female to male ratio)', 52: 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 53: 'Gross capital formation (% of GDP)', 54: 'Gross domestic product (GDP), total (2017 PPP $ billions)', 55: 'GDP per capita (2017 PPP $)', 56: 'Gross national income (GNI) per capita (constant 2017 PPP$)', 57: 'Gender Development Index (GDI)', 58: 'Estimated gross national income per capita, female (2017 PPP $)', 59: 'Estimated gross national income per capita, male (2017 PPP $)', 60: 'Human Development Index (HDI), female', 61: 'Human Development Index (HDI), male', 62: 'Inequality-adjusted income index', 63: 'Overall loss in HDI due to inequality (%)', 64: 'Inequality in income (%)', 65: 'Coefficient of human inequality', 66: 'Inequality-adjusted HDI (IHDI)'}\n" + ] + } ], - "cell_type": "markdown", - "metadata": {} + "source": [ + "map_columns = {}\n", + "count = 0\n", + "for col in imputed_df.columns:\n", + " map_columns[count] = col\n", + " count += 1\n", + "\n", + "print(map_columns)\n", + "\n", + "#Makes it easier to check the correlations" + ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 28, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "[[26.08 13.22 36.92 ... 1.308 29.77273216\n 0.36028022]\n [30.232 23.133 38.056 ... 0.906 31.733\n 0.397 ]\n [93.174 93.7 92.497 ... 0.799 10.893\n 0.708 ]\n ...\n [75.478 74.977 78.207 ... 1.161 31.163\n 0.468 ]\n [44.44 38.488 54.068 ... 1.079 30.592\n 0.401 ]\n [64.935 59.792 70.783 ... 1.269 22.525\n 0.441 ]]\n(195, 67)\n(195, 67)\n" + "[[ 26.08 13.22 36.92 ... -14.31206116 19.79533568\n 0.42346498]\n [ 30.232 23.133 38.056 ... 28.9 31.733\n 0.397 ]\n [ 93.174 93.7 92.497 ... 13.179 10.893\n 0.708 ]\n ...\n [ 75.478 74.977 78.207 ... 56.996 31.163\n 0.468 ]\n [ 44.44 38.488 54.068 ... 44.84 30.592\n 0.401 ]\n [ 64.935 59.792 70.783 ... 
28.769 22.525\n 0.441 ]]\n" ] } ], "source": [ - "x = final_df.loc[:, final_df.columns].values\n", + "x = imputed_df.loc[:, imputed_df.columns].values\n", "print(x)\n", - "x = StandardScaler().fit_transform(x)\n", - "\n", - "print(final_df.shape)\n", + "x = StandardScaler().fit_transform(x)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "(195, 67)\n(195, 67)\n" + ] + } + ], + "source": [ + "print(imputed_df.shape)\n", "print(x.shape)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": "array([[-1.213211 , -1.49949287, -0.95026927, ..., -3.68962647,\n 0.08912291, -0.98754125],\n [-1.06825946, -1.17214393, -0.90913571, ..., 0.45316976,\n 1.47092771, -1.13989063],\n [ 1.12912485, 1.15813273, 1.06212486, ..., -1.05402286,\n -0.94133748, 0.65042431],\n ...,\n [ 0.51133525, 0.53985833, 0.54469657, ..., 3.14676964,\n 1.40494925, -0.73116921],\n [-0.57224033, -0.66508825, -0.32935532, ..., 1.98135819,\n 1.33885503, -1.11686407],\n [ 0.14326588, 0.03841642, 0.27588008, ..., 0.44061062,\n 0.40508616, -0.88659848]])" + }, + "metadata": {} + } + ], + "source": [ + "display(x)" + ] + }, + { + "source": [ + "### We want to check if the mean of the normalized dataset is 0 and std is 1\n", + "### It looks like it" + ], + "cell_type": "markdown", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 31, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "(-1.713134035701734e-17, 1.0)" + "(-6.274693313304368e-17, 1.0)" ] }, "metadata": {}, - "execution_count": 21 + "execution_count": 31 } ], "source": [ "np.mean(x), np.std(x)" ] }, + { + "source": [ + "### To show the normalized data" + ], + "cell_type": "markdown", + "metadata": {} + }, { "cell_type": "code", - "execution_count": 22, + 
"execution_count": 32, "metadata": {}, "outputs": [ { @@ -1436,167 +1650,167 @@ "data": { "text/plain": [ " Population with at least some secondary education (% ages 25 and older) \\\n", - "0 -1.242745 \n", - "1 -1.094493 \n", - "2 1.152920 \n", - "3 0.408555 \n", - "4 -0.133071 \n", + "0 -1.213211 \n", + "1 -1.068259 \n", + "2 1.129125 \n", + "3 0.401330 \n", + "4 -0.128239 \n", "\n", " Population with at least some secondary education, female (% ages 25 and older) \\\n", - "0 -1.537407 \n", - "1 -1.202607 \n", - "2 1.180706 \n", - "3 0.430388 \n", - "4 0.014194 \n", + "0 -1.499493 \n", + "1 -1.172144 \n", + "2 1.158133 \n", + "3 0.424512 \n", + "4 0.017579 \n", "\n", " Population with at least some secondary education, male (% ages 25 and older) \\\n", - "0 -0.972184 \n", - "1 -0.930083 \n", - "2 1.087538 \n", - "3 0.377085 \n", - "4 -0.308501 \n", + "0 -0.950269 \n", + "1 -0.909136 \n", + "2 1.062125 \n", + "3 0.367996 \n", + "4 -0.301836 \n", "\n", - " Share of seats in parliament (% held by women) \\\n", - "0 0.366696 \n", - "1 0.601335 \n", - "2 0.559447 \n", - "3 2.000054 \n", - "4 1.442235 \n", + " Mean years of schooling, female (years) \\\n", + "0 -1.941779 \n", + "1 -1.318987 \n", + "2 0.385514 \n", + "3 0.606718 \n", + "4 0.812015 \n", "\n", - " Vulnerable employment (% of total employment) Urban population (%) \\\n", - "0 1.554740 -1.443911 \n", - "1 1.044887 0.299618 \n", - "2 0.556868 0.083835 \n", - "3 -0.832710 1.240433 \n", - "4 -0.595953 1.413060 \n", + " Mean years of schooling, male (years) \\\n", + "0 -1.112677 \n", + "1 -0.984083 \n", + "2 0.565966 \n", + "3 0.547752 \n", + "4 0.607859 \n", "\n", - " Labour force participation rate (% ages 15 and older), female \\\n", - "0 -2.035680 \n", - "1 1.612659 \n", - "2 -0.355562 \n", - "3 0.155448 \n", - "4 -0.087393 \n", - "\n", - " Labour force participation rate (% ages 15 and older), male \\\n", - "0 0.233312 \n", - "1 0.733974 \n", - "2 -0.953924 \n", - "3 -0.052027 \n", - "4 0.006454 \n", - 
"\n", - " Remittances, inflows (% of GDP) \\\n", - "0 -0.027719 \n", - "1 -0.796618 \n", - "2 0.835683 \n", - "3 -0.508438 \n", - "4 -0.776802 \n", + " Share of seats in parliament (% held by women) \\\n", + "0 0.361897 \n", + "1 0.596509 \n", + "2 0.554626 \n", + "3 1.995072 \n", + "4 1.437315 \n", "\n", - " Foreign direct investment, net inflows (% of GDP) ... \\\n", - "0 -0.441756 ... \n", - "1 -0.915527 ... \n", - "2 0.386758 ... \n", - "3 -0.105242 ... \n", - "4 -0.307092 ... \n", + " Adolescent birth rate (births per 1,000 women ages 15-19) \\\n", + "0 0.532975 \n", + "1 2.562666 \n", + "2 -0.694136 \n", + "3 -0.728367 \n", + "4 0.379322 \n", "\n", - " Population under age 5 (millions) \\\n", - "0 0.225085 \n", - "1 0.227833 \n", - "2 -0.259851 \n", - "3 -0.906248 \n", - "4 0.056909 \n", + " Vulnerable employment (% of total employment) Total population (millions) \\\n", + "0 1.556274 -0.009236 \n", + "1 1.051012 -0.051786 \n", + "2 0.567386 -0.249884 \n", + "3 -1.213266 -0.269075 \n", + "4 -0.575057 0.036887 \n", "\n", - " Adolescent birth rate (births per 1,000 women ages 15-19) \\\n", - "0 0.534851 \n", - "1 2.593772 \n", - "2 -0.709932 \n", - "3 -0.827685 \n", - "4 0.378985 \n", + " Urban population (%) ... Gender Development Index (GDI) \\\n", + "0 -1.443911 ... -3.764694 \n", + "1 0.299618 ... -0.482479 \n", + "2 0.083835 ... 0.378430 \n", + "3 1.240433 ... 1.606995 \n", + "4 1.413060 ... 
0.728174 \n", "\n", - " Sex ratio at birth (male to female births) \\\n", - "0 0.462524 \n", - "1 -1.216161 \n", - "2 2.141209 \n", - "3 0.381230 \n", - "4 -0.656599 \n", + " Estimated gross national income per capita, female (2017 PPP $) \\\n", + "0 -0.880471 \n", + "1 -0.605187 \n", + "2 -0.241164 \n", + "3 1.807789 \n", + "4 0.001609 \n", "\n", - " Young age (0-14) dependency ratio (per 100 people ages 15-64) \\\n", - "0 1.513894 \n", - "1 2.161911 \n", - "2 -0.932226 \n", - "3 -0.952069 \n", - "4 -0.324548 \n", + " Estimated gross national income per capita, male (2017 PPP $) \\\n", + "0 -0.858135 \n", + "1 -0.720907 \n", + "2 -0.329338 \n", + "3 1.645966 \n", + "4 0.105056 \n", "\n", - " Old-age (65 and older) dependency ratio (per 100 people ages 15-64) \\\n", - "0 -0.950705 \n", - "1 -0.999777 \n", - "2 0.730564 \n", - "3 0.859705 \n", - "4 0.390002 \n", + " Human Development Index (HDI), female Human Development Index (HDI), male \\\n", + "0 -1.898855 -1.037358 \n", + "1 -0.901924 -0.907477 \n", + "2 0.509877 0.506782 \n", + "3 1.183786 0.806977 \n", + "4 0.850444 0.744897 \n", "\n", - " HDI rank Total unemployment rate (female to male ratio) \\\n", - "0 1.364735 -0.115960 \n", - "1 0.976870 -0.308338 \n", - "2 -0.482239 -0.372276 \n", - "3 -1.091741 0.128707 \n", - "4 -0.907043 -0.191780 \n", + " Inequality-adjusted income index \\\n", + "0 0.259496 \n", + "1 -0.644363 \n", + "2 0.608240 \n", + "3 0.664195 \n", + "4 0.352855 \n", "\n", - " Youth unemployment rate (female to male ratio) \\\n", - "0 -0.049773 \n", - "1 -0.485075 \n", - "2 -0.600939 \n", - "3 0.062921 \n", - "4 -0.068182 \n", + " Overall loss in HDI due to inequality (%) Inequality in income (%) \\\n", + "0 0.022060 -3.689626 \n", + "1 1.394523 0.453170 \n", + "2 -0.962154 -1.054023 \n", + "3 -0.226792 1.145381 \n", + "4 -0.645497 0.094515 \n", "\n", " Coefficient of human inequality Inequality-adjusted HDI (IHDI) \n", - "0 1.192044 -1.315801 \n", - "1 1.408239 -1.110134 \n", - "2 -0.890168 
0.631779 \n", - "3 -0.981317 1.074191 \n", - "4 -0.631542 0.749400 \n", + "0 0.089123 -0.987541 \n", + "1 1.470928 -1.139891 \n", + "2 -0.941337 0.650424 \n", + "3 -0.241610 0.809411 \n", + "4 -0.669900 0.771314 \n", "\n", "[5 rows x 67 columns]" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Share of seats in parliament (% held by women)Vulnerable employment (% of total employment)Urban population (%)Labour force participation rate (% ages 15 and older), femaleLabour force participation rate (% ages 15 and older), maleRemittances, inflows (% of GDP)Foreign direct investment, net inflows (% of GDP)...Population under age 5 (millions)Adolescent birth rate (births per 1,000 women ages 15-19)Sex ratio at birth (male to female births)Young age (0-14) dependency ratio (per 100 people ages 15-64)Old-age (65 and older) dependency ratio (per 100 people ages 15-64)HDI rankTotal unemployment rate (female to male ratio)Youth unemployment rate (female to male ratio)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
0-1.242745-1.537407-0.9721840.3666961.554740-1.443911-2.0356800.233312-0.027719-0.441756...0.2250850.5348510.4625241.513894-0.9507051.364735-0.115960-0.0497731.192044-1.315801
1-1.094493-1.202607-0.9300830.6013351.0448870.2996181.6126590.733974-0.796618-0.915527...0.2278332.593772-1.2161612.161911-0.9997770.976870-0.308338-0.4850751.408239-1.110134
21.1529201.1807061.0875380.5594470.5568680.083835-0.355562-0.9539240.8356830.386758...-0.259851-0.7099322.141209-0.9322260.730564-0.482239-0.372276-0.600939-0.8901680.631779
30.4085550.4303880.3770852.000054-0.8327101.2404330.155448-0.052027-0.508438-0.105242...-0.906248-0.8276850.381230-0.9520690.859705-1.0917410.1287070.062921-0.9813171.074191
4-0.1330710.014194-0.3085011.442235-0.5959531.413060-0.0873930.006454-0.776802-0.307092...0.0569090.378985-0.656599-0.3245480.390002-0.907043-0.191780-0.068182-0.6315420.749400
\n

5 rows × 67 columns

\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
0-1.213211-1.499493-0.950269-1.941779-1.1126770.3618970.5329751.556274-0.009236-1.443911...-3.764694-0.880471-0.858135-1.898855-1.0373580.2594960.022060-3.6896260.089123-0.987541
1-1.068259-1.172144-0.909136-1.318987-0.9840830.5965092.5626661.051012-0.0517860.299618...-0.482479-0.605187-0.720907-0.901924-0.907477-0.6443631.3945230.4531701.470928-1.139891
21.1291251.1581331.0621250.3855140.5659660.554626-0.6941360.567386-0.2498840.083835...0.378430-0.241164-0.3293380.5098770.5067820.608240-0.962154-1.054023-0.9413370.650424
30.4013300.4245120.3679960.6067180.5477521.995072-0.728367-1.213266-0.2690751.240433...1.6069951.8077891.6459661.1837860.8069770.664195-0.2267921.145381-0.2416100.809411
4-0.1282390.017579-0.3018360.8120150.6078591.4373150.379322-0.5750570.0368871.413060...0.7281740.0016090.1050560.8504440.7448970.352855-0.6454970.094515-0.6699000.771314
\n

5 rows × 67 columns

\n
" }, "metadata": {}, - "execution_count": 22 + "execution_count": 32 } ], "source": [ - "feat_cols = final_df.columns.values.tolist()\n", + "feat_cols = imputed_df.columns.values.tolist()\n", "#print(feat_cols)\n", - "normalized_final_df = pd.DataFrame(x, columns=feat_cols)\n", - "normalized_final_df.head()" + "normalized_imputed_df = pd.DataFrame(x, columns=feat_cols)\n", + "normalized_imputed_df.head()" ] }, + { + "source": [ + "### Now we start with the PCA Part" + ], + "cell_type": "markdown", + "metadata": {} + }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 33, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "['PC1', 'PC2', 'PC3']\n['49.73', '7.93', '7.18']\n" + "['PC1', 'PC2', 'PC3']\n['49.70', '7.85', '7.20']\n" ] } ], "source": [ "num_components = 3\n", - "pca_final = PCA(n_components=num_components)\n", - "pComponents_final = pca_final.fit_transform(x)\n", + "pca_imputed = PCA(n_components=num_components)\n", + "pComponents_imputed = pca_imputed.fit_transform(x)\n", "component_col = ['PC'+str(i+1) for i in range(num_components)]\n", "print(component_col)\n", "\n", - "percentage_list = [element * 100 for element in pca_final.explained_variance_ratio_]\n", + "percentage_list = [element * 100 for element in pca_imputed.explained_variance_ratio_]\n", "percentage_list = ['%.2f' % elem for elem in percentage_list]\n", "print(percentage_list)" ] }, + { + "source": [ + "### PC stands for principal components" + ], + "cell_type": "markdown", + "metadata": {} + }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1611,61 +1825,61 @@ "data": { "text/plain": [ " PC1 PC2 PC3\n", - "0 8.660925 3.855080 -0.080563\n", - "1 7.369314 -2.616149 0.028787\n", - "2 -3.287324 2.085875 -0.449373\n", - "3 -5.811149 -0.738707 -0.166838\n", - "4 -3.917583 0.676241 -0.271113" + "0 7.815705 3.771093 -0.133255\n", + "1 7.339138 -2.606812 0.272919\n", + "2 -3.214204 
2.176000 -0.611012\n", + "3 -5.807156 -1.477608 -0.284501\n", + "4 -3.974595 0.524419 -0.239346" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PC1PC2PC3
08.6609253.855080-0.080563
17.369314-2.6161490.028787
2-3.2873242.085875-0.449373
3-5.811149-0.738707-0.166838
4-3.9175830.676241-0.271113
\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PC1PC2PC3
07.8157053.771093-0.133255
17.339138-2.6068120.272919
2-3.2142042.176000-0.611012
3-5.807156-1.477608-0.284501
4-3.9745950.524419-0.239346
\n
" }, "metadata": {}, - "execution_count": 24 + "execution_count": 34 } ], "source": [ - "pc_final_df = pd.DataFrame(data = pComponents_final, columns = component_col)\n", - "print(pc_final_df.shape)\n", - "pc_final_df.head()" + "pc_imputed_df = pd.DataFrame(data = pComponents_imputed, columns = component_col)\n", + "print(pc_imputed_df.shape)\n", + "pc_imputed_df.head()" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 35, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Explained variation percentage per principal component: ['49.73', '7.93', '7.18']\nTotal percentage of the explained data by 3 components is: 64.84\nPercentage of the information that is lost for using 3 components is: 35.16\n" + "Explained variation percentage per principal component: ['49.70', '7.85', '7.20']\nTotal percentage of the explained data by 3 components is: 64.75\nPercentage of the information that is lost for using 3 components is: 35.25\n" ] } ], "source": [ "print('Explained variation percentage per principal component: {}'.format(percentage_list))\n", - "total_explained_percentage = (sum(pca_final.explained_variance_ratio_)*100)\n", - "print('Total percentage of the explained data by',pca_final.n_components,'components is: %.2f' %total_explained_percentage)\n", - "print('Percentage of the information that is lost for using',pca_final.n_components,'components is: %.2f' %(100-total_explained_percentage))" + "total_explained_percentage = (sum(pca_imputed.explained_variance_ratio_)*100)\n", + "print('Total percentage of the explained data by',pca_imputed.n_components,'components is: %.2f' %total_explained_percentage)\n", + "print('Percentage of the information that is lost for using',pca_imputed.n_components,'components is: %.2f' %(100-total_explained_percentage))" ] }, { "source": [ - "## 3 Main Principle Component is presented" + "### Outliers are a big problem as it can be seen from the graph" ], "cell_type": "markdown", 
"metadata": {} }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 36, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "{'0': 'PC1 49.73%', '1': 'PC2 7.93%', '2': 'PC3 7.18%'}\n" + "{'0': 'PC1 49.70%', '1': 'PC2 7.85%', '2': 'PC3 7.20%'}\n" ] }, { @@ -1677,7 +1891,7 @@ }, "data": [ { - "hovertemplate": "PC1 49.73%=%{x}
PC2 7.93%=%{y}
PC3 7.18%=%{z}", + "hovertemplate": "PC1 49.70%=%{x}
PC2 7.85%=%{y}
PC3 7.20%=%{z}", "legendgroup": "", "marker": { "color": "#636efa", @@ -1689,595 +1903,595 @@ "showlegend": false, "type": "scatter3d", "x": [ - 8.660925373030112, - 7.36931441039187, - -3.287323774612212, - -5.811149454434806, - -3.917583415567073, - -2.9338836176267127, - -1.3634938862156527, - -8.95719329714002, - -8.061102744290617, - -2.0143966716041626, - 9.518730559247157, - -8.83584022021243, - 8.101931475789897, - 9.390221152113469, - 3.7920355348972397, - -4.533593406252421, - -4.360984547988361, - -3.512033805407493, - -2.693074770407761, - -5.187274735018256, - 0.15634895125033405, - 1.327148550243007, - -0.5915289853603273, - -3.651639000535378, - -4.506113576087406, - 3.628606254985032, - 0.4220843585147365, - 12.680295441880574, - -7.967091555964078, - -4.194571416724602, - -2.275633082655154, - 8.366996344735632, - 7.300290963343796, - 8.926587475681194, - 5.557030223889865, - -0.5358221359930567, - 7.389569574497576, - 2.0766840364109256, - -2.7277717561457733, - -3.4448467607081747, - -6.890528980649567, - -7.618664623703192, - -8.793588332529188, - 5.030759542043407, - -0.8133041827675904, - -9.048869551995287, - -0.3224865164826, - -0.11270737710548943, - -0.21558223396562975, - 1.706906391115579, - 8.221535016841335, - -7.165904482034793, - 8.698629888933718, - -9.324042146819695, - -0.4363534601027277, - -7.403491382198684, - 2.137625968690034, - -3.2522807297427105, - 4.437806353565892, - 7.79742829320645, - 6.402924375558027, - -6.254361937358284, - -0.24840011661343397, - -5.9743490272615185, - 4.43695230324791, - 1.9170572823945042, - 4.8116043036306495, - 13.0309792014796, - 4.56048084557989, - 10.124601509654775, - 9.464432864991926, - 3.332273020710495, - 1.334235774731984, - -8.677829019013167, - 3.566237278455438, - 8.102761520239902, - -5.990221066605703, - 1.0212879422538246, - 3.799763414682114, - -8.961571182924617, - -2.0116371496465018, - 2.3452054287494515, - -8.998425604718578, - -7.192573125538015, - -6.69926394386997, - 
-0.6197240202964007, - -0.8871561941125945, - -8.160398493492213, - -4.375289152115088, - 5.384390076964414, - -0.7368806438860154, - 3.8542782035866976, - -6.959926349684004, - -2.912873268210257, - 5.11979306543985, - -0.8804355671773069, - 8.221737810503074, - -0.2959305965655109, - -6.745582778644443, - 6.607815336068809, - -6.665402163818996, - -10.22624722126282, - -6.383178963211472, - 1.6061607812886278, - -0.923072842836434, - -2.1045430816121162, - 7.742286990664149, - -0.3249499652865133, - -1.0545194081043436, - -0.8793745013263952, - -2.315936291337086, - 11.171093857013355, - -7.8586969724826385, - 5.10063014743016, - -4.671798219956093, - -1.1803364178532467, - 9.495409105780801, - 7.274862403400485, - -2.911851692298899, - 7.592600143178789, - -3.3239892133995954, - 4.158783210879166, - 11.659981452804312, - 8.106398056165082, - 2.082527374112786, - -8.882946614749645, - -9.40254395453483, - 4.960564659314305, - -0.45350878877708417, - -8.014766635562616, - -3.0901261913689666, - 6.6922476770592, - -4.707503064696859, - 0.7571221305201962, - -0.1676971430788005, - -5.956312959493098, - -9.131681154878528, - -6.951574928918852, - -7.842286950536407, - -1.0862985842844837, - -0.553407667927873, - -1.7756904913436278, - -2.284128602738151, - -1.0306200985471243, - 0.686392192277255, - 7.906495025661678, - -6.386655189132161, - -5.633462144999635, - 0.9379578630417558, - -5.340853869141102, - -4.103711688500217, - -4.6722331884107575, - 6.607486409903617, - -4.249172805924318, - 7.790891416947143, - 7.429333092490025, - -8.868267693969555, - 4.2617616621659735, - 9.739984460067737, - -1.2982756071941874, - 6.6997542096048734, - -4.044533181810401, - 11.37384943365761, - 2.9989471218939494, - 0.43928328385974, - -6.105665614117344, - -8.093367975526451, - -9.18598504844754, - -2.4745554817040665, - 4.837127138188524, - 7.551378070484332, - -1.2843924193698983, - 0.820800722755169, - 0.7473458450490823, - 5.365815568843352, - -0.7597514742681404, - 
-2.4373413206171173, - -0.7138582435099253, - -2.705881390776994, - 0.31476642032113766, - 7.71333296545075, - 7.186259568291081, - -3.7978657071531776, - -3.3979525762581626, - -7.75882743273823, - -0.5968403419780313, - -0.5838730219221859, - -0.007916107619517623, - -0.2041579547749093, - 4.88102780317078, - 0.015884491794824422, - 9.204539209440739, - 1.4973101808646454, - 6.220218114546498, - 5.442054166264446 + 7.815705391742462, + 7.339137705703802, + -3.214203916269259, + -5.80715643866871, + -3.9745954619497295, + -2.890966092672859, + -1.1134671652005752, + -9.02976272407463, + -8.089903395153193, + -1.9800675811561705, + 9.527366508949298, + -8.814781431817284, + 8.057536998694976, + 9.389458414018907, + 3.8049608938917885, + -4.5411540540591, + -4.23920966194199, + -3.1588756354356065, + -2.6858306608095894, + -5.136335371331304, + 0.12810416951246514, + 1.267042524569538, + -0.6885217978448994, + -3.65835391674737, + -4.182557988623389, + 3.7408392194080093, + 0.20400898125735614, + 12.56978154058819, + -8.050563036925059, + -4.288361890597987, + -2.3582248937799273, + 8.327761469360418, + 7.2752883396901575, + 8.856710810591405, + 5.533077430318204, + -0.6019520590967266, + 7.294069988360144, + 1.8970704667026417, + -2.788007359243617, + -3.0580619971246326, + -6.588294978619638, + -7.573929299906972, + -8.902789615997523, + 5.372849242136768, + -0.5339264068916624, + -9.082419175322016, + -0.3654413763172631, + -0.11787503537257654, + -0.26730624222407656, + 1.6301659735629148, + 7.904739443603567, + -7.157833186894553, + 8.762975653218588, + -9.194067425167436, + -0.3051455870997747, + -7.3806651712145515, + 2.1534605695235087, + -3.1876558033358795, + 4.47778550921977, + 7.862744607195441, + 5.901182830324904, + -6.354542916764469, + -0.4067551835511576, + -5.970468538879629, + 4.566984752150551, + 1.9292708308075317, + 4.772592533434519, + 13.069788552035853, + 4.367579870283229, + 10.18771836172564, + 9.546609925194428, + 3.268280023564391, + 
1.3641510634203449, + -8.959144626756714, + 3.5239748566941724, + 8.09574152733288, + -5.997380117994976, + 1.0110406969635937, + 3.785140734566966, + -9.092556100865856, + -2.093224510897799, + 2.313017712713081, + -9.083314846698999, + -7.18731043401891, + -6.72232805979924, + -0.6159066258325677, + -0.9248226691956368, + -8.241936643448538, + -4.3512784393720105, + 5.314985585514629, + -0.634269111601688, + 3.6412780541396295, + -7.021759781450271, + -2.7879093551636958, + 5.21499904167344, + -0.5048660224959203, + 8.250222773015107, + -0.028671322512146755, + -10.239720758557572, + 6.6673450911698335, + -6.599811015887725, + -9.075382520720181, + -6.347740337271529, + 1.5171468658736802, + -1.362051712582059, + -2.037529218145667, + 7.7171716974342415, + -0.32339256579989906, + -1.1282922477603508, + 0.21885241143556144, + -2.31846231910023, + 11.205339947133302, + -7.361576598086409, + 4.790180540615756, + -4.5948425662985315, + -1.0509994004013095, + 9.471266868062813, + 7.273411428415602, + -2.89776118464564, + 7.513645783446732, + -2.9246343517899582, + 4.019324064053343, + 11.673093122705474, + 8.071012495061858, + 2.0699265091720696, + -8.997003865902158, + -9.445122207161843, + 5.101055040274665, + -0.21329078140762486, + -8.094356897681944, + -3.150516903237161, + 6.676900922868478, + -3.9195141665897246, + 1.8540679921789573, + -0.15376162236217705, + -5.648809454506782, + -9.218137325881434, + -6.979610366647846, + -7.885858153911095, + -1.4386898053967137, + -0.6233359470292887, + -1.7593857873642393, + -2.344725313882094, + -1.0534229727076467, + 0.6654668206170398, + 7.887735069516859, + -6.38882597077141, + -5.6397559950521154, + 0.8455120890389446, + -5.250593981881587, + -4.112425219809245, + -4.681161480300002, + 6.522331966196904, + -4.023280194965856, + 7.742324860246996, + 7.352758852805935, + -8.962687133514889, + 4.31975979988239, + 9.705167181253548, + -2.484810312171407, + 7.561131630567262, + -4.017490652651401, + 11.093618100157807, + 
2.9909862541842602, + 0.28649873104569173, + -6.073720961255064, + -8.067662923582365, + -9.228762918666252, + -2.572977914669187, + 4.497489633099259, + 7.555490434805937, + -1.3302502498349542, + 0.889150114494581, + 0.6492983535956304, + 5.372302012623104, + -0.3836415766204878, + -2.3372990413558163, + -0.7046133426963144, + -2.7672631482209042, + 0.7779732705161404, + 7.679315506536789, + 7.212648012351597, + -3.717171194809398, + -3.443218965335672, + -7.9884355813861525, + -0.15437542776681207, + -0.3252988281038899, + -0.04736873525040834, + -0.06581490480457773, + 4.876186643841398, + 0.36241169157713116, + 8.959685526258205, + 1.3513180314927358, + 6.150885694615564, + 5.4789655277814875 ], "y": [ - 3.855079906757912, - -2.6161491630776537, - 2.0858745119674817, - -0.7387070741223745, - 0.6762411291710205, - 3.2612679019976785, - -0.7597612819340497, - -1.5132518573724385, - -1.3141138830707781, - -0.4603812837704453, - -4.176798797673366, - -0.17145939483571848, - -2.3552322867774333, - -1.0701328964463008, - 0.8702761364071849, - 0.44236870815169077, - -1.9358357839167768, - -0.875340692809248, - 4.195432845157893, - -1.2376211881520474, - 0.12409870870908377, - -2.1090907082786456, - 1.685357352839291, - 0.6758843835084245, - -0.4501378746479654, - -1.066802136251232, - 0.6634853407176996, - -1.7623622324432309, - -1.1554207697853431, - -0.06063224652875297, - 3.3758883351636406, - 0.4512264379143446, - -2.456638459417957, - -0.43679824747548973, - -0.7382688818718947, - -0.2520532911701617, - 2.750530427456509, - 1.125146783490973, - 0.7892791006156633, - 0.5765882273665656, - -0.6854056696141374, - -1.279685771976567, - -1.1825978137443862, - 1.3222286500907285, - 0.11745983009345888, - -1.542437895261929, - -0.2782375674209586, - 3.937745783870189, - -1.3320803207099212, - 4.169856066791524, - -2.2477669326690903, - -1.150485868378693, - -3.8712181278625093, - -0.7022335549777545, - 0.5420963052877399, - 0.4678926303972273, - 3.4403713919822887, - 
0.4863806254463764, - -0.6238378308545871, - 1.343829136369984, - 0.14409084055530824, - 2.5343927756276208, - -0.5526272432620005, - 1.2248475756753234, - -3.7675068565379526, - 0.5204095710567854, - 3.8376607993496474, - -2.0760363685139835, - 0.5644581810204278, - -1.1459815564721405, - -2.041770332342056, - -0.004542519610788543, - 1.7031900127353492, - -1.3742218685131904, - -0.7147485799669284, - 1.4881169379618266, - -0.19050682488166057, - -0.14475243097201565, - 6.360968776083738, - -2.078243330810698, - 4.14095498498866, - 5.227145165733215, - -3.3612305563899025, - -1.1255274387743974, - 2.1625635432281007, - 0.4396840547361396, - 5.751134848691572, - -0.9000703677554964, - -1.7793354355241953, - -2.4139451622163732, - 0.8205060831759692, - 0.7039545268141139, - -0.7726894334348547, - -1.5249528470778462, - -3.868128583078392, - 3.202644291798853, - -2.962651113308873, - 4.430985736188797, - -2.4314062342671723, - 1.3807182968551648, - -0.7449036382521768, - -0.8069094474868895, - -0.6759332084903706, - 3.0748583207932327, - 0.1405897640815465, - 2.392095650546612, - -5.076137879528213, - 0.1097660154101488, - -0.09293666247089191, - 0.6138002511135776, - 2.3161173720426596, - -1.071210529898813, - -0.19711844410400842, - -0.9021406931148502, - 2.1673739587776044, - -0.19060429757994993, - -3.685341829885408, - 2.7174995622045883, - 0.7284784869926412, - -3.0190587027490117, - -0.7989147485071739, - 2.285982156684738, - -2.7164474192473183, - 2.0824407561550218, - -0.9369710311952276, - -2.092283307281979, - -2.184152854419849, - -4.04801007971819, - 0.6121619616567823, - -2.096903142825676, - -0.449370857527864, - 3.246313330481864, - 0.2072632677881261, - -3.526650756993172, - 5.912620412600382, - -3.2069462495949463, - -2.6355919352137627, - 1.1560393798870965, - -0.810113928458289, - -0.6400475866863665, - 1.6073144480858566, - 1.6113479644274307, - -1.4330355400833183, - -2.5206657362497764, - 0.1919453712673838, - 1.7782666634371787, - 
-0.22981003650282733, - -0.4233650947364542, - -1.3739336823800556, - -4.744448285298656, - 0.41078286576938844, - 0.2198503546716565, - -4.949990753643417, - 1.599525194878205, - 3.9230026623695577, - 2.094233447191595, - -3.308099746818885, - -4.3619148663696246, - -0.8069372215208568, - -0.31619288705894766, - 2.2622864147208737, - 1.7822106604828154, - -0.3317218431250816, - 1.3483469394442205, - 2.043981599768457, - -0.3321920606619467, - -0.7869185545988505, - -1.5467856892585568, - -0.25708760611913994, - 4.164638837124573, - -3.2659992541243765, - -1.9281826961005137, - 4.136589214412604, - -0.6703021414154839, - -1.5635311694465044, - -0.44951947244297247, - -0.3761386627777358, - 3.9764711009886002, - 2.3419627959783003, - 0.6089978678769945, - -4.338425696323253, - -2.5225599257667737, - 1.6428508719923456, - 0.12007001975983786, - 0.24402194312458406, - -0.2773245069485185, - 1.628382866958211, - 1.0067797556961153, - -2.66536832367383, - -1.4735063838523634, - 3.620964936225569, - 6.36265096697167, - 4.446035444782243, - -1.7217799443553965, - -3.5869356337268425 + 3.7710930700377343, + -2.6068122483972225, + 2.1759998382097714, + -1.477608137355973, + 0.5244193644395958, + 3.2222414860008306, + -0.6243507824918618, + -1.4525585842358641, + -1.2454320204471736, + -0.3197983036265329, + -4.237515714493149, + -0.1745838558446209, + -2.3382043958696803, + -1.0001205825719757, + 1.111163226050274, + 0.36914800662604874, + -2.0102061441495627, + -0.9596191176361444, + 4.114508502610506, + -1.1129139648683155, + -0.04650566248863399, + -2.1837767842915867, + 1.59071944797144, + 0.47363682832027093, + -0.3555564643680984, + -0.9772141578971821, + 0.6748364196510189, + -1.852551303827457, + -1.094111964537084, + -0.13869719837063346, + 4.666867362889493, + 0.3930464320109389, + -2.5179622696437893, + -0.32093333784864697, + -0.7806349080444681, + -0.32532378405232726, + 2.3955915410128226, + 1.141869680698887, + 0.5613822279535107, + 0.6199184188077461, + 
-0.7912863453361185, + -1.170416996964333, + -1.0513991436296977, + 0.9590765521071616, + -0.27242128263812415, + -1.5085111318643667, + -0.3085735129738893, + 4.131216809952003, + -1.3260495297428727, + 4.055202391554322, + -2.2912905440280102, + -1.047719714753937, + -3.561210590917475, + -0.7534647588303485, + 0.521686291212891, + 0.5153572670991243, + 3.2982843117596703, + 0.5439050810776036, + -0.6139123293485069, + 1.24966754387615, + 0.08283426661537716, + 2.4205953671128966, + -0.8620360299059099, + 1.218415315069224, + -3.702394948803829, + 0.44234956852966806, + 3.479352348848602, + -2.0858220432044217, + 0.825736813216072, + -1.019524143668798, + -2.2098267746146685, + -0.18289127807047462, + 1.783105542226986, + -1.584742368267455, + -0.7885358100669981, + 1.4064754574794345, + -0.06121314024465169, + 0.20217916802341582, + 7.474788743257434, + -1.7492366112861004, + 4.134103049603261, + 5.069571363427267, + -3.3111111011242764, + -1.1911420440298752, + 2.085456665139465, + 0.3894734380967889, + 5.668793789144949, + -0.558134535111894, + -1.6454439632996396, + -2.4859943981787693, + 1.00599629341969, + 0.3529129239948134, + -0.6009508210767537, + -1.4789193673035914, + -3.7748892487898007, + 3.100974039846159, + -2.897771044300527, + 4.298572957046066, + -1.4505126708965623, + 1.3700733845045165, + -0.8428440174556316, + -1.4758330493933198, + -0.6967325849637968, + 3.1668800161854556, + -0.23873747865098688, + 2.4856432233399466, + -4.9921256886164125, + 0.16518106042801775, + -0.11171481571152728, + 1.0949247613992086, + 2.3302094506334567, + -1.0266218714485222, + -0.5290480011503546, + -0.8344495211845591, + 2.2091109582233983, + -0.10605596618334574, + -3.553147300917237, + 2.8083552413960424, + 0.6479697790399467, + -3.1489644344238026, + -0.7947752862465971, + 1.8864930596369995, + -2.5018749242658975, + 2.1472048210075987, + -1.066223004796494, + -2.0100362941854204, + -2.0854082137719283, + -3.6505235452230527, + 0.26715344146391684, + 
-2.0767724469085764, + -0.4691622797649216, + 3.3168574113875895, + 0.1298244177503368, + -3.4670042691428264, + 5.789549947812651, + -3.3078602462065954, + -2.569035394856519, + 1.0984202420704408, + -0.8037305700455658, + -0.6636828835248264, + 1.6051363516612027, + 1.671485719666504, + -1.518616085313642, + -2.5393250747189424, + 0.3120517966570482, + 1.787750467826509, + -0.19724141628178163, + -0.4943330634356602, + -1.4935380283703719, + -4.649245770451785, + 0.4345804417117196, + 0.3501036381156643, + -4.949826018260202, + 1.5072756647611225, + 3.7003818677917777, + 2.1195330631670433, + -3.4129919044965176, + -4.081080733037543, + -0.8384739520601534, + -0.7090983114822332, + 2.5751290648726224, + 1.735506551700908, + -0.39572799630852773, + 1.2958850189263693, + 1.8970870955716783, + -0.308620040638708, + -0.7589333585377616, + -1.4816789461586213, + -0.904686467685462, + 4.104837364351266, + -3.1929672847548205, + -1.8159181491685787, + 4.114620300186373, + -0.7798587119233383, + -1.5540163204104063, + -0.44774452331140946, + -0.42143364507485415, + 3.9104734934031127, + 2.3841210940811215, + 0.3755125442647771, + -4.223587424909779, + -2.4465322252575294, + 1.6310274245445033, + -0.0028246323915299227, + 0.6216299027178394, + -0.014010365301508051, + 1.594650154053963, + 1.0192261832505012, + -2.4447386976502656, + -1.2992800194951994, + 3.5555081366304244, + 6.260046043530518, + 4.073331971606124, + -1.6167691506707658, + -3.5674654661302934 ], "z": [ - -0.08056349942375234, - 0.028786945680939334, - -0.44937341234933376, - -0.16683803717768994, - -0.27111339571903237, - -0.8468634775668962, - -0.5502370446162456, - 0.4632032199948351, - 0.017798805062920902, - 0.571825806803118, - 0.8610933028242622, - -0.41718466981031516, - 0.16298126000548457, - 0.0792910898343927, - 1.8128567388748213, - -0.5915769084573904, - -0.6600713773871396, - -0.8618537879404934, - -1.3168602103635203, - -0.22386892528755858, - -1.2014952503028953, - 0.07455281400208881, - 
1.9166641224580574, - -1.2135979612511367, - -0.6447006133013446, - -0.12282338586990073, - -1.3674628062622514, - -0.20112813577814573, - 0.4490646679359641, - -0.30546512881185595, - 21.075170142106238, - -0.3461901436442329, - 0.028559610619205766, - 0.7183639705212421, - -0.8787618828044577, - 0.12710832268882694, - -0.9921767250704747, - -1.4080096325967255, - -0.8219680893706439, - -0.16275604933559068, - -1.1533739954894404, - -0.0337917509503916, - 1.6799185317306151, - -1.6512330639034185, - -0.7302482647171719, - -0.057800768168225665, - -0.5793990807734664, - -0.3083473792952449, - 0.06495870174368194, - 0.26669955368531123, - 0.47673235544920756, - -0.36120354829237655, - 2.041100499830188, - -0.4143178336544228, - -0.6015274112600257, - 0.7478337195452976, - -1.8828288126503987, - -0.7500800314342261, - -0.12798166920024825, - -1.1867701805068653, - -1.1196282252328504, - -0.6493267239833703, - -0.43756308949814743, - -0.7533933663839528, - 0.4302699784022649, - -0.7319465680265883, - -2.2780987950134697, - -0.06682395650749673, - -0.4419775297297648, - -0.3445492341786709, - -0.11254660017123147, - -0.18043721696072984, - -1.2245532604994218, - -0.4508568352724152, - -0.5245332955884419, - -1.4928953735994406, - -0.3800223951921673, - 3.3336995724742122, - 16.58065950845859, - -0.10654620050939036, - 0.008192649556087934, - -0.6423735209945476, - 0.152297292659392, - -0.36560878774545547, - 0.49191207289471833, - -1.1099575355724403, - -1.7319271224030202, - 2.7873230415457813, - 0.3265442671574655, - 0.5945942363898596, - -0.9086831018342412, - -0.5682675113500351, - 0.9809557365077258, - -0.2955124194307041, - 0.5430900659914154, - -0.875669448080304, - 0.015598845436188625, - -1.7110254542689507, - 0.7159377765562558, - -2.110361788242772, - -0.6140117208816129, - -2.2213300253252046, - -0.4855754948258507, - -0.020397403392057183, - -0.6674547258338281, - -1.0322834653250168, - 0.841765444160154, - -0.16573915738325531, - 1.3532932568239346, - 
-1.0306742402186815, - -1.1673487760360945, - 0.055627548134166156, - -1.0807593074388993, - 0.2489197605289283, - -1.2548555213743704, - -1.1222118522352336, - 0.37543854283410066, - -0.8582102171406314, - -0.8972184590567043, - 0.0557980004106488, - 0.2075716873589323, - -2.118355272391502, - 0.8960991584668245, - 1.619249911125445, - -0.48425138660513256, - 0.20873183469256704, - 0.13649392700008095, - 1.1043508356549243, - -0.7750278052214509, - -0.03990788295694306, - -0.3456984462829911, - 2.7893599209321964, - -0.8224626080103721, - 1.0510801949597657, - -1.8906202128098901, - 0.1765606034112846, - 0.19411942215412678, - 0.1833261337225062, - 1.0667397058802544, - -0.2867439193164209, - -1.4817313055181802, - -0.43374751726861893, - -0.4787152619003991, - 0.48739304026159463, - 0.7960149868538892, - -0.1506706339034871, - 0.342128657741293, - -0.10838198808233662, - -0.2324437265816887, - 0.23903962222116307, - -0.10779761283468126, - 1.9882259122761288, - 0.7137857571409595, - -0.38100084383652005, - -0.7369267139576294, - -0.5866833474499575, - -0.2601928237466296, - 0.7355547951926357, - -0.3915917226406725, - -0.6608095025394163, - -0.4820656942062738, - -0.890245666833216, - -0.2379812125363602, - -1.1282432906633666, - -0.9714088903315417, - -0.6977684963639371, - -0.3478849704694565, - -0.01431592220793677, - -0.9684387291859726, - -0.6535085737776317, - -0.06077323296566774, - 1.2253400465197497, - -1.088695983798872, - -0.3074576335207782, - 0.06497871050632115, - -0.5866301760000734, - -0.6938107871003306, - -0.9784676321572428, - 0.5843442289245169, - -0.7814587484817813, - 1.068832535600227, - 0.5516405641878677, - -0.37564916203542914, - -0.6494838123648803, - 7.139715614122555, - 0.003818061511962325, - -1.5702626196716343, - -0.6436689069476974, - 1.7452512841104453, - 0.21842118139393515, - -1.1804007080469503, - -0.48422171683216453, - -1.8277115375834077, - -0.3770607910856339, - -0.14827461968716107 + -0.1332549867679698, + 
0.27291870991937145, + -0.611012267273753, + -0.2845008401372673, + -0.23934607177095424, + -0.989885897143346, + -0.5913310286751665, + 0.4918869311562419, + 0.02152328028019099, + 0.6527665776802173, + 1.1779896925604951, + -0.4708636708683358, + 0.2956680935739928, + 0.12663008610763415, + 1.6942631052609094, + -0.5648214463106873, + -0.6120798399280499, + -0.8126412115199241, + -1.5597933259979462, + -0.20172561562931726, + -1.047714000388964, + 0.2833283572551736, + 1.947988206124562, + -1.0940250957737738, + -0.700773597471764, + 0.08586194539976243, + -1.4115772304017518, + -0.08515973093916311, + 0.4719010931574096, + -0.34883138810793957, + 20.966696914227896, + -0.3274459280580112, + 0.29271254763675336, + 0.6196062751105513, + -0.8265694284581329, + 0.19528142811908988, + -1.0421850982750887, + -1.4945598964235005, + -0.7987533916470302, + -0.4414013258569815, + -0.9833877041475038, + -0.014107758956231299, + 1.6975210715833469, + -1.3432928236773465, + -0.5691741516819883, + -0.016253383951449877, + -0.5609881949423667, + -0.7751208351141133, + 0.14824692386359914, + 0.00927794382893188, + 0.3179320083242132, + -0.3576194520490325, + 2.211293354678765, + -0.40672738166071587, + -0.6075844208483971, + 0.6765252189015973, + -2.020671173119792, + -0.8020750071696471, + 0.04334994483721389, + -1.086057747558172, + -0.8670786486384499, + -0.7475883165995749, + -0.4853487655958365, + -0.83531533811805, + 0.734179483116577, + -0.6696378871532909, + -2.387871871639611, + 0.2056780509902696, + -0.6271404340494551, + -0.3105679935654806, + 0.15434282400106256, + -0.041509387748856236, + -1.3518728132750355, + -0.30979810793800244, + -0.4076506800725293, + -1.4368858634288142, + -0.439837150288455, + 3.284033180624785, + 16.222883878857836, + -0.2875562816754254, + -0.3592141552110312, + -0.881347200571578, + 0.3237967022487916, + -0.299888668387499, + 0.3693021525982787, + -1.0364700904846793, + -2.1255576816295476, + 2.884194216499615, + 0.41893252327431185, + 
0.7829805728736471, + -0.9766590654787115, + -0.6126549175381664, + 0.8963086398057337, + -0.06684147199340751, + 0.9059223522651572, + -1.166783909012498, + 0.2518120483509488, + -1.9813558678292322, + 0.5081446050076046, + -2.179479213880696, + -0.521918288574282, + -1.5747977920674756, + -0.43605984926764313, + -0.2761240963499135, + -0.6427173988235734, + -1.2010568896745912, + 1.0894778343477631, + -0.10654044827049912, + 1.4019091347689008, + -1.0265730834270628, + -1.3601266111839339, + 0.15646587786945004, + -0.9382090527097158, + 0.5053523595060782, + -1.4667694835270888, + -1.1604490104069942, + 0.5150624787802507, + -1.1027292782221167, + -0.839105415019863, + 0.2776553132752387, + 0.21503674198584916, + -2.0704733945969522, + 0.9887373441404025, + 1.5217215798291597, + -0.3220752306439535, + 0.28679952660295216, + 0.1587864411478722, + 1.290438072221053, + -0.6496490293445591, + 0.002632923994493077, + -0.3760188151867507, + 2.7115844513505634, + -0.9828777983239214, + 1.083015524634079, + -2.2842817622522524, + 0.4199335852091797, + 0.2738786451283277, + 0.10490493644303588, + 1.1314337861119779, + -0.35609718357504794, + -1.5997477704369565, + -0.5515669205914375, + -0.3926802265739144, + 0.701690271753913, + 0.7841817274062157, + -0.2575142832338854, + 0.3640003794240824, + -0.05399275040865746, + -0.06743258867823183, + 0.3163775826699158, + -0.1462798928758705, + 1.9942716803820362, + 0.9359999525922797, + -0.5623556831453945, + -0.8469774592591988, + -0.8779543598166668, + -0.011611440250646143, + 1.0550222920744383, + -0.3923020976890576, + -0.7590866947217605, + -0.9289791128963573, + -0.9967515617595287, + -0.47678452948726496, + -1.2313850212238613, + -1.0910139483733312, + -0.6840759561480423, + -0.31184363442804364, + 0.004357646405543191, + -0.7426386538844539, + -1.3054675039254935, + 0.1566801829898895, + 1.368288604939323, + -1.3662589846026323, + -0.3096134232154807, + 0.1770769632986913, + -0.5471472073762823, + -0.6466621016673111, + 
-1.248041765770726, + 0.3939036866698287, + -0.6813283958170979, + 1.2742176082427117, + 0.744638907265581, + -0.39433976596142706, + -0.5799619618503732, + 7.060467356342127, + -0.2211897015322298, + -1.5809316826536888, + -0.80120105935382, + 2.0811505851585417, + 0.26892275492564527, + -1.480500416081267, + -1.3618537976781397, + -1.970376263321043, + -0.34348196426888905, + 0.18144894292221464 ] } ], @@ -2298,17 +2512,17 @@ }, "xaxis": { "title": { - "text": "PC1 49.73%" + "text": "PC1 49.70%" } }, "yaxis": { "title": { - "text": "PC2 7.93%" + "text": "PC2 7.85%" } }, "zaxis": { "title": { - "text": "PC3 7.18%" + "text": "PC3 7.20%" } } }, @@ -3119,7 +3333,7 @@ } }, "title": { - "text": "Total Explained Variance: 64.84%" + "text": "Total Explained Variance: 64.75%" } } } @@ -3135,7 +3349,7 @@ "print(l_dict)\n", "\n", "fig = px.scatter_3d(\n", - " pComponents_final, x=0, y=1, z=2,\n", + " pComponents_imputed, x=0, y=1, z=2,\n", " title=f'Total Explained Variance: {total_explained_percentage:.2f}%',\n", " labels=l_dict\n", ")\n", @@ -3144,26 +3358,26 @@ ] }, { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], "source": [ - "## Conversion of the Dataset to CSV" - ], - "cell_type": "markdown", - "metadata": {} + "imputed_df.to_csv(\"../data/unlabeled/preprocessed/hdro_preprocessed.csv\")" + ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "final_df.to_csv(\"../data/unlabeled/preprocessed/hdro_preprocessed.csv\")" - ] + "source": [] } ], "metadata": { "kernelspec": { - "name": "python374jvsc74a57bd0dca0ade3e726a953b501b15e8e990130d2b7799f14cfd9f4271676035ebe5511", - "display_name": "Python 3.7.4 64-bit ('base': conda)" + "name": "python394jvsc74a57bd01e002e48d41ce7b93ab532133c559bfbfa167161e2109b258903f10473d9f54b", + "display_name": "Python 3.9.4 64-bit ('wsenv': conda)" }, "language_info": { "codemirror_mode": { @@ -3175,7 +3389,7 @@ "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.9.4" } }, "nbformat": 4, From ba63fb29d8c7cac26746fd16a60d72fd8ebfc487 Mon Sep 17 00:00:00 2001 From: Joachim Bache-Mathiesen Date: Tue, 18 May 2021 14:46:21 +0200 Subject: [PATCH 2/6] Working documentation --- generate_documentation.py | 12 +++++++----- requirements.txt | 3 ++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/generate_documentation.py b/generate_documentation.py index e7e5fdf..1d6a142 100644 --- a/generate_documentation.py +++ b/generate_documentation.py @@ -7,13 +7,15 @@ pdoc.link_inheritance(context) def recursive_htmls(mod): - yield mod.name, mod.html() + yield mod.name, mod.html(), bool(mod.submodules()) for submod in mod.submodules(): yield from recursive_htmls(submod) -for module_name, html in recursive_htmls(modules): - fname = f"documentation/{'/'.join(module_name.split('.'))}/index.html" +for module_name, html, has_subm in recursive_htmls(modules): + if has_subm: + fname = f"documentation/{'/'.join(module_name.split('.'))}/index.html" + else: + fname = f"documentation/{'/'.join(module_name.split('.'))}.html" os.makedirs(os.path.dirname(fname), exist_ok=True) with open(fname,"w", encoding="utf-8") as f: - f.writelines(html) - + f.writelines(html) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 2f892a7..bc7050b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,4 +17,5 @@ folium ipyleaflet voila==0.1.24 haversine -country-converter \ No newline at end of file +country-converter +pdoc3 \ No newline at end of file From dae6c147f7204d4e216c60a8af569c0378c74e4c Mon Sep 17 00:00:00 2001 From: Joachim Bache-Mathiesen Date: Tue, 18 May 2021 15:31:38 +0200 Subject: [PATCH 3/6] Html upload and ignore file --- .docignore | 4 + .../classification/classifier.html | 165 ++++ .../classification/feature_selection.html | 159 ++++ .../WaterSecurity/classification/index.html | 81 ++ 
.../classification/model_handler.html | 861 ++++++++++++++++++ documentation/WaterSecurity/index.html | 75 ++ .../labeled_preprocessing/imputation.html | 418 +++++++++ .../labeled_preprocessing/index.html | 65 ++ .../unlabeled_preprocessing/helpers.html | 211 +++++ .../unlabeled_preprocessing/index.html | 65 ++ documentation/WaterSecurity/utils/geo.html | 470 ++++++++++ documentation/WaterSecurity/utils/index.html | 70 ++ documentation/WaterSecurity/utils/nlp.html | 287 ++++++ generate_documentation.py | 8 +- 14 files changed, 2937 insertions(+), 2 deletions(-) create mode 100644 .docignore create mode 100644 documentation/WaterSecurity/classification/classifier.html create mode 100644 documentation/WaterSecurity/classification/feature_selection.html create mode 100644 documentation/WaterSecurity/classification/index.html create mode 100644 documentation/WaterSecurity/classification/model_handler.html create mode 100644 documentation/WaterSecurity/index.html create mode 100644 documentation/WaterSecurity/labeled_preprocessing/imputation.html create mode 100644 documentation/WaterSecurity/labeled_preprocessing/index.html create mode 100644 documentation/WaterSecurity/unlabeled_preprocessing/helpers.html create mode 100644 documentation/WaterSecurity/unlabeled_preprocessing/index.html create mode 100644 documentation/WaterSecurity/utils/geo.html create mode 100644 documentation/WaterSecurity/utils/index.html create mode 100644 documentation/WaterSecurity/utils/nlp.html diff --git a/.docignore b/.docignore new file mode 100644 index 0000000..7afe47c --- /dev/null +++ b/.docignore @@ -0,0 +1,4 @@ +WaterSecurity.data +WaterSecurity.documentation +WaterSecurity.run +WaterSecurity.generate_documentation \ No newline at end of file diff --git a/documentation/WaterSecurity/classification/classifier.html b/documentation/WaterSecurity/classification/classifier.html new file mode 100644 index 0000000..39e2cea --- /dev/null +++ 
b/documentation/WaterSecurity/classification/classifier.html @@ -0,0 +1,165 @@ + + + + + + +WaterSecurity.classification.classifier API documentation + + + + + + + + + + + +
+
+
+

Module WaterSecurity.classification.classifier

+
+
+
+ +Expand source code + +
from sklearn.base import BaseEstimator, RegressorMixin
+import numpy as np
+
+
+class Classifier(BaseEstimator, RegressorMixin):
+    # @TODO implement
+    def fit(self, x_data, y_data):
+        """
+        x_data: the nxm features
+        y_data: the n labels, with values 0,1,2 or 3
+        """
+        return self
+
+    def predict(self, x_data):
+        """
+        x_data: the nxm features
+        Returns n predictions, which have values 0 to 3, they can be floats
+        """
+        return np.zeros(x_data.shape[0])
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class Classifier +
+
+

Base class for all estimators in scikit-learn.

+

Notes

+

All estimators should specify all the parameters that can be set +at the class level in their __init__ as explicit keyword +arguments (no *args or **kwargs).

+
+ +Expand source code + +
class Classifier(BaseEstimator, RegressorMixin):
+    # @TODO implement
+    def fit(self, x_data, y_data):
+        """
+        x_data: the nxm features
+        y_data: the n labels, with values 0,1,2 or 3
+        """
+        return self
+
+    def predict(self, x_data):
+        """
+        x_data: the nxm features
+        Returns n predictions, which have values 0 to 3, they can be floats
+        """
+        return np.zeros(x_data.shape[0])
+
+

Ancestors

+
    +
  • sklearn.base.BaseEstimator
  • +
  • sklearn.base.RegressorMixin
  • +
+

Methods

+
+
+def fit(self, x_data, y_data) +
+
+

x_data: the nxm features +y_data: the n labels, with values 0,1,2 or 3

+
+ +Expand source code + +
def fit(self, x_data, y_data):
+    """
+    x_data: the nxm features
+    y_data: the n labels, with values 0,1,2 or 3
+    """
+    return self
+
+
+
+def predict(self, x_data) +
+
+

x_data: the nxm features +Returns n predictions, which have values 0 to 3, they can be floats

+
+ +Expand source code + +
def predict(self, x_data):
+    """
+    x_data: the nxm features
+    Returns n predictions, which have values 0 to 3, they can be floats
+    """
+    return np.zeros(x_data.shape[0])
+
+
+
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/classification/feature_selection.html b/documentation/WaterSecurity/classification/feature_selection.html new file mode 100644 index 0000000..01369dd --- /dev/null +++ b/documentation/WaterSecurity/classification/feature_selection.html @@ -0,0 +1,159 @@ + + + + + + +WaterSecurity.classification.feature_selection API documentation + + + + + + + + + + + +
+
+
+

Module WaterSecurity.classification.feature_selection

+
+
+
+ +Expand source code + +
from sklearn import base
+from sklearn.base import BaseEstimator, TransformerMixin
+
+
+class FeatureSelectionAndGeneration(BaseEstimator, TransformerMixin):
+    # @TODO implement
+
+    def fit(self, x_data, y_data):
+        """
+        Fits to nxm features x_data and n predictions y_data
+        """
+        return self
+
+    def transform(self, x_data):
+        """
+        Transforms x_data from nxm to kxm
+        """
+        return x_data
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class FeatureSelectionAndGeneration +
+
+

Base class for all estimators in scikit-learn.

+

Notes

+

All estimators should specify all the parameters that can be set +at the class level in their __init__ as explicit keyword +arguments (no *args or **kwargs).

+
+ +Expand source code + +
class FeatureSelectionAndGeneration(BaseEstimator, TransformerMixin):
+    # @TODO implement
+
+    def fit(self, x_data, y_data):
+        """
+        Fits to nxm features x_data and n predictions y_data
+        """
+        return self
+
+    def transform(self, x_data):
+        """
+        Transforms x_data from nxm to kxm
+        """
+        return x_data
+
+

Ancestors

+
    +
  • sklearn.base.BaseEstimator
  • +
  • sklearn.base.TransformerMixin
  • +
+

Methods

+
+
+def fit(self, x_data, y_data) +
+
+

Fits to nxm features x_data and n predictions y_data

+
+ +Expand source code + +
def fit(self, x_data, y_data):
+    """
+    Fits to nxm features x_data and n predictions y_data
+    """
+    return self
+
+
+
+def transform(self, x_data) +
+
+

Transforms x_data from nxm to kxm

+
+ +Expand source code + +
def transform(self, x_data):
+    """
+    Transforms x_data from nxm to kxm
+    """
+    return x_data
+
+
+
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/classification/index.html b/documentation/WaterSecurity/classification/index.html new file mode 100644 index 0000000..24f58b0 --- /dev/null +++ b/documentation/WaterSecurity/classification/index.html @@ -0,0 +1,81 @@ + + + + + + +WaterSecurity.classification API documentation + + + + + + + + + + + +
+ + +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/classification/model_handler.html b/documentation/WaterSecurity/classification/model_handler.html new file mode 100644 index 0000000..207feab --- /dev/null +++ b/documentation/WaterSecurity/classification/model_handler.html @@ -0,0 +1,861 @@ + + + + + + +WaterSecurity.classification.model_handler API documentation + + + + + + + + + + + +
+
+
+

Module WaterSecurity.classification.model_handler

+
+
+
+ +Expand source code + +
from sklearn.base import BaseEstimator
+from data.labeled.preprocessed import LABELED_CITIES
+import os
+import pickle
+import pandas as pd
+import numpy as np
+import importlib
+from data.model import MODEL_PATH
+from sklearn.pipeline import Pipeline
+from classification.classifier import Classifier
+from classification.feature_selection import FeatureSelectionAndGeneration
+from sklearn.exceptions import NotFittedError
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import confusion_matrix, classification_report
+from classification import RANDOM_SEED
+from data.model.metrics import VALIDATION_METRICS_PATH, TRAINING_METRICS_PATH
+from data.model.predictions import PREDICTION_MASK_PATH, FILLED_DATASET_PATH
+from utils.geo import is_close, get_place
+
+
+class TrainingRequired(NotFittedError):
+    def __init__(self, obj):
+        super().__init__(f"{obj} could not be loaded. Training model is required")
+
+
+class InvalidCoordinates(BaseException):
+    pass
+
+
+class ModelHandler:
+    """
+    Trains and Tests the model, while also computing metrics.
+    During training the model is first fitted, then produces predictions for any unlabled points inside the dataset
+    """
+
+    def __init__(self):
+        self._model = None
+        self._dataset = None
+        self._valid_metrics = None
+        self._train_metrics = None
+        self._filled_dataset = None
+        self.train_mask = None
+        self.feat_names = None
+        self.lab_names = None
+        # The id columns to remain in the filled dataset
+        self.id_columns = ["city", "country", "latitude", "longitude"]
+
+    @property
+    def model(self) -> Pipeline:
+        """
+        If model is not defined, try to loaded from disk
+        """
+        if self._model is None:
+            try:
+                from data.model import MODEL
+
+                self._model = MODEL
+            except ImportError:
+                raise TrainingRequired("Model")
+        return self._model
+
+    @model.setter
+    def model(self, model: Pipeline):
+        self._model = model
+
+    def save_model(self) -> None:
+        """
+        Saves model to memory
+        """
+        with open(os.path.join(MODEL_PATH), "wb") as out:
+            pickle.dump(self.model, out)
+        import data.model
+
+        importlib.reload(data.model)
+
+    @property
+    def dataset(self) -> pd.DataFrame:
+        """
+        The dataset for the training step.
+        When it is loaded the first time, several variables are defined:
+            - lab_names: the labels names/columns of the dataset
+            - unique_labs: the unique labels values
+            - feat_names: the features names/columns of the dataset
+            - train_mask: the mask that refers to the cities that are labeled at least for one risk
+        """
+        if self._dataset is None:
+            from data.labeled.preprocessed import LABELED_CITIES, RISKS_MAPPING
+            from data.dataset import DATASET as dataset
+
+            self.lab_names = sorted(RISKS_MAPPING.keys())
+            self.unique_labs = np.unique(dataset[self.lab_names].T.stack().values)
+            self.feat_names = [
+                x
+                for x in dataset.columns
+                if x not in self.lab_names and x not in LABELED_CITIES.columns
+            ]
+            self.train_mask = dataset[self.lab_names].apply(
+                lambda x: all(pd.isnull(x)), axis=1
+            )
+            self._dataset = dataset
+        return self._dataset
+
+    @property
+    def filled_dataset(self) -> pd.DataFrame:
+        """
+        The dataset that has filled labels, which were produced from the predictions
+        """
+        if self._filled_dataset is None:
+            try:
+                self._filled_dataset = pd.read_csv(FILLED_DATASET_PATH)
+            except IOError:
+                raise TrainingRequired("Filled Dataset")
+        return self._filled_dataset
+
+    @filled_dataset.setter
+    def filled_dataset(self, dataset: pd.DataFrame):
+        self._filled_dataset = dataset
+
+    def compute_metrics(self, y_true, y_pred):
+        """
+        Compute metrics for regression labels of size nx1
+        """
+        metrics = {}
+        # Interpolate predictions to labels, eg convert 0.2 to 0, 0.7 to 1 etc.
+        y_pred_interp = self.unique_labs[
+            np.abs(np.reshape(self.unique_labs, (-1, 1)) - y_pred).argmin(axis=0)
+        ]
+        metrics["confusion_matrix"] = confusion_matrix(y_true, y_pred_interp)
+        metrics["classification_report"] = classification_report(y_true, y_pred)
+        return metrics
+
+    @property
+    def is_fitted(self) -> bool:
+        """
+        Tries to load model from memory/disk, if it fails, returns False, else returns True
+        """
+        try:
+            self.model
+        except TrainingRequired:
+            return False
+        return True
+
+    def train(self) -> None:
+        """
+        - Trains 7 different models, one per each different water security risk.
+        - Applies feature selection and generation per different model.
+        - Keeps 0.3 validation size, computes classification metrics, saves them, then fits each model to the whole available dataset for each risk.
+        - Creates the filled dataset and saves it to disk
+        - Creates the prediction mask (what labels from the filled dataset were predicted) and saves it to memory
+        """
+        dataset = self.dataset
+        labeled = dataset[self.train_mask]
+        labeled[self.lab_names]
+
+        model = {}
+        train_metrics = {}
+        valid_metrics = {}
+        filled_dataset = dataset[self.lab_names + self.id_columns].copy()
+        for label in self.lab_names:
+            train_mask = ~pd.isnull(dataset[label])
+            labeled = dataset.loc[train_mask, :]
+            train_set, valid_set = train_test_split(
+                labeled, test_size=0.3, random_state=RANDOM_SEED
+            )
+
+            model[label] = Pipeline(
+                [
+                    ("FeatureSelection", FeatureSelectionAndGeneration()),
+                    ("Classification", Classifier()),
+                ]
+            )
+            model[label].fit(train_set[self.feat_names], train_set[label])
+            train_preds = model[label].predict(train_set[self.feat_names])
+            valid_preds = model[label].predict(valid_set[self.feat_names])
+            train_metrics[label] = self.compute_metrics(train_set[label], train_preds)
+            valid_metrics[label] = self.compute_metrics(valid_set[label], valid_preds)
+            model[label].fit(labeled[self.feat_names], labeled[label])
+
+            filled_dataset.loc[~train_mask, label] = model[label].predict(
+                dataset.loc[~train_mask, self.feat_names]
+            )
+        self.model = model
+        self.save_model()
+        with open(VALIDATION_METRICS_PATH, "wb") as out:
+            pickle.dump(valid_metrics, out)
+        with open(TRAINING_METRICS_PATH, "wb") as out:
+            pickle.dump(train_metrics, out)
+        self.filled_dataset = filled_dataset
+        self.filled_dataset.to_csv(FILLED_DATASET_PATH, index=False)
+        pd.isnull(dataset[self.lab_names]).to_csv(PREDICTION_MASK_PATH, index=False)
+        import data.model.metrics
+
+        importlib.reload(data.model.metrics)
+        import data.model.predictions
+
+        importlib.reload(data.model.predictions)
+
+    def test(self, latitude, longitude):
+        """
+        Given a specific latitude and longitude value, either returns saved predictions from the filled dataset, if the point is close to the
+        ones that have already been predicted, or uses a REST API to load the country to which the latitude and longitude refer, uses the country data
+        to create the feature vector and computes the prediction using the trained models.
+        Returns the series of the found labels, which also contain city and country, and the series of booleans which shows which predictions were predicted and which where not.
+        """
+        try:
+            from data.model.predictions import FILLED_DATASET, PREDICTION_MASK
+        except ImportError:
+            raise TrainingRequired("Filled Dataset")
+        check_existing = FILLED_DATASET.apply(
+            lambda x: is_close((latitude, longitude), (x["latitude"], x["longitude"])),
+            axis=1,
+        )
+        if np.any(check_existing):
+            labs = list(sorted(self.model.keys()))
+            return (
+                FILLED_DATASET.loc[check_existing, labs + ["city", "country"]].iloc[0],
+                PREDICTION_MASK.loc[check_existing, labs].iloc[0],
+            )
+        try:
+            place = get_place(latitude, longitude)
+        except AttributeError:
+            raise InvalidCoordinates
+        from data.unlabeled import COUNTRIES_DATASET
+
+        feats = COUNTRIES_DATASET.loc[place["code"]]
+        preds = {}
+        mask = {}
+        for label in self.model:
+            preds[label] = self.model[label].predict(feats)[0]
+            mask[label] = True
+        preds["city"] = place["city"]
+        preds["country"] = place["country"]
+        return pd.Series(preds), pd.Series(mask)
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class InvalidCoordinates +(*args, **kwargs) +
+
+

Common base class for all exceptions

+
+ +Expand source code + +
class InvalidCoordinates(BaseException):
+    # Raised by `ModelHandler.test` when `get_place` fails with an AttributeError.
+    # NOTE(review): derives from BaseException, so `except Exception` handlers do not catch it - confirm this is intended.
+    pass
+
+

Ancestors

+
    +
  • builtins.BaseException
  • +
+
+
+class ModelHandler +
+
+

Trains and Tests the model, while also computing metrics. +During training the model is first fitted, then produces predictions for any unlabeled points inside the dataset

+
+ +Expand source code + +
class ModelHandler:
+    """
+    Trains and Tests the model, while also computing metrics.
+    During training the model is first fitted, then produces predictions for any unlabled points inside the dataset
+    """
+
+    def __init__(self):
+        self._model = None
+        self._dataset = None
+        self._valid_metrics = None
+        self._train_metrics = None
+        self._filled_dataset = None
+        self.train_mask = None
+        self.feat_names = None
+        self.lab_names = None
+        # The id columns to remain in the filled dataset
+        self.id_columns = ["city", "country", "latitude", "longitude"]
+
+    @property
+    def model(self) -> Pipeline:
+        """
+        If model is not defined, try to loaded from disk
+        """
+        if self._model is None:
+            try:
+                from data.model import MODEL
+
+                self._model = MODEL
+            except ImportError:
+                raise TrainingRequired("Model")
+        return self._model
+
+    @model.setter
+    def model(self, model: Pipeline):
+        self._model = model
+
+    def save_model(self) -> None:
+        """
+        Saves model to memory
+        """
+        with open(os.path.join(MODEL_PATH), "wb") as out:
+            pickle.dump(self.model, out)
+        import data.model
+
+        importlib.reload(data.model)
+
+    @property
+    def dataset(self) -> pd.DataFrame:
+        """
+        The dataset for the training step.
+        When it is loaded the first time, several variables are defined:
+            - lab_names: the labels names/columns of the dataset
+            - unique_labs: the unique labels values
+            - feat_names: the features names/columns of the dataset
+            - train_mask: the mask that refers to the cities that are labeled at least for one risk
+        """
+        if self._dataset is None:
+            from data.labeled.preprocessed import LABELED_CITIES, RISKS_MAPPING
+            from data.dataset import DATASET as dataset
+
+            self.lab_names = sorted(RISKS_MAPPING.keys())
+            self.unique_labs = np.unique(dataset[self.lab_names].T.stack().values)
+            self.feat_names = [
+                x
+                for x in dataset.columns
+                if x not in self.lab_names and x not in LABELED_CITIES.columns
+            ]
+            self.train_mask = dataset[self.lab_names].apply(
+                lambda x: all(pd.isnull(x)), axis=1
+            )
+            self._dataset = dataset
+        return self._dataset
+
+    @property
+    def filled_dataset(self) -> pd.DataFrame:
+        """
+        The dataset that has filled labels, which were produced from the predictions
+        """
+        if self._filled_dataset is None:
+            try:
+                self._filled_dataset = pd.read_csv(FILLED_DATASET_PATH)
+            except IOError:
+                raise TrainingRequired("Filled Dataset")
+        return self._filled_dataset
+
+    @filled_dataset.setter
+    def filled_dataset(self, dataset: pd.DataFrame):
+        self._filled_dataset = dataset
+
+    def compute_metrics(self, y_true, y_pred):
+        """
+        Compute metrics for regression labels of size nx1
+        """
+        metrics = {}
+        # Interpolate predictions to labels, eg convert 0.2 to 0, 0.7 to 1 etc.
+        y_pred_interp = self.unique_labs[
+            np.abs(np.reshape(self.unique_labs, (-1, 1)) - y_pred).argmin(axis=0)
+        ]
+        metrics["confusion_matrix"] = confusion_matrix(y_true, y_pred_interp)
+        metrics["classification_report"] = classification_report(y_true, y_pred)
+        return metrics
+
+    @property
+    def is_fitted(self) -> bool:
+        """
+        Tries to load model from memory/disk, if it fails, returns False, else returns True
+        """
+        try:
+            self.model
+        except TrainingRequired:
+            return False
+        return True
+
+    def train(self) -> None:
+        """
+        - Trains 7 different models, one per each different water security risk.
+        - Applies feature selection and generation per different model.
+        - Keeps 0.3 validation size, computes classification metrics, saves them, then fits each model to the whole available dataset for each risk.
+        - Creates the filled dataset and saves it to disk
+        - Creates the prediction mask (what labels from the filled dataset were predicted) and saves it to memory
+        """
+        dataset = self.dataset
+        labeled = dataset[self.train_mask]
+        labeled[self.lab_names]
+
+        model = {}
+        train_metrics = {}
+        valid_metrics = {}
+        filled_dataset = dataset[self.lab_names + self.id_columns].copy()
+        for label in self.lab_names:
+            train_mask = ~pd.isnull(dataset[label])
+            labeled = dataset.loc[train_mask, :]
+            train_set, valid_set = train_test_split(
+                labeled, test_size=0.3, random_state=RANDOM_SEED
+            )
+
+            model[label] = Pipeline(
+                [
+                    ("FeatureSelection", FeatureSelectionAndGeneration()),
+                    ("Classification", Classifier()),
+                ]
+            )
+            model[label].fit(train_set[self.feat_names], train_set[label])
+            train_preds = model[label].predict(train_set[self.feat_names])
+            valid_preds = model[label].predict(valid_set[self.feat_names])
+            train_metrics[label] = self.compute_metrics(train_set[label], train_preds)
+            valid_metrics[label] = self.compute_metrics(valid_set[label], valid_preds)
+            model[label].fit(labeled[self.feat_names], labeled[label])
+
+            filled_dataset.loc[~train_mask, label] = model[label].predict(
+                dataset.loc[~train_mask, self.feat_names]
+            )
+        self.model = model
+        self.save_model()
+        with open(VALIDATION_METRICS_PATH, "wb") as out:
+            pickle.dump(valid_metrics, out)
+        with open(TRAINING_METRICS_PATH, "wb") as out:
+            pickle.dump(train_metrics, out)
+        self.filled_dataset = filled_dataset
+        self.filled_dataset.to_csv(FILLED_DATASET_PATH, index=False)
+        pd.isnull(dataset[self.lab_names]).to_csv(PREDICTION_MASK_PATH, index=False)
+        import data.model.metrics
+
+        importlib.reload(data.model.metrics)
+        import data.model.predictions
+
+        importlib.reload(data.model.predictions)
+
+    def test(self, latitude, longitude):
+        """
+        Given a specific latitude and longitude value, either returns saved predictions from the filled dataset, if the point is close to the
+        ones that have already been predicted, or uses a REST API to load the country to which the latitude and longitude refer, uses the country data
+        to create the feature vector and computes the prediction using the trained models.
+        Returns the series of the found labels, which also contain city and country, and the series of booleans which shows which predictions were predicted and which where not.
+        """
+        try:
+            from data.model.predictions import FILLED_DATASET, PREDICTION_MASK
+        except ImportError:
+            raise TrainingRequired("Filled Dataset")
+        check_existing = FILLED_DATASET.apply(
+            lambda x: is_close((latitude, longitude), (x["latitude"], x["longitude"])),
+            axis=1,
+        )
+        if np.any(check_existing):
+            labs = list(sorted(self.model.keys()))
+            return (
+                FILLED_DATASET.loc[check_existing, labs + ["city", "country"]].iloc[0],
+                PREDICTION_MASK.loc[check_existing, labs].iloc[0],
+            )
+        try:
+            place = get_place(latitude, longitude)
+        except AttributeError:
+            raise InvalidCoordinates
+        from data.unlabeled import COUNTRIES_DATASET
+
+        feats = COUNTRIES_DATASET.loc[place["code"]]
+        preds = {}
+        mask = {}
+        for label in self.model:
+            preds[label] = self.model[label].predict(feats)[0]
+            mask[label] = True
+        preds["city"] = place["city"]
+        preds["country"] = place["country"]
+        return pd.Series(preds), pd.Series(mask)
+
+

Instance variables

+
+
var dataset : pandas.core.frame.DataFrame
+
+

The dataset for the training step. +When it is loaded the first time, several variables are defined: +- lab_names: the labels names/columns of the dataset +- unique_labs: the unique labels values +- feat_names: the features names/columns of the dataset +- train_mask: the mask that refers to the cities that are labeled at least for one risk

+
+ +Expand source code + +
@property
+def dataset(self) -> pd.DataFrame:
+    """
+    The dataset for the training step.
+    When it is loaded the first time, several variables are defined:
+        - lab_names: the labels names/columns of the dataset
+        - unique_labs: the unique labels values
+        - feat_names: the features names/columns of the dataset
+        - train_mask: the mask that refers to the cities that are labeled at least for one risk
+    """
+    if self._dataset is None:
+        from data.labeled.preprocessed import LABELED_CITIES, RISKS_MAPPING
+        from data.dataset import DATASET as dataset
+
+        self.lab_names = sorted(RISKS_MAPPING.keys())
+        self.unique_labs = np.unique(dataset[self.lab_names].T.stack().values)
+        self.feat_names = [
+            x
+            for x in dataset.columns
+            if x not in self.lab_names and x not in LABELED_CITIES.columns
+        ]
+        self.train_mask = dataset[self.lab_names].apply(
+            lambda x: all(pd.isnull(x)), axis=1
+        )
+        self._dataset = dataset
+    return self._dataset
+
+
+
var filled_dataset : pandas.core.frame.DataFrame
+
+

The dataset that has filled labels, which were produced from the predictions

+
+ +Expand source code + +
@property
+def filled_dataset(self) -> pd.DataFrame:
+    """
+    The dataset that has filled labels, which were produced from the predictions
+    """
+    if self._filled_dataset is None:
+        try:
+            self._filled_dataset = pd.read_csv(FILLED_DATASET_PATH)
+        except IOError:
+            raise TrainingRequired("Filled Dataset")
+    return self._filled_dataset
+
+
+
var is_fitted : bool
+
+

Tries to load model from memory/disk, if it fails, returns False, else returns True

+
+ +Expand source code + +
@property
+def is_fitted(self) -> bool:
+    """
+    Tries to load model from memory/disk, if it fails, returns False, else returns True
+    """
+    try:
+        self.model
+    except TrainingRequired:
+        return False
+    return True
+
+
+
var model : sklearn.pipeline.Pipeline
+
+

If the model is not defined, try to load it from disk

+
+ +Expand source code + +
@property
+def model(self) -> Pipeline:
+    """
+    If model is not defined, try to loaded from disk
+    """
+    if self._model is None:
+        try:
+            from data.model import MODEL
+
+            self._model = MODEL
+        except ImportError:
+            raise TrainingRequired("Model")
+    return self._model
+
+
+
+

Methods

+
+
+def compute_metrics(self, y_true, y_pred) +
+
+

Compute metrics for regression labels of size nx1

+
+ +Expand source code + +
def compute_metrics(self, y_true, y_pred):
+    """
+    Compute metrics for regression labels of size nx1
+    """
+    metrics = {}
+    # Interpolate predictions to labels, eg convert 0.2 to 0, 0.7 to 1 etc.
+    y_pred_interp = self.unique_labs[
+        np.abs(np.reshape(self.unique_labs, (-1, 1)) - y_pred).argmin(axis=0)
+    ]
+    metrics["confusion_matrix"] = confusion_matrix(y_true, y_pred_interp)
+    metrics["classification_report"] = classification_report(y_true, y_pred)
+    return metrics
+
+
+
+def save_model(self) ‑> NoneType +
+
+

Saves the model to disk (pickled to MODEL_PATH) and reloads the data.model module

+
+ +Expand source code + +
def save_model(self) -> None:
+    """
+    Saves model to memory
+    """
+    with open(os.path.join(MODEL_PATH), "wb") as out:
+        pickle.dump(self.model, out)
+    import data.model
+
+    importlib.reload(data.model)
+
+
+
+def test(self, latitude, longitude) +
+
+

Given a specific latitude and longitude value, either returns saved predictions from the filled dataset, if the point is close to the +ones that have already been predicted, or uses a REST API to load the country to which the latitude and longitude refer, uses the country data +to create the feature vector and computes the prediction using the trained models. +Returns the series of the found labels, which also contain city and country, and the series of booleans which shows which predictions were predicted and which were not.

+
+ +Expand source code + +
def test(self, latitude, longitude):
+    """
+    Given a specific latitude and longitude value, either returns saved predictions from the filled dataset, if the point is close to the
+    ones that have already been predicted, or uses a REST API to load the country to which the latitude and longitude refer, uses the country data
+    to create the feature vector and computes the prediction using the trained models.
+    Returns the series of the found labels, which also contain city and country, and the series of booleans which shows which predictions were predicted and which where not.
+    """
+    try:
+        from data.model.predictions import FILLED_DATASET, PREDICTION_MASK
+    except ImportError:
+        raise TrainingRequired("Filled Dataset")
+    check_existing = FILLED_DATASET.apply(
+        lambda x: is_close((latitude, longitude), (x["latitude"], x["longitude"])),
+        axis=1,
+    )
+    if np.any(check_existing):
+        labs = list(sorted(self.model.keys()))
+        return (
+            FILLED_DATASET.loc[check_existing, labs + ["city", "country"]].iloc[0],
+            PREDICTION_MASK.loc[check_existing, labs].iloc[0],
+        )
+    try:
+        place = get_place(latitude, longitude)
+    except AttributeError:
+        raise InvalidCoordinates
+    from data.unlabeled import COUNTRIES_DATASET
+
+    feats = COUNTRIES_DATASET.loc[place["code"]]
+    preds = {}
+    mask = {}
+    for label in self.model:
+        preds[label] = self.model[label].predict(feats)[0]
+        mask[label] = True
+    preds["city"] = place["city"]
+    preds["country"] = place["country"]
+    return pd.Series(preds), pd.Series(mask)
+
+
+
+def train(self) ‑> NoneType +
+
+
    +
  • Trains 7 different models, one per each different water security risk.
  • +
  • Applies feature selection and generation per different model.
  • +
  • Keeps 0.3 validation size, computes classification metrics, saves them, then fits each model to the whole available dataset for each risk.
  • +
  • Creates the filled dataset and saves it to disk
  • +
  • Creates the prediction mask (what labels from the filled dataset were predicted) and saves it to disk
  • +
+
+ +Expand source code + +
def train(self) -> None:
+    """
+    - Trains 7 different models, one per each different water security risk.
+    - Applies feature selection and generation per different model.
+    - Keeps 0.3 validation size, computes classification metrics, saves them, then fits each model to the whole available dataset for each risk.
+    - Creates the filled dataset and saves it to disk
+    - Creates the prediction mask (what labels from the filled dataset were predicted) and saves it to memory
+    """
+    dataset = self.dataset
+    labeled = dataset[self.train_mask]
+    labeled[self.lab_names]
+
+    model = {}
+    train_metrics = {}
+    valid_metrics = {}
+    filled_dataset = dataset[self.lab_names + self.id_columns].copy()
+    for label in self.lab_names:
+        train_mask = ~pd.isnull(dataset[label])
+        labeled = dataset.loc[train_mask, :]
+        train_set, valid_set = train_test_split(
+            labeled, test_size=0.3, random_state=RANDOM_SEED
+        )
+
+        model[label] = Pipeline(
+            [
+                ("FeatureSelection", FeatureSelectionAndGeneration()),
+                ("Classification", Classifier()),
+            ]
+        )
+        model[label].fit(train_set[self.feat_names], train_set[label])
+        train_preds = model[label].predict(train_set[self.feat_names])
+        valid_preds = model[label].predict(valid_set[self.feat_names])
+        train_metrics[label] = self.compute_metrics(train_set[label], train_preds)
+        valid_metrics[label] = self.compute_metrics(valid_set[label], valid_preds)
+        model[label].fit(labeled[self.feat_names], labeled[label])
+
+        filled_dataset.loc[~train_mask, label] = model[label].predict(
+            dataset.loc[~train_mask, self.feat_names]
+        )
+    self.model = model
+    self.save_model()
+    with open(VALIDATION_METRICS_PATH, "wb") as out:
+        pickle.dump(valid_metrics, out)
+    with open(TRAINING_METRICS_PATH, "wb") as out:
+        pickle.dump(train_metrics, out)
+    self.filled_dataset = filled_dataset
+    self.filled_dataset.to_csv(FILLED_DATASET_PATH, index=False)
+    pd.isnull(dataset[self.lab_names]).to_csv(PREDICTION_MASK_PATH, index=False)
+    import data.model.metrics
+
+    importlib.reload(data.model.metrics)
+    import data.model.predictions
+
+    importlib.reload(data.model.predictions)
+
+
+
+
+
+class TrainingRequired +(obj) +
+
+

Exception class to raise if estimator is used before fitting.

+

This class inherits from both ValueError and AttributeError to help with +exception handling and backward compatibility.

+

Examples

+
>>> from sklearn.svm import LinearSVC
+>>> from sklearn.exceptions import NotFittedError
+>>> try:
+...     LinearSVC().predict([[1, 2], [2, 3], [3, 4]])
+... except NotFittedError as e:
+...     print(repr(e))
+NotFittedError("This LinearSVC instance is not fitted yet. Call 'fit' with
+appropriate arguments before using this estimator."...)
+
+
+

Changed in version: 0.18

+

Moved from sklearn.utils.validation.

+
+
+ +Expand source code + +
class TrainingRequired(NotFittedError):
+    def __init__(self, obj):
+        super().__init__(f"{obj} could not be loaded. Training model is required")
+
+

Ancestors

+
    +
  • sklearn.exceptions.NotFittedError
  • +
  • builtins.ValueError
  • +
  • builtins.AttributeError
  • +
  • builtins.Exception
  • +
  • builtins.BaseException
  • +
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/index.html b/documentation/WaterSecurity/index.html new file mode 100644 index 0000000..9503205 --- /dev/null +++ b/documentation/WaterSecurity/index.html @@ -0,0 +1,75 @@ + + + + + + +WaterSecurity API documentation + + + + + + + + + + + +
+ + +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/labeled_preprocessing/imputation.html b/documentation/WaterSecurity/labeled_preprocessing/imputation.html new file mode 100644 index 0000000..4b9553f --- /dev/null +++ b/documentation/WaterSecurity/labeled_preprocessing/imputation.html @@ -0,0 +1,418 @@ + + + + + + +WaterSecurity.labeled_preprocessing.imputation API documentation + + + + + + + + + + + +
+
+
+

Module WaterSecurity.labeled_preprocessing.imputation

+
+
+
+ +Expand source code + +
from sklearn.experimental import enable_iterative_imputer
+from sklearn.impute import IterativeImputer
+from sklearn.neighbors import KNeighborsRegressor
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.pipeline import make_pipeline
+from sklearn.feature_selection import SelectKBest, f_classif
+import numpy as np
+
+
+class LabeledDatasetImputer:
+    """
+    Imputes missing data on y. Assumes also missing data on X. Uses two different types of imputation, as it assumes that y is Categorical
+    k_features_per_label: the number of features to keep from X for the imputation, defaults to 0 (no features selection)
+    verbose: verbosity of the iterative imputers, defaults to 0
+    seed: the random seed, defaults to 42
+    labels_est: the estimator object to be used during labels imputation
+    feats_est: the estimator object to be used during features estimation
+    """
+
+    def __init__(
+        self,
+        k_features_per_label=0,
+        verbose=0,
+        seed=42,
+        labels_est=None,
+        feats_est=None,
+    ):
+        self.x_imputer = None
+        self.y_imputer = None
+        self.verbose = verbose
+        self.selection_mask = None
+        self.seed = seed
+        self.k_features_per_label = k_features_per_label
+        self.labels_est = labels_est
+        self.feats_est = feats_est
+
+    def create_selection_mask(self, X, y):
+        if not self.k_features_per_label:
+            return np.zeros(X.shape[1]) == 0
+        selection_mask = None
+        for cnt in range(y.shape[1]):
+            labeled = ~np.isnan(y[:, cnt])
+            _y = y[labeled, cnt]
+            _x = X[labeled, :]
+
+            selector = SelectKBest(f_classif, k=self.k_features_per_label).fit(
+                np.nan_to_num(_x), _y
+            )
+            if selection_mask is None:
+                selection_mask = selector.get_support()
+            else:
+                selection_mask = (selection_mask + selector.get_support()) > 0
+        return selection_mask
+
+    def fit_transform(self, X, y, ret_imputed_x=False):
+        """
+        X: nxp matrix
+        y: nxv matrix
+        Both matrices are allowed to have missing values
+        if `ret_imputed_x`, return (imputed_x,imputed_y), otherwise return imputed_y
+        """
+        print("Applying feature selection..")
+        self.selection_mask = self.create_selection_mask(X, y)
+        if self.feats_est is None:
+            self.feats_est = KNeighborsRegressor(n_neighbors=5)
+
+        print(f"Creating imputed X using {self.feats_est.__class__.__name__}..")
+        self.x_imputer = IterativeImputer(
+            estimator=self.feats_est,
+            initial_strategy="most_frequent",
+            verbose=self.verbose,
+            n_nearest_features=200,
+            random_state=self.seed,
+            skip_complete=True,
+        )
+        imputed_x = self.x_imputer.fit_transform(X[:, self.selection_mask])
+        if self.labels_est is None:
+            self.labels_est = make_pipeline(
+                SelectKBest(
+                    f_classif, k=min(int(0.1 * X.shape[0]), imputed_x.shape[1])
+                ),
+                RandomForestClassifier(n_estimators=50, random_state=self.seed),
+            )
+        print(f"Creating imputed Y using {self.labels_est.__class__.__name__}..")
+        self.y_imputer = IterativeImputer(
+            estimator=self.labels_est,
+            initial_strategy="most_frequent",
+            max_iter=10,
+            random_state=self.seed,
+            skip_complete=True,
+            verbose=self.verbose,
+        )
+        imputed_y = self.y_imputer.fit_transform(np.hstack([y, imputed_x]))[
+            :, : y.shape[1]
+        ]
+        if ret_imputed_x:
+            return imputed_x, imputed_y
+        return imputed_y
+
+    def transform(self, X, y, ret_imputed_x=False):
+        """
+        X: nxp matrix
+        y: nxv matrix
+        Both matrices are allowed to have missing values
+        if `ret_imputed_x`, return (imputed_x,imputed_y), otherwise return imputed_y
+        """
+        imputed_x = self.x_imputer.transform(X[:, self.selection_mask])
+        ret = self.y_imputer.transform(np.hstack([y, imputed_x]))[:, : y.shape[1]]
+        if ret_imputed_x:
+            return imputed_x, ret
+        return ret
+
+
+
+
+
+
+
+
+
+

Classes

+
+
+class LabeledDatasetImputer +(k_features_per_label=0, verbose=0, seed=42, labels_est=None, feats_est=None) +
+
+

Imputes missing data on y. Assumes also missing data on X. Uses two different types of imputation, as it assumes that y is Categorical +k_features_per_label: the number of features to keep from X for the imputation, defaults to 0 (no features selection) +verbose: verbosity of the iterative imputers, defaults to 0 +seed: the random seed, defaults to 42 +labels_est: the estimator object to be used during labels imputation +feats_est: the estimator object to be used during features estimation

+
+ +Expand source code + +
class LabeledDatasetImputer:
+    """
+    Imputes missing data on y. Assumes also missing data on X. Uses two different types of imputation, as it assumes that y is Categorical
+    k_features_per_label: the number of features to keep from X for the imputation, defaults to 0 (no features selection)
+    verbose: verbosity of the iterative imputers, defaults to 0
+    seed: the random seed, defaults to 42
+    labels_est: the estimator object to be used during labels imputation
+    feats_est: the estimator object to be used during features estimation
+    """
+
+    def __init__(
+        self,
+        k_features_per_label=0,
+        verbose=0,
+        seed=42,
+        labels_est=None,
+        feats_est=None,
+    ):
+        self.x_imputer = None
+        self.y_imputer = None
+        self.verbose = verbose
+        self.selection_mask = None
+        self.seed = seed
+        self.k_features_per_label = k_features_per_label
+        self.labels_est = labels_est
+        self.feats_est = feats_est
+
+    def create_selection_mask(self, X, y):
+        if not self.k_features_per_label:
+            return np.zeros(X.shape[1]) == 0
+        selection_mask = None
+        for cnt in range(y.shape[1]):
+            labeled = ~np.isnan(y[:, cnt])
+            _y = y[labeled, cnt]
+            _x = X[labeled, :]
+
+            selector = SelectKBest(f_classif, k=self.k_features_per_label).fit(
+                np.nan_to_num(_x), _y
+            )
+            if selection_mask is None:
+                selection_mask = selector.get_support()
+            else:
+                selection_mask = (selection_mask + selector.get_support()) > 0
+        return selection_mask
+
+    def fit_transform(self, X, y, ret_imputed_x=False):
+        """
+        X: nxp matrix
+        y: nxv matrix
+        Both matrices are allowed to have missing values
+        if `ret_imputed_x`, return (imputed_x,imputed_y), otherwise return imputed_y
+        """
+        print("Applying feature selection..")
+        self.selection_mask = self.create_selection_mask(X, y)
+        if self.feats_est is None:
+            self.feats_est = KNeighborsRegressor(n_neighbors=5)
+
+        print(f"Creating imputed X using {self.feats_est.__class__.__name__}..")
+        self.x_imputer = IterativeImputer(
+            estimator=self.feats_est,
+            initial_strategy="most_frequent",
+            verbose=self.verbose,
+            n_nearest_features=200,
+            random_state=self.seed,
+            skip_complete=True,
+        )
+        imputed_x = self.x_imputer.fit_transform(X[:, self.selection_mask])
+        if self.labels_est is None:
+            self.labels_est = make_pipeline(
+                SelectKBest(
+                    f_classif, k=min(int(0.1 * X.shape[0]), imputed_x.shape[1])
+                ),
+                RandomForestClassifier(n_estimators=50, random_state=self.seed),
+            )
+        print(f"Creating imputed Y using {self.labels_est.__class__.__name__}..")
+        self.y_imputer = IterativeImputer(
+            estimator=self.labels_est,
+            initial_strategy="most_frequent",
+            max_iter=10,
+            random_state=self.seed,
+            skip_complete=True,
+            verbose=self.verbose,
+        )
+        imputed_y = self.y_imputer.fit_transform(np.hstack([y, imputed_x]))[
+            :, : y.shape[1]
+        ]
+        if ret_imputed_x:
+            return imputed_x, imputed_y
+        return imputed_y
+
+    def transform(self, X, y, ret_imputed_x=False):
+        """
+        X: nxp matrix
+        y: nxv matrix
+        Both matrices are allowed to have missing values
+        if `ret_imputed_x`, return (imputed_x,imputed_y), otherwise return imputed_y
+        """
+        imputed_x = self.x_imputer.transform(X[:, self.selection_mask])
+        ret = self.y_imputer.transform(np.hstack([y, imputed_x]))[:, : y.shape[1]]
+        if ret_imputed_x:
+            return imputed_x, ret
+        return ret
+
+

Methods

+
+
+def create_selection_mask(self, X, y) +
+
+
+
+ +Expand source code + +
def create_selection_mask(self, X, y):
+    if not self.k_features_per_label:
+        return np.zeros(X.shape[1]) == 0
+    selection_mask = None
+    for cnt in range(y.shape[1]):
+        labeled = ~np.isnan(y[:, cnt])
+        _y = y[labeled, cnt]
+        _x = X[labeled, :]
+
+        selector = SelectKBest(f_classif, k=self.k_features_per_label).fit(
+            np.nan_to_num(_x), _y
+        )
+        if selection_mask is None:
+            selection_mask = selector.get_support()
+        else:
+            selection_mask = (selection_mask + selector.get_support()) > 0
+    return selection_mask
+
+
+
+def fit_transform(self, X, y, ret_imputed_x=False) +
+
+

X: nxp matrix +y: nxv matrix +Both matrices are allowed to have missing values +if ret_imputed_x, return (imputed_x,imputed_y), otherwise return imputed_y

+
+ +Expand source code + +
def fit_transform(self, X, y, ret_imputed_x=False):
+    """
+    X: nxp matrix
+    y: nxv matrix
+    Both matrices are allowed to have missing values
+    if `ret_imputed_x`, return (imputed_x,imputed_y), otherwise return imputed_y
+    """
+    print("Applying feature selection..")
+    self.selection_mask = self.create_selection_mask(X, y)
+    if self.feats_est is None:
+        self.feats_est = KNeighborsRegressor(n_neighbors=5)
+
+    print(f"Creating imputed X using {self.feats_est.__class__.__name__}..")
+    self.x_imputer = IterativeImputer(
+        estimator=self.feats_est,
+        initial_strategy="most_frequent",
+        verbose=self.verbose,
+        n_nearest_features=200,
+        random_state=self.seed,
+        skip_complete=True,
+    )
+    imputed_x = self.x_imputer.fit_transform(X[:, self.selection_mask])
+    if self.labels_est is None:
+        self.labels_est = make_pipeline(
+            SelectKBest(
+                f_classif, k=min(int(0.1 * X.shape[0]), imputed_x.shape[1])
+            ),
+            RandomForestClassifier(n_estimators=50, random_state=self.seed),
+        )
+    print(f"Creating imputed Y using {self.labels_est.__class__.__name__}..")
+    self.y_imputer = IterativeImputer(
+        estimator=self.labels_est,
+        initial_strategy="most_frequent",
+        max_iter=10,
+        random_state=self.seed,
+        skip_complete=True,
+        verbose=self.verbose,
+    )
+    imputed_y = self.y_imputer.fit_transform(np.hstack([y, imputed_x]))[
+        :, : y.shape[1]
+    ]
+    if ret_imputed_x:
+        return imputed_x, imputed_y
+    return imputed_y
+
+
+
+def transform(self, X, y, ret_imputed_x=False) +
+
+

X: nxp matrix +y: nxv matrix +Both matrices are allowed to have missing values +if ret_imputed_x, return (imputed_x,imputed_y), otherwise return imputed_y

+
+ +Expand source code + +
def transform(self, X, y, ret_imputed_x=False):
+    """
+    X: nxp matrix
+    y: nxv matrix
+    Both matrices are allowed to have missing values
+    if `ret_imputed_x`, return (imputed_x,imputed_y), otherwise return imputed_y
+    """
+    imputed_x = self.x_imputer.transform(X[:, self.selection_mask])
+    ret = self.y_imputer.transform(np.hstack([y, imputed_x]))[:, : y.shape[1]]
+    if ret_imputed_x:
+        return imputed_x, ret
+    return ret
+
+
+
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/labeled_preprocessing/index.html b/documentation/WaterSecurity/labeled_preprocessing/index.html new file mode 100644 index 0000000..b903ad0 --- /dev/null +++ b/documentation/WaterSecurity/labeled_preprocessing/index.html @@ -0,0 +1,65 @@ + + + + + + +WaterSecurity.labeled_preprocessing API documentation + + + + + + + + + + + +
+ + +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/unlabeled_preprocessing/helpers.html b/documentation/WaterSecurity/unlabeled_preprocessing/helpers.html new file mode 100644 index 0000000..bc4a713 --- /dev/null +++ b/documentation/WaterSecurity/unlabeled_preprocessing/helpers.html @@ -0,0 +1,211 @@ + + + + + + +WaterSecurity.unlabeled_preprocessing.helpers API documentation + + + + + + + + + + + +
+
+
+

Module WaterSecurity.unlabeled_preprocessing.helpers

+
+
+
+ +Expand source code + +
from sklearn.experimental import enable_iterative_imputer
+from sklearn.impute import IterativeImputer
+import pandas as pd
+
+def dropColumnHalf(df):
+    """
+    Removes columns where the number of missig values is 50% or more
+    """
+    df.dropna(thresh=len(df.index)/2, axis=1, inplace=True)
+
+def fill_missing_with_column(df, into, fro):
+    """
+    Merges one column into the other filling null values of the into colun and removing the fro column
+    """
+    df[into] = df[into].combine_first(df[fro])
+    df.drop([fro], axis=1, inplace = True)
+
+    
+def impute_df(df,verbose=0, **kwargs):
+    """
+    Imputes a df and returns a dataframe with the original and imputed values
+    """
+    imp = IterativeImputer(verbose=verbose, **kwargs)
+    imp.fit_transform(df)
+    imputed_df = imp.transform(df)
+    return pd.DataFrame(imputed_df, columns=df.columns,index=df.index)
+
+def print_missing_percentages(df):
+    """
+    Max, min and mean number of missing values for the columns
+    """
+    percent_missing = df.isnull().sum() * 100 / len(df)
+    max_missing = percent_missing.max()
+    min_missing = percent_missing.min()
+    mean_missing = percent_missing.mean()
+    print("Max, min and mean number of missing values for the columns")
+    print("Max:", max_missing,'%')
+    print("Min:", min_missing,'%')
+    print("Mean:", mean_missing,'%')
+    return min_missing, max_missing
+
+
+def find_all_integer_columns(df):
+    """
+    Returns an array of all colums that contain only integers or null values
+    """
+    integer_columns = df.applymap(lambda x: int(x)==x if pd.notnull(x) else x).prod().values.astype(bool)
+    return df.columns[integer_columns].values
+
+
+
+
+
+
+
+

Functions

+
+
+def dropColumnHalf(df) +
+
+

Removes columns where the number of missing values is 50% or more

+
+ +Expand source code + +
def dropColumnHalf(df):
+    """
+    Removes columns where the number of missig values is 50% or more
+    """
+    df.dropna(thresh=len(df.index)/2, axis=1, inplace=True)
+
+
+
+def fill_missing_with_column(df, into, fro) +
+
+

Merges one column into the other, filling null values of the into column and removing the fro column

+
+ +Expand source code + +
def fill_missing_with_column(df, into, fro):
+    """
+    Merges one column into the other filling null values of the into colun and removing the fro column
+    """
+    df[into] = df[into].combine_first(df[fro])
+    df.drop([fro], axis=1, inplace = True)
+
+
+
+def find_all_integer_columns(df) +
+
+

Returns an array of all columns that contain only integers or null values

+
+ +Expand source code + +
def find_all_integer_columns(df):
+    """
+    Returns an array of all colums that contain only integers or null values
+    """
+    integer_columns = df.applymap(lambda x: int(x)==x if pd.notnull(x) else x).prod().values.astype(bool)
+    return df.columns[integer_columns].values
+
+
+
+def impute_df(df, verbose=0, **kwargs) +
+
+

Imputes a df and returns a dataframe with the original and imputed values

+
+ +Expand source code + +
def impute_df(df,verbose=0, **kwargs):
+    """
+    Imputes a df and returns a dataframe with the original and imputed values
+    """
+    imp = IterativeImputer(verbose=verbose, **kwargs)
+    imp.fit_transform(df)
+    imputed_df = imp.transform(df)
+    return pd.DataFrame(imputed_df, columns=df.columns,index=df.index)
+
+
+
+def print_missing_percentages(df) +
+
+

Max, min and mean percentage of missing values across the columns

+
+ +Expand source code + +
def print_missing_percentages(df):
+    """
+    Max, min and mean number of missing values for the columns
+    """
+    percent_missing = df.isnull().sum() * 100 / len(df)
+    max_missing = percent_missing.max()
+    min_missing = percent_missing.min()
+    mean_missing = percent_missing.mean()
+    print("Max, min and mean number of missing values for the columns")
+    print("Max:", max_missing,'%')
+    print("Min:", min_missing,'%')
+    print("Mean:", mean_missing,'%')
+    return min_missing, max_missing
+
+
+
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/unlabeled_preprocessing/index.html b/documentation/WaterSecurity/unlabeled_preprocessing/index.html new file mode 100644 index 0000000..2e19913 --- /dev/null +++ b/documentation/WaterSecurity/unlabeled_preprocessing/index.html @@ -0,0 +1,65 @@ + + + + + + +WaterSecurity.unlabeled_preprocessing API documentation + + + + + + + + + + + +
+ + +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/utils/geo.html b/documentation/WaterSecurity/utils/geo.html new file mode 100644 index 0000000..917fb00 --- /dev/null +++ b/documentation/WaterSecurity/utils/geo.html @@ -0,0 +1,470 @@ + + + + + + +WaterSecurity.utils.geo API documentation + + + + + + + + + + + +
+
+
+

Module WaterSecurity.utils.geo

+
+
+
+ +Expand source code + +
from math import sqrt
+import requests
+
+import xml.etree.ElementTree as ET
+from typing import TypedDict
+import haversine as hs
+
+TWO_TO_THREE_LETTER_CODE = {
+    "AF": "AFG",
+    "AX": "ALA",
+    "AL": "ALB",
+    "DZ": "DZA",
+    "AS": "ASM",
+    "AD": "AND",
+    "AO": "AGO",
+    "AI": "AIA",
+    "AQ": "ATA",
+    "AG": "ATG",
+    "AR": "ARG",
+    "AM": "ARM",
+    "AW": "ABW",
+    "AU": "AUS",
+    "AT": "AUT",
+    "AZ": "AZE",
+    "BS": "BHS",
+    "BH": "BHR",
+    "BD": "BGD",
+    "BB": "BRB",
+    "BY": "BLR",
+    "BE": "BEL",
+    "BZ": "BLZ",
+    "BJ": "BEN",
+    "BM": "BMU",
+    "BT": "BTN",
+    "BO": "BOL",
+    "BA": "BIH",
+    "BW": "BWA",
+    "BV": "BVT",
+    "BR": "BRA",
+    "IO": "IOT",
+    "BN": "BRN",
+    "BG": "BGR",
+    "BF": "BFA",
+    "BI": "BDI",
+    "KH": "KHM",
+    "CM": "CMR",
+    "CA": "CAN",
+    "CV": "CPV",
+    "KY": "CYM",
+    "CF": "CAF",
+    "TD": "TCD",
+    "CL": "CHL",
+    "CN": "CHN",
+    "CX": "CXR",
+    "CC": "CCK",
+    "CO": "COL",
+    "KM": "COM",
+    "CG": "COG",
+    "CD": "COD",
+    "CK": "COK",
+    "CR": "CRI",
+    "CI": "CIV",
+    "HR": "HRV",
+    "CU": "CUB",
+    "CY": "CYP",
+    "CZ": "CZE",
+    "DK": "DNK",
+    "DJ": "DJI",
+    "DM": "DMA",
+    "DO": "DOM",
+    "EC": "ECU",
+    "EG": "EGY",
+    "SV": "SLV",
+    "GQ": "GNQ",
+    "ER": "ERI",
+    "EE": "EST",
+    "ET": "ETH",
+    "FK": "FLK",
+    "FO": "FRO",
+    "FJ": "FJI",
+    "FI": "FIN",
+    "FR": "FRA",
+    "GF": "GUF",
+    "PF": "PYF",
+    "TF": "ATF",
+    "GA": "GAB",
+    "GM": "GMB",
+    "GE": "GEO",
+    "DE": "DEU",
+    "GH": "GHA",
+    "GI": "GIB",
+    "GR": "GRC",
+    "GL": "GRL",
+    "GD": "GRD",
+    "GP": "GLP",
+    "GU": "GUM",
+    "GT": "GTM",
+    "GG": "GGY",
+    "GN": "GIN",
+    "GW": "GNB",
+    "GY": "GUY",
+    "HT": "HTI",
+    "HM": "HMD",
+    "VA": "VAT",
+    "HN": "HND",
+    "HK": "HKG",
+    "HU": "HUN",
+    "IS": "ISL",
+    "IN": "IND",
+    "ID": "IDN",
+    "IR": "IRN",
+    "IQ": "IRQ",
+    "IE": "IRL",
+    "IM": "IMN",
+    "IL": "ISR",
+    "IT": "ITA",
+    "JM": "JAM",
+    "JP": "JPN",
+    "JE": "JEY",
+    "JO": "JOR",
+    "KZ": "KAZ",
+    "KE": "KEN",
+    "KI": "KIR",
+    "KP": "PRK",
+    "KR": "KOR",
+    "KW": "KWT",
+    "KG": "KGZ",
+    "LA": "LAO",
+    "LV": "LVA",
+    "LB": "LBN",
+    "LS": "LSO",
+    "LR": "LBR",
+    "LY": "LBY",
+    "LI": "LIE",
+    "LT": "LTU",
+    "LU": "LUX",
+    "MO": "MAC",
+    "MK": "MKD",
+    "MG": "MDG",
+    "MW": "MWI",
+    "MY": "MYS",
+    "MV": "MDV",
+    "ML": "MLI",
+    "MT": "MLT",
+    "MH": "MHL",
+    "MQ": "MTQ",
+    "MR": "MRT",
+    "MU": "MUS",
+    "YT": "MYT",
+    "MX": "MEX",
+    "FM": "FSM",
+    "MD": "MDA",
+    "MC": "MCO",
+    "MN": "MNG",
+    "ME": "MNE",
+    "MS": "MSR",
+    "MA": "MAR",
+    "MZ": "MOZ",
+    "MM": "MMR",
+    "NA": "NAM",
+    "NR": "NRU",
+    "NP": "NPL",
+    "NL": "NLD",
+    "AN": "ANT",
+    "NC": "NCL",
+    "NZ": "NZL",
+    "NI": "NIC",
+    "NE": "NER",
+    "NG": "NGA",
+    "NU": "NIU",
+    "NF": "NFK",
+    "MP": "MNP",
+    "NO": "NOR",
+    "OM": "OMN",
+    "PK": "PAK",
+    "PW": "PLW",
+    "PS": "PSE",
+    "PA": "PAN",
+    "PG": "PNG",
+    "PY": "PRY",
+    "PE": "PER",
+    "PH": "PHL",
+    "PN": "PCN",
+    "PL": "POL",
+    "PT": "PRT",
+    "PR": "PRI",
+    "QA": "QAT",
+    "RE": "REU",
+    "RO": "ROU",
+    "RU": "RUS",
+    "RW": "RWA",
+    "BL": "BLM",
+    "SH": "SHN",
+    "KN": "KNA",
+    "LC": "LCA",
+    "MF": "MAF",
+    "PM": "SPM",
+    "VC": "VCT",
+    "WS": "WSM",
+    "SM": "SMR",
+    "ST": "STP",
+    "SA": "SAU",
+    "SN": "SEN",
+    "RS": "SRB",
+    "SC": "SYC",
+    "SL": "SLE",
+    "SG": "SGP",
+    "SK": "SVK",
+    "SI": "SVN",
+    "SB": "SLB",
+    "SO": "SOM",
+    "ZA": "ZAF",
+    "GS": "SGS",
+    "ES": "ESP",
+    "LK": "LKA",
+    "SD": "SDN",
+    "SR": "SUR",
+    "SJ": "SJM",
+    "SZ": "SWZ",
+    "SE": "SWE",
+    "CH": "CHE",
+    "SY": "SYR",
+    "TW": "TWN",
+    "TJ": "TJK",
+    "TZ": "TZA",
+    "TH": "THA",
+    "TL": "TLS",
+    "TG": "TGO",
+    "TK": "TKL",
+    "TO": "TON",
+    "TT": "TTO",
+    "TN": "TUN",
+    "TR": "TUR",
+    "TM": "TKM",
+    "TC": "TCA",
+    "TV": "TUV",
+    "UG": "UGA",
+    "UA": "UKR",
+    "AE": "ARE",
+    "GB": "GBR",
+    "US": "USA",
+    "UM": "UMI",
+    "UY": "URY",
+    "UZ": "UZB",
+    "VU": "VUT",
+    "VE": "VEN",
+    "VN": "VNM",
+    "VG": "VGB",
+    "VI": "VIR",
+    "WF": "WLF",
+    "EH": "ESH",
+    "YE": "YEM",
+    "ZM": "ZMB",
+    "ZW": "ZWE",
+}
+
+
+class PlaceInfo(TypedDict):
+    city: str
+    country: str
+    code: str
+
+
+def get_place(latitude: float, longitude: float) -> PlaceInfo:
+    """
+    Returns city, country and country 3-letter code, given latitude and longitude
+    Raises if the supplied coordinates cannot be matched to a specific country (eg if those refer to sea area)
+    """
+    req = requests.get(
+        f"http://api.geonames.org/findNearbyPlaceName?lat={latitude}&lng={longitude}&cities=cities15000&username=vaslem"
+    )
+    tree = ET.fromstring(req.text)
+    geoname = tree.find("geoname")
+    try:
+        city = geoname.find("toponymName").text
+        country = geoname.find("countryName").text
+        code = geoname.find("countryCode").text
+        return {
+            "city": city,
+            "country": country,
+            "code": TWO_TO_THREE_LETTER_CODE[code],
+        }
+    except AttributeError:
+        raise
+
+
+def is_close(loc1, loc2, thres: float = 3) -> bool:
+    """
+    Accepts 2 points defined by 2 coordinates (iterables of size 2) and based on a distance threshold (in km),
+    returns whether those points are close or not to each other
+    """
+    return hs.haversine(loc1, loc2) < thres
+
+
+
+
+
+
+
+

Functions

+
+
+def get_place(latitude: float, longitude: float) ‑> PlaceInfo +
+
+

Returns city, country and country 3-letter code, given latitude and longitude +Raises if the supplied coordinates cannot be matched to a specific country (eg if those refer to sea area)

+
+ +Expand source code + +
def get_place(latitude: float, longitude: float) -> PlaceInfo:
+    """
+    Returns city, country and country 3-letter code, given latitude and longitude
+    Raises if the supplied coordinates cannot be matched to a specific country (eg if those refer to sea area)
+    """
+    req = requests.get(
+        f"http://api.geonames.org/findNearbyPlaceName?lat={latitude}&lng={longitude}&cities=cities15000&username=vaslem"
+    )
+    tree = ET.fromstring(req.text)
+    geoname = tree.find("geoname")
+    try:
+        city = geoname.find("toponymName").text
+        country = geoname.find("countryName").text
+        code = geoname.find("countryCode").text
+        return {
+            "city": city,
+            "country": country,
+            "code": TWO_TO_THREE_LETTER_CODE[code],
+        }
+    except AttributeError:
+        raise
+
+
+
+def is_close(loc1, loc2, thres: float = 3) ‑> bool +
+
+

Accepts 2 points defined by 2 coordinates (iterables of size 2) and based on a distance threshold (in km), +returns whether those points are close or not to each other

+
+ +Expand source code + +
def is_close(loc1, loc2, thres: float = 3) -> bool:
+    """
+    Accepts 2 points defined by 2 coordinates (iterables of size 2) and based on a distance threshold (in km),
+    returns whether those points are close or not to each other
+    """
+    return hs.haversine(loc1, loc2) < thres
+
+
+
+
+
+

Classes

+
+
+class PlaceInfo +(*args, **kwargs) +
+
+

dict() -> new empty dictionary +dict(mapping) -> new dictionary initialized from a mapping object's +(key, value) pairs +dict(iterable) -> new dictionary initialized as if via: +d = {} +for k, v in iterable: +d[k] = v +dict(**kwargs) -> new dictionary initialized with the name=value pairs +in the keyword argument list. +For example: +dict(one=1, two=2)

+
+ +Expand source code + +
class PlaceInfo(TypedDict):
+    city: str
+    country: str
+    code: str
+
+

Ancestors

+
    +
  • builtins.dict
  • +
+

Class variables

+
+
var city : str
+
+
+
+
var code : str
+
+
+
+
var country : str
+
+
+
+
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/utils/index.html b/documentation/WaterSecurity/utils/index.html new file mode 100644 index 0000000..fa88845 --- /dev/null +++ b/documentation/WaterSecurity/utils/index.html @@ -0,0 +1,70 @@ + + + + + + +WaterSecurity.utils API documentation + + + + + + + + + + + +
+ + +
+ + + \ No newline at end of file diff --git a/documentation/WaterSecurity/utils/nlp.html b/documentation/WaterSecurity/utils/nlp.html new file mode 100644 index 0000000..61358c8 --- /dev/null +++ b/documentation/WaterSecurity/utils/nlp.html @@ -0,0 +1,287 @@ + + + + + + +WaterSecurity.utils.nlp API documentation + + + + + + + + + + + +
+
+
+

Module WaterSecurity.utils.nlp

+
+
+
+ +Expand source code + +
import numpy as np
+import spacy
+
+try:
+    nlp = spacy.load("en_core_web_md")
+except:
+    spacy.cli.download("en_core_web_md")
+    nlp = spacy.load("en_core_web_md")
+
+from sklearn.base import BaseEstimator, TransformerMixin
+
+
+class SimilarityAnalysis(BaseEstimator, TransformerMixin):
+    """
+    Creates similarity matrix to the provided pandas series. Can be fitted to a specific data.
+    The computed non empty spacy vectors will then be used as reference to compare with another
+    dataset.
+    """
+
+    def __init__(self):
+        self.similarity_vectors = None
+
+    def fit(self, description):
+        """
+        Creates an sxs matrix
+        """
+        ret = description.apply(lambda x: nlp(".".join(x)))
+        self.similarity_vectors = [x for x in ret if x]
+        return self
+
+    def transform(self, description):
+        """
+        Produces a nxs matrix
+        """
+        ret = description.apply(lambda x: nlp(".".join(x)))
+        ret = np.vstack(
+            ret.apply(
+                lambda x: [
+                    (x.similarity(y) if x else np.nan) for y in self.similarity_vectors
+                ]
+            )
+        )
+        return ret
+
+    def fit_transform(self, description):
+        """
+        Produces a nxn matrix
+        """
+        ret = description.apply(lambda x: nlp(".".join(x)))
+        self.similarity_vectors = [x for x in ret if x]
+        ret = ret.apply(
+            lambda x: [
+                (x.similarity(y) if x else np.nan) for y in self.similarity_vectors
+            ]
+        )
+        ret = np.vstack(ret)
+        return ret
+
+
+def create_sim_vector(description):
+    return SimilarityAnalysis().fit_transform(description)
+
+
+
+
+
+
+
+

Functions

+
+
+def create_sim_vector(description) +
+
+
+
+ +Expand source code + +
def create_sim_vector(description):
+    return SimilarityAnalysis().fit_transform(description)
+
+
+
+
+
+

Classes

+
+
+class SimilarityAnalysis +
+
+

Creates a similarity matrix for the provided pandas series. Can be fitted to a specific dataset. +The computed non-empty spaCy vectors will then be used as a reference to compare with another +dataset.

+
+ +Expand source code + +
class SimilarityAnalysis(BaseEstimator, TransformerMixin):
+    """
+    Creates similarity matrix to the provided pandas series. Can be fitted to a specific data.
+    The computed non empty spacy vectors will then be used as reference to compare with another
+    dataset.
+    """
+
+    def __init__(self):
+        self.similarity_vectors = None
+
+    def fit(self, description):
+        """
+        Creates an sxs matrix
+        """
+        ret = description.apply(lambda x: nlp(".".join(x)))
+        self.similarity_vectors = [x for x in ret if x]
+        return self
+
+    def transform(self, description):
+        """
+        Produces a nxs matrix
+        """
+        ret = description.apply(lambda x: nlp(".".join(x)))
+        ret = np.vstack(
+            ret.apply(
+                lambda x: [
+                    (x.similarity(y) if x else np.nan) for y in self.similarity_vectors
+                ]
+            )
+        )
+        return ret
+
+    def fit_transform(self, description):
+        """
+        Produces a nxn matrix
+        """
+        ret = description.apply(lambda x: nlp(".".join(x)))
+        self.similarity_vectors = [x for x in ret if x]
+        ret = ret.apply(
+            lambda x: [
+                (x.similarity(y) if x else np.nan) for y in self.similarity_vectors
+            ]
+        )
+        ret = np.vstack(ret)
+        return ret
+
+

Ancestors

+
    +
  • sklearn.base.BaseEstimator
  • +
  • sklearn.base.TransformerMixin
  • +
+

Methods

+
+
+def fit(self, description) +
+
+

Creates an sxs matrix

+
+ +Expand source code + +
def fit(self, description):
+    """
+    Creates an sxs matrix
+    """
+    ret = description.apply(lambda x: nlp(".".join(x)))
+    self.similarity_vectors = [x for x in ret if x]
+    return self
+
+
+
+def fit_transform(self, description) +
+
+

Produces a nxn matrix

+
+ +Expand source code + +
def fit_transform(self, description):
+    """
+    Produces a nxn matrix
+    """
+    ret = description.apply(lambda x: nlp(".".join(x)))
+    self.similarity_vectors = [x for x in ret if x]
+    ret = ret.apply(
+        lambda x: [
+            (x.similarity(y) if x else np.nan) for y in self.similarity_vectors
+        ]
+    )
+    ret = np.vstack(ret)
+    return ret
+
+
+
+def transform(self, description) +
+
+

Produces a nxs matrix

+
+ +Expand source code + +
def transform(self, description):
+    """
+    Produces a nxs matrix
+    """
+    ret = description.apply(lambda x: nlp(".".join(x)))
+    ret = np.vstack(
+        ret.apply(
+            lambda x: [
+                (x.similarity(y) if x else np.nan) for y in self.similarity_vectors
+            ]
+        )
+    )
+    return ret
+
+
+
+
+
+
+
+ +
+ + + \ No newline at end of file diff --git a/generate_documentation.py b/generate_documentation.py index 1d6a142..b242e07 100644 --- a/generate_documentation.py +++ b/generate_documentation.py @@ -2,7 +2,10 @@ import os context = pdoc.Context() -modules = pdoc.Module(".", context=context, skip_errors=True) +with open(".docignore","r") as rf: + ignore_strings = rf.read().splitlines() + +modules = pdoc.Module(".", context=context, skip_errors=True, docfilter=lambda x: x.name not in ignore_strings) pdoc.link_inheritance(context) @@ -18,4 +21,5 @@ def recursive_htmls(mod): fname = f"documentation/{'/'.join(module_name.split('.'))}.html" os.makedirs(os.path.dirname(fname), exist_ok=True) with open(fname,"w", encoding="utf-8") as f: - f.writelines(html) \ No newline at end of file + f.writelines(html) + From c04a6d7b8b20345c49ad095f97a9d58df930d28c Mon Sep 17 00:00:00 2001 From: Joachim Bache-Mathiesen Date: Wed, 19 May 2021 19:45:27 +0200 Subject: [PATCH 4/6] Remove changes to prep_hdro --- unlabeled_preprocessing/prep_hdro_v2.ipynb | 2358 +++++++++----------- 1 file changed, 1072 insertions(+), 1286 deletions(-) diff --git a/unlabeled_preprocessing/prep_hdro_v2.ipynb b/unlabeled_preprocessing/prep_hdro_v2.ipynb index 6cf9b35..820f22f 100644 --- a/unlabeled_preprocessing/prep_hdro_v2.ipynb +++ b/unlabeled_preprocessing/prep_hdro_v2.ipynb @@ -7,9 +7,16 @@ "outputs": [ { "output_type": "stream", - "name": "stderr", + "name": "stdout", "text": [ - "..\\data\\unlabeled\\raw\\__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)\n..\\data\\unlabeled\\raw\\__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n aquastat_wr = pd.read_csv(aquastat_wr_path, 
skipfooter=8)\n..\\data\\unlabeled\\raw\\__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)\n" + "Something went wrong loading the Economic Fitness Dataset [Errno 2] File b'../data/unlabeled/raw/Economic_Fitness_CSV\\\\Country.csv' does not exist: b'../data/unlabeled/raw/Economic_Fitness_CSV\\\\Country.csv'\n", + "Something went wrong loading the Education Dataset [Errno 2] File b'../data/unlabeled/raw/Edstats_csv/EdStatsCountry.csv' does not exist: b'../data/unlabeled/raw/Edstats_csv/EdStatsCountry.csv'\n", + "../data/unlabeled/raw/__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", + " aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)\n", + "../data/unlabeled/raw/__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", + " aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)\n", + "../data/unlabeled/raw/__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.\n", + " aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)\n" ] } ], @@ -31,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "tags": [] }, @@ -51,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": { "tags": [] }, @@ -309,10 +316,10 @@ "\n", "[195 rows x 98 columns]" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gross enrolment ratio, pre-primary (% of preschool-age children)Percentage of primary schools with access to the internetPercentage of secondary schools with access to the internetGross enrolment ratio, tertiary (% of tertiary school-age population)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, female (%)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, male (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are female (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are male (%)Primary school teachers trained to teach (%)Pupil-teacher ratio, primary school (pupils per teacher)
AFG26.08013.22036.9201.9486.00627.24468.95779.72638.04225.8...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AGO30.23223.13338.0564.0236.35930.000150.52665.99531.82566.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ALB93.17493.70092.4979.70210.61429.50819.64252.8522.88161.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AND72.32771.48473.32710.43910.56446.429NaNNaN0.07788.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ARG57.15859.16154.82811.12310.72939.87762.78221.80544.78192.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
WSM74.94279.12771.583NaNNaN10.00023.88629.9830.19718.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEM28.02019.92036.9182.8805.1460.97160.35245.62729.16237.3...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZAF75.47874.97778.20710.03110.29145.33367.90810.29858.55866.9...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZMB44.44038.48854.0686.2838.17617.964120.11278.13417.86144.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZWE64.93559.79270.7838.0668.92334.57186.13564.73914.64532.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n

195 rows × 98 columns

\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gross enrolment ratio, pre-primary (% of preschool-age children)Percentage of primary schools with access to the internetPercentage of secondary schools with access to the internetGross enrolment ratio, tertiary (% of tertiary school-age population)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, female (%)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, male (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are female (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are male (%)Primary school teachers trained to teach (%)Pupil-teacher ratio, primary school (pupils per teacher)
AFG26.08013.22036.9201.9486.00627.24468.95779.72638.04225.8...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AGO30.23223.13338.0564.0236.35930.000150.52665.99531.82566.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ALB93.17493.70092.4979.70210.61429.50819.64252.8522.88161.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AND72.32771.48473.32710.43910.56446.429NaNNaN0.07788.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ARG57.15859.16154.82811.12310.72939.87762.78221.80544.78192.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
WSM74.94279.12771.583NaNNaN10.00023.88629.9830.19718.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEM28.02019.92036.9182.8805.1460.97160.35245.62729.16237.3...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZAF75.47874.97778.20710.03110.29145.33367.90810.29858.55866.9...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZMB44.44038.48854.0686.2838.17617.964120.11278.13417.86144.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZWE64.93559.79270.7838.0668.92334.57186.13564.73914.64532.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n

195 rows × 98 columns

\n
" }, "metadata": {}, - "execution_count": 6 + "execution_count": 3 } ], "source": [ @@ -331,7 +338,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -349,7 +356,7 @@ ] }, "metadata": {}, - "execution_count": 7 + "execution_count": 4 } ], "source": [ @@ -365,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -381,14 +388,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Max, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 10.616150019135095 %\n" + "Max, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 10.616150019135096 %\n" ] } ], @@ -398,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -567,10 +574,10 @@ "\n", "[8 rows x 67 columns]" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
count175.000000167.000000167.000000174.000000174.000000193.000000185.000000180.000000195.000000195.000000...167.000000178.000000178.000000167.000000167.000000156.000000152.000000156.000000152.000000152.000000
mean61.06807461.73601265.8235878.4800179.13340222.98129548.30934638.25801139.39142659.257436...0.93899414440.99851124458.3313540.7026830.7421860.54506419.38914523.40138519.0025990.595250
std29.61079829.28405026.3953583.4213322.80298411.82878440.52883427.774415146.48585523.231038...0.07455915359.93598623943.4826280.1658670.1431940.1727899.9487189.7440049.7773240.190002
min0.0000001.7380009.0000001.0700002.2560000.1000000.2830000.1440000.01100013.200000...0.488000186.041000640.1050000.2700000.4320000.1760004.4440008.5000004.4240000.232000
25%37.29650036.92500045.3565005.8500006.73375014.76500013.17700012.5382502.08100041.200000...0.9085002925.6142506275.9337500.5770000.6215000.40475010.79225016.60300010.5940000.431500
50%64.82800068.06700070.6820009.1110009.25250021.09400040.53600032.5335008.77200060.000000...0.9650008399.44250016951.3570000.7300000.7600000.52750017.93450021.77950017.5260000.604000
75%89.14500087.99050090.91650011.23050011.55100030.00000070.50400063.34550028.56250078.000000...0.98600022583.77950035488.4760000.8315000.8485000.69175027.61525028.62500027.0125000.767250
max100.000000100.000000100.00000013.88200014.43100055.660000186.53800094.5810001433.784000100.000000...1.03600071387.276000107833.0290000.9490000.9650000.85800045.30700056.99600044.1670000.899000
\n

8 rows × 67 columns

\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
count175.000000167.000000167.000000174.000000174.000000193.000000185.000000180.000000195.000000195.000000...167.000000178.000000178.000000167.000000167.000000156.000000152.000000156.000000152.000000152.000000
mean61.06807461.73601265.8235878.4800179.13340222.98129548.30934638.25801139.39142659.257436...0.93899414440.99851124458.3313540.7026830.7421860.54506419.38914523.40138519.0025990.595250
std29.61079829.28405026.3953583.4213322.80298411.82878440.52883427.774415146.48585523.231038...0.07455915359.93598623943.4826280.1658670.1431940.1727899.9487189.7440049.7773240.190002
min0.0000001.7380009.0000001.0700002.2560000.1000000.2830000.1440000.01100013.200000...0.488000186.041000640.1050000.2700000.4320000.1760004.4440008.5000004.4240000.232000
25%37.29650036.92500045.3565005.8500006.73375014.76500013.17700012.5382502.08100041.200000...0.9085002925.6142506275.9337500.5770000.6215000.40475010.79225016.60300010.5940000.431500
50%64.82800068.06700070.6820009.1110009.25250021.09400040.53600032.5335008.77200060.000000...0.9650008399.44250016951.3570000.7300000.7600000.52750017.93450021.77950017.5260000.604000
75%89.14500087.99050090.91650011.23050011.55100030.00000070.50400063.34550028.56250078.000000...0.98600022583.77950035488.4760000.8315000.8485000.69175027.61525028.62500027.0125000.767250
max100.000000100.000000100.00000013.88200014.43100055.660000186.53800094.5810001433.784000100.000000...1.03600071387.276000107833.0290000.9490000.9650000.85800045.30700056.99600044.1670000.899000
\n

8 rows × 67 columns

\n
" }, "metadata": {}, - "execution_count": 10 + "execution_count": 7 } ], "source": [ @@ -589,7 +596,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -659,13 +666,22 @@ ] }, "metadata": {}, - "execution_count": 12 + "execution_count": 8 } ], "source": [ + "# Column values are shown\n", + "\n", "df_inicator_values.columns.values" ] }, + { + "source": [ + "## Initial Correlation Matrix" + ], + "cell_type": "markdown", + "metadata": {} + }, { "cell_type": "code", "execution_count": 13, @@ -700,16 +716,23 @@ "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size),cmap='mako')" ] }, + { + "source": [ + "## Division of the columns" + ], + "cell_type": "markdown", + "metadata": {} + }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 9, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "['Population with at least some secondary education (% ages 25 and older)', 'Population with at least some secondary education, female (% ages 25 and older)', 'Population with at least some secondary education, male (% ages 25 and older)', 'Share of seats in parliament (% held by women)', 'Vulnerable employment (% of total employment)', 'Urban population (%)', 'Labour force participation rate (% ages 15 and older), female', 'Labour force participation rate (% ages 15 and older), male', 'Remittances, inflows (% of GDP)', 'Foreign direct investment, net inflows (% of GDP)', 'Infants lacking immunization, measles (% of one-year-olds)', 'Infants lacking immunization, DTP (% of one-year-olds)', 'Gross fixed capital formation (% of GDP)', 'Inequality in education (%)', 'Inequality in life expectancy (%)', 'Unemployment, youth (% ages 15?24)', 'Private capital flows (% of GDP)', 'Exports and imports (% of GDP)', 'Unemployment, total (% of labour force)', 'Youth not in school or employment (% ages 15-24)', 'Labour force participation rate (% 
ages 15 and older)', 'Employment to population ratio (% ages 15 and older)', 'Employment in agriculture (% of total employment)', 'Employment in services (% of total employment)', 'Working poor at PPP$3.20 a day (% of total employment)', 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 'Gross capital formation (% of GDP)', 'Overall loss in HDI due to inequality (%)', 'Inequality in income (%)'] \n 29\n['Gross domestic product (GDP), total (2017 PPP $ billions)', 'GDP per capita (2017 PPP $)', 'Gross national income (GNI) per capita (constant 2017 PPP$)', 'Estimated gross national income per capita, female (2017 PPP $)', 'Estimated gross national income per capita, male (2017 PPP $)'] \n 5\n['Gender Inequality Index (GII)', 'Inequality-adjusted education index', 'Inequality-adjusted life expectancy index', 'Life expectancy index', 'Income index', 'Education index', 'Human Development Index (HDI)', 'Gender Development Index (GDI)', 'Human Development Index (HDI), female', 'Human Development Index (HDI), male', 'Inequality-adjusted income index'] \n 11\n['Mean years of schooling, female (years)', 'Mean years of schooling, male (years)', 'Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Life expectancy at birth, female (years)', 'Life expectancy at birth, male (years)', 'Expected years of schooling, female (years)', 'Expected years of schooling, male (years)'] \n 9\n['Adolescent birth rate (births per 1,000 women ages 15-19)', 'Total population (millions)', 'Sex ratio at birth (male to female births)', 'Population ages 15?64 (millions)', 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 'Population ages 65 and older (millions)', 'Population under age 5 (millions)', 'HDI rank', 'Total unemployment rate (female to male ratio)', 'Youth unemployment rate (female to male ratio)', 
'Coefficient of human inequality', 'Inequality-adjusted HDI (IHDI)'] \n 13\n67\n" + "['Population with at least some secondary education (% ages 25 and older)', 'Population with at least some secondary education, female (% ages 25 and older)', 'Population with at least some secondary education, male (% ages 25 and older)', 'Share of seats in parliament (% held by women)', 'Vulnerable employment (% of total employment)', 'Urban population (%)', 'Labour force participation rate (% ages 15 and older), female', 'Labour force participation rate (% ages 15 and older), male', 'Remittances, inflows (% of GDP)', 'Foreign direct investment, net inflows (% of GDP)', 'Infants lacking immunization, measles (% of one-year-olds)', 'Infants lacking immunization, DTP (% of one-year-olds)', 'Gross fixed capital formation (% of GDP)', 'Inequality in education (%)', 'Inequality in life expectancy (%)', 'Unemployment, youth (% ages 15?24)', 'Private capital flows (% of GDP)', 'Exports and imports (% of GDP)', 'Unemployment, total (% of labour force)', 'Youth not in school or employment (% ages 15-24)', 'Labour force participation rate (% ages 15 and older)', 'Employment to population ratio (% ages 15 and older)', 'Employment in agriculture (% of total employment)', 'Employment in services (% of total employment)', 'Working poor at PPP$3.20 a day (% of total employment)', 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 'Gross capital formation (% of GDP)', 'Overall loss in HDI due to inequality (%)', 'Inequality in income (%)'] \n 29\n['Gross domestic product (GDP), total (2017 PPP $ billions)', 'GDP per capita (2017 PPP $)', 'Gross national income (GNI) per capita (constant 2017 PPP$)', 'Estimated gross national income per capita, female (2017 PPP $)', 'Estimated gross national income per capita, male (2017 PPP $)'] \n 5\n['Gender Inequality Index (GII)', 'Inequality-adjusted education index', 'Inequality-adjusted life expectancy index', 'Life 
expectancy index', 'Income index', 'Education index', 'Human Development Index (HDI)', 'Gender Development Index (GDI)', 'Human Development Index (HDI), female', 'Human Development Index (HDI), male', 'Inequality-adjusted income index'] \n 11\n['Mean years of schooling, female (years)', 'Mean years of schooling, male (years)', 'Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Life expectancy at birth, female (years)', 'Life expectancy at birth, male (years)', 'Expected years of schooling, female (years)', 'Expected years of schooling, male (years)'] \n 9\n['Total population (millions)', 'Population ages 15?64 (millions)', 'Population ages 65 and older (millions)', 'Population under age 5 (millions)'] \n 4\n['Adolescent birth rate (births per 1,000 women ages 15-19)', 'Sex ratio at birth (male to female births)', 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 'HDI rank', 'Total unemployment rate (female to male ratio)', 'Youth unemployment rate (female to male ratio)', 'Coefficient of human inequality', 'Inequality-adjusted HDI (IHDI)'] \n 9\n" ] } ], @@ -718,6 +741,7 @@ "money_columns = []\n", "index_columns = []\n", "year_columns = []\n", + "millions_columns = []\n", "rest = []\n", "\n", "for column in df_inicator_values.columns.values:\n", @@ -729,6 +753,8 @@ " year_columns.append(column)\n", " elif 'index' in column.lower():\n", " index_columns.append(column)\n", + " elif 'millions' in column.lower():\n", + " millions_columns.append(column)\n", " else:\n", " rest.append(column)\n", "\n", @@ -736,9 +762,8 @@ "print(money_columns,'\\n',len(money_columns))\n", "print(index_columns,'\\n',len(index_columns))\n", "print(year_columns,'\\n',len(year_columns))\n", - "print(rest,'\\n',len(rest))\n", - "\n", - "print(len(percentage_columns)+len(money_columns)+len(index_columns)+len(year_columns)+len(rest))" + 
"print(millions_columns,'\\n',len(millions_columns))\n", + "print(rest,'\\n',len(rest))" ] }, { @@ -750,7 +775,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -758,19 +783,20 @@ "split_df2 = df_inicator_values[money_columns]\n", "split_df3 = df_inicator_values[index_columns]\n", "split_df4 = df_inicator_values[year_columns]\n", - "split_df5 = df_inicator_values[rest]" + "split_df5 = df_inicator_values[millions_columns]\n", + "split_df6 = df_inicator_values[rest]" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 11, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "- Dataframe 1 -\nMax, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 14.624226348364278 %\n- Dataframe 2 -\nMax, min and mean number of missing values for the columns\nMax: 8.717948717948717 %\nMin: 2.051282051282051 %\nMean: 6.153846153846153 %\n- Dataframe 3 -\nMax, min and mean number of missing values for the columns\nMax: 20.0 %\nMin: 2.051282051282051 %\nMean: 9.790209790209792 %\n- Dataframe 4 -\nMax, min and mean number of missing values for the columns\nMax: 10.76923076923077 %\nMin: 1.0256410256410255 %\nMean: 5.584045584045584 %\n- Dataframe 5 -\nMax, min and mean number of missing values for the columns\nMax: 22.05128205128205 %\nMin: 0.0 %\nMean: 7.57396449704142 %\n" + "- Dataframe 1 -\nMax, min and mean number of missing values for the columns\nMax: 49.743589743589745 %\nMin: 0.0 %\nMean: 14.624226348364274 %\n- Dataframe 2 -\nMax, min and mean number of missing values for the columns\nMax: 8.717948717948717 %\nMin: 2.051282051282051 %\nMean: 6.153846153846153 %\n- Dataframe 3 -\nMax, min and mean number of missing values for the columns\nMax: 20.0 %\nMin: 2.051282051282051 %\nMean: 9.790209790209792 %\n- Dataframe 4 -\nMax, min and mean number of missing values for the columns\nMax: 
10.76923076923077 %\nMin: 1.0256410256410255 %\nMean: 5.584045584045585 %\n- Dataframe 5 -\nMax, min and mean number of missing values for the columns\nMax: 5.128205128205129 %\nMin: 0.0 %\nMean: 3.8461538461538467 %\n- Dataframe 6 -\nMax, min and mean number of missing values for the columns\nMax: 22.05128205128205 %\nMin: 3.076923076923077 %\nMean: 9.230769230769232 %\n" ] } ], @@ -784,7 +810,9 @@ "print('- Dataframe 4 -')\n", "min4, max4 = print_missing_percentages(split_df4)\n", "print('- Dataframe 5 -')\n", - "min5, max5 = print_missing_percentages(split_df5)" + "min5, max5 = print_missing_percentages(split_df5)\n", + "print('- Dataframe 6 -')\n", + "min6, max6 = print_missing_percentages(split_df6)" ] }, { @@ -797,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -805,174 +833,125 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 29)\n", - "[IterativeImputer] Ending imputation round 1/49, elapsed time 0.08\n", - "[IterativeImputer] Change: 228.09157262466636, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 2/49, elapsed time 0.15\n", - "[IterativeImputer] Change: 62.25538442972473, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 3/49, elapsed time 0.21\n", - "[IterativeImputer] Change: 42.97964424882681, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.27\n", - "[IterativeImputer] Change: 32.49599901077046, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 5/49, elapsed time 0.40\n", - "[IterativeImputer] Change: 24.095208515105927, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 6/49, elapsed time 0.46\n", - "[IterativeImputer] Change: 31.682426801495314, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.52\n", - "[IterativeImputer] 
Change: 140.975818147823, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 8/49, elapsed time 0.57\n", - "[IterativeImputer] Change: 39.46695219020222, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.62\n", - "[IterativeImputer] Change: 49.53774118145801, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.67\n", - "[IterativeImputer] Change: 44.68154513428162, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 11/49, elapsed time 0.72\n", - "[IterativeImputer] Change: 46.60916297395449, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 12/49, elapsed time 0.78\n", - "[IterativeImputer] Change: 21.86554010415628, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 13/49, elapsed time 0.83\n", - "[IterativeImputer] Change: 25.92892647183729, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 14/49, elapsed time 0.88\n", - "[IterativeImputer] Change: 18.83927866098054, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 15/49, elapsed time 0.94\n", - "[IterativeImputer] Change: 38.405696393605794, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 16/49, elapsed time 0.99\n", - "[IterativeImputer] Change: 38.25297377918183, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 17/49, elapsed time 1.04\n", - "[IterativeImputer] Change: 41.19811600250341, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 18/49, elapsed time 1.11\n", - "[IterativeImputer] Change: 34.112333701182216, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 19/49, elapsed time 1.16\n", - "[IterativeImputer] Change: 5.287239305308968, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 20/49, elapsed time 
1.21\n", - "[IterativeImputer] Change: 9.819564256199563, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 21/49, elapsed time 1.26\n", - "[IterativeImputer] Change: 19.412604471395234, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 22/49, elapsed time 1.31\n", - "[IterativeImputer] Change: 39.00936641411021, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 23/49, elapsed time 1.36\n", - "[IterativeImputer] Change: 21.32069071919966, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 24/49, elapsed time 1.42\n", - "[IterativeImputer] Change: 27.607026109939916, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 25/49, elapsed time 1.46\n", - "[IterativeImputer] Change: 12.932035632308278, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 26/49, elapsed time 1.52\n", - "[IterativeImputer] Change: 20.500731676727007, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 27/49, elapsed time 1.56\n", - "[IterativeImputer] Change: 37.90038155709468, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 28/49, elapsed time 1.62\n", - "[IterativeImputer] Change: 23.402932009174002, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 29/49, elapsed time 1.67\n", - "[IterativeImputer] Change: 37.483562456385734, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 30/49, elapsed time 1.73\n", - "[IterativeImputer] Change: 38.620880987628844, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 31/49, elapsed time 1.78\n", - "[IterativeImputer] Change: 34.235210757797404, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 32/49, elapsed time 1.83\n", - "[IterativeImputer] Change: 5.958748778584713, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending 
imputation round 33/49, elapsed time 1.88\n", - "[IterativeImputer] Change: 12.587404647069434, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 34/49, elapsed time 1.93\n", - "[IterativeImputer] Change: 19.321006684253774, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 35/49, elapsed time 1.98\n", - "[IterativeImputer] Change: 23.179317411983668, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 36/49, elapsed time 2.03\n", - "[IterativeImputer] Change: 16.391282472541278, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 37/49, elapsed time 2.08\n", - "[IterativeImputer] Change: 16.142877556306736, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 38/49, elapsed time 2.13\n", - "[IterativeImputer] Change: 8.395072354352404, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 39/49, elapsed time 2.18\n", - "[IterativeImputer] Change: 6.103784872152183, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 40/49, elapsed time 2.23\n", - "[IterativeImputer] Change: 4.99694813675092, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 41/49, elapsed time 2.27\n", - "[IterativeImputer] Change: 4.166011730288048, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 42/49, elapsed time 2.32\n", - "[IterativeImputer] Change: 3.547339840851814, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 43/49, elapsed time 2.37\n", - "[IterativeImputer] Change: 3.076150838166326, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 44/49, elapsed time 2.43\n", - "[IterativeImputer] Change: 2.704813693700327, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 45/49, elapsed time 2.47\n", - "[IterativeImputer] Change: 2.4026926256614516, scaled tolerance: 0.381517 
\n", - "[IterativeImputer] Ending imputation round 46/49, elapsed time 2.51\n", - "[IterativeImputer] Change: 2.1506562406006946, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 47/49, elapsed time 2.56\n", - "[IterativeImputer] Change: 1.93535948311116, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 48/49, elapsed time 2.61\n", - "[IterativeImputer] Change: 1.7485725296176398, scaled tolerance: 0.381517 \n", - "[IterativeImputer] Ending imputation round 49/49, elapsed time 2.66\n", - "[IterativeImputer] Change: 1.5846246034457572, scaled tolerance: 0.381517 \n", + "[IterativeImputer] Ending imputation round 1/49, elapsed time 0.22\n", + "[IterativeImputer] Ending imputation round 2/49, elapsed time 0.30\n", + "[IterativeImputer] Ending imputation round 3/49, elapsed time 0.42\n", + "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.49\n", + "[IterativeImputer] Ending imputation round 5/49, elapsed time 0.56\n", + "[IterativeImputer] Ending imputation round 6/49, elapsed time 0.63\n", + "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.73\n", + "[IterativeImputer] Ending imputation round 8/49, elapsed time 0.81\n", + "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.92\n", + "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.97\n", + "[IterativeImputer] Ending imputation round 11/49, elapsed time 1.03\n", + "[IterativeImputer] Ending imputation round 12/49, elapsed time 1.08\n", + "[IterativeImputer] Ending imputation round 13/49, elapsed time 1.13\n", + "[IterativeImputer] Ending imputation round 14/49, elapsed time 1.21\n", + "[IterativeImputer] Ending imputation round 15/49, elapsed time 1.30\n", + "[IterativeImputer] Ending imputation round 16/49, elapsed time 1.41\n", + "[IterativeImputer] Ending imputation round 17/49, elapsed time 1.50\n", + "[IterativeImputer] Ending imputation round 18/49, elapsed time 1.60\n", + "[IterativeImputer] 
Ending imputation round 19/49, elapsed time 1.69\n", + "[IterativeImputer] Ending imputation round 20/49, elapsed time 1.75\n", + "[IterativeImputer] Ending imputation round 21/49, elapsed time 1.81\n", + "[IterativeImputer] Ending imputation round 22/49, elapsed time 1.86\n", + "[IterativeImputer] Ending imputation round 23/49, elapsed time 1.92\n", + "[IterativeImputer] Ending imputation round 24/49, elapsed time 1.98\n", + "[IterativeImputer] Ending imputation round 25/49, elapsed time 2.06\n", + "[IterativeImputer] Ending imputation round 26/49, elapsed time 2.18\n", + "[IterativeImputer] Ending imputation round 27/49, elapsed time 2.26\n", + "[IterativeImputer] Ending imputation round 28/49, elapsed time 2.35\n", + "[IterativeImputer] Ending imputation round 29/49, elapsed time 2.43\n", + "[IterativeImputer] Ending imputation round 30/49, elapsed time 2.50\n", + "[IterativeImputer] Ending imputation round 31/49, elapsed time 2.57\n", + "[IterativeImputer] Ending imputation round 32/49, elapsed time 2.63\n", + "[IterativeImputer] Ending imputation round 33/49, elapsed time 2.71\n", + "[IterativeImputer] Ending imputation round 34/49, elapsed time 2.77\n", + "[IterativeImputer] Ending imputation round 35/49, elapsed time 2.90\n", + "[IterativeImputer] Ending imputation round 36/49, elapsed time 2.97\n", + "[IterativeImputer] Ending imputation round 37/49, elapsed time 3.09\n", + "[IterativeImputer] Ending imputation round 38/49, elapsed time 3.16\n", + "[IterativeImputer] Ending imputation round 39/49, elapsed time 3.22\n", + "[IterativeImputer] Ending imputation round 40/49, elapsed time 3.27\n", + "[IterativeImputer] Ending imputation round 41/49, elapsed time 3.32\n", + "[IterativeImputer] Ending imputation round 42/49, elapsed time 3.37\n", + "[IterativeImputer] Ending imputation round 43/49, elapsed time 3.44\n", + "[IterativeImputer] Ending imputation round 44/49, elapsed time 3.51\n", + "[IterativeImputer] Ending imputation round 45/49, elapsed time 
3.61\n", + "[IterativeImputer] Ending imputation round 46/49, elapsed time 3.68\n", + "[IterativeImputer] Ending imputation round 47/49, elapsed time 3.75\n", + "[IterativeImputer] Ending imputation round 48/49, elapsed time 3.80\n", + "[IterativeImputer] Ending imputation round 49/49, elapsed time 3.86\n", "[IterativeImputer] Completing matrix with shape (195, 29)\n", "[IterativeImputer] Ending imputation round 1/49, elapsed time 0.00\n", "[IterativeImputer] Ending imputation round 2/49, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 3/49, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 4/49, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 5/49, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 6/49, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 7/49, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 8/49, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 9/49, elapsed time 0.03\n", + "[IterativeImputer] Ending imputation round 10/49, elapsed time 0.03\n", "[IterativeImputer] Ending imputation round 11/49, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 12/49, elapsed time 0.05\n", - "[IterativeImputer] Ending imputation round 13/49, elapsed time 0.05\n", + "[IterativeImputer] Ending imputation round 12/49, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 13/49, elapsed time 0.04\n", "[IterativeImputer] Ending imputation round 14/49, elapsed time 0.05\n", - "[IterativeImputer] Ending imputation round 15/49, elapsed time 0.06\n", - "[IterativeImputer] Ending imputation round 16/49, elapsed time 0.06\n", - 
"[IterativeImputer] Ending imputation round 17/49, elapsed time 0.07\n", - "[IterativeImputer] Ending imputation round 18/49, elapsed time 0.07\n", + "[IterativeImputer] Ending imputation round 15/49, elapsed time 0.05\n", + "[IterativeImputer] Ending imputation round 16/49, elapsed time 0.05\n", + "[IterativeImputer] Ending imputation round 17/49, elapsed time 0.06\n", + "[IterativeImputer] Ending imputation round 18/49, elapsed time 0.06\n", "[IterativeImputer] Ending imputation round 19/49, elapsed time 0.07\n", - "[IterativeImputer] Ending imputation round 20/49, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 21/49, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 22/49, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 23/49, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 24/49, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 25/49, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 26/49, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 27/49, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 28/49, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 29/49, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 30/49, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 31/49, elapsed time 0.12\n", - "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n", - "[IterativeImputer] Ending imputation round 32/49, elapsed time 0.13\n", - "[IterativeImputer] Ending imputation round 33/49, elapsed time 0.13\n", - "[IterativeImputer] Ending imputation round 34/49, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 35/49, elapsed time 0.14\n", - "[IterativeImputer] 
Ending imputation round 36/49, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 37/49, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 38/49, elapsed time 0.15\n", - "[IterativeImputer] Ending imputation round 39/49, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 40/49, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 41/49, elapsed time 0.17\n", - "[IterativeImputer] Ending imputation round 42/49, elapsed time 0.17\n", - "[IterativeImputer] Ending imputation round 43/49, elapsed time 0.18\n", - "[IterativeImputer] Ending imputation round 44/49, elapsed time 0.18\n", - "[IterativeImputer] Ending imputation round 45/49, elapsed time 0.19\n", - "[IterativeImputer] Ending imputation round 46/49, elapsed time 0.19\n", - "[IterativeImputer] Ending imputation round 47/49, elapsed time 0.20\n", - "[IterativeImputer] Ending imputation round 48/49, elapsed time 0.20\n", - "[IterativeImputer] Ending imputation round 49/49, elapsed time 0.21\n" + "[IterativeImputer] Ending imputation round 20/49, elapsed time 0.07\n", + "[IterativeImputer] Ending imputation round 21/49, elapsed time 0.07\n", + "[IterativeImputer] Ending imputation round 22/49, elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 23/49, elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 24/49, elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 25/49, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 26/49, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 27/49, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 28/49, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 29/49, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 30/49, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 31/49, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 32/49, elapsed time 
0.11\n", + "[IterativeImputer] Ending imputation round 33/49, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 34/49, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 35/49, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 36/49, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 37/49, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 38/49, elapsed time 0.13\n", + "[IterativeImputer] Ending imputation round 39/49, elapsed time 0.13\n", + "[IterativeImputer] Ending imputation round 40/49, elapsed time 0.13\n", + "[IterativeImputer] Ending imputation round 41/49, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 42/49, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 43/49, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 44/49, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 45/49, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 46/49, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 47/49, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 48/49, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 49/49, elapsed time 0.16\n", + "/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " \" reached.\", ConvergenceWarning)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 17 + "execution_count": 12 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:25.208277\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", - "image/png": "\n" + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -980,6 +959,9 @@ } ], "source": [ + "if max1 < 10:\n", + " max1 = 10\n", + "\n", "idf1 = impute_df(split_df1, max_iter= int(max1), verbose=2)\n", "size = idf1.shape[1]\n", "corr = idf1.corr()\n", @@ -989,7 +971,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -997,51 +979,47 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 5)\n", - "[IterativeImputer] Ending imputation round 1/8, elapsed time 0.01\n", - "[IterativeImputer] Change: 457858.9943114285, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 2/8, elapsed time 0.02\n", - "[IterativeImputer] Change: 123566.60351871609, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 3/8, elapsed time 0.03\n", - "[IterativeImputer] Change: 66082.79555085694, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 4/8, elapsed time 
0.05\n", - "[IterativeImputer] Change: 31356.06832177031, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 5/8, elapsed time 0.06\n", - "[IterativeImputer] Change: 13275.540026738847, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 6/8, elapsed time 0.07\n", - "[IterativeImputer] Change: 5607.778352352506, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 7/8, elapsed time 0.08\n", - "[IterativeImputer] Change: 4637.325683098177, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 8/8, elapsed time 0.09\n", - "[IterativeImputer] Change: 4069.896702134858, scaled tolerance: 131.03159 \n", + "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.10\n", + "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 5/10, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.17\n", + "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.18\n", + "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.21\n", + "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.22\n", "[IterativeImputer] Completing matrix with shape (195, 5)\n", - "[IterativeImputer] Ending imputation round 1/8, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 2/8, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 3/8, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 4/8, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 5/8, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 6/8, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 
7/8, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 8/8, elapsed time 0.01\n", - "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" + "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 5/10, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.02\n", + "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.02\n", + "/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " \" reached.\", ConvergenceWarning)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 18 + "execution_count": 13 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:25.919090\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", - "image/png": "\n" + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1049,6 +1027,9 @@ } ], "source": [ + "if max2 < 10:\n", + " max2 = 10\n", + "\n", "idf2 = impute_df(split_df2, max_iter= int(max2), verbose=2)\n", "size = idf2.shape[1]\n", "corr = idf2.corr()\n", @@ -1058,7 +1039,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1066,55 +1047,35 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 11)\n", - "[IterativeImputer] Ending imputation round 1/20, elapsed time 0.02\n", - "[IterativeImputer] Change: 1.3057475071151101, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.04\n", - "[IterativeImputer] Change: 0.2431361335536576, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.06\n", - "[IterativeImputer] Change: 0.050910966788451084, scaled 
tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.08\n", - "[IterativeImputer] Change: 0.024727577272021295, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 5/20, elapsed time 0.10\n", - "[IterativeImputer] Change: 0.01669938231479373, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 6/20, elapsed time 0.12\n", - "[IterativeImputer] Change: 0.014436526598200827, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 7/20, elapsed time 0.14\n", - "[IterativeImputer] Change: 0.013328878116682485, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.16\n", - "[IterativeImputer] Change: 0.012628774076243077, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 9/20, elapsed time 0.17\n", - "[IterativeImputer] Change: 0.012053990511959045, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 10/20, elapsed time 0.19\n", - "[IterativeImputer] Change: 0.011528761105839724, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 11/20, elapsed time 0.21\n", - "[IterativeImputer] Change: 0.011028826842162887, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 12/20, elapsed time 0.23\n", - "[IterativeImputer] Change: 0.010551696384536902, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 13/20, elapsed time 0.25\n", - "[IterativeImputer] Change: 0.010132923628465185, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 14/20, elapsed time 0.26\n", - "[IterativeImputer] Change: 0.009943115736268826, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 15/20, elapsed time 0.28\n", - "[IterativeImputer] Change: 0.009758788813657215, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 16/20, elapsed time 
0.30\n", - "[IterativeImputer] Change: 0.00957549042259187, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 17/20, elapsed time 0.31\n", - "[IterativeImputer] Change: 0.009397405580395923, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 18/20, elapsed time 0.34\n", - "[IterativeImputer] Change: 0.009222955401797739, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 19/20, elapsed time 0.36\n", - "[IterativeImputer] Change: 0.009052194265333768, scaled tolerance: 0.001036 \n", - "[IterativeImputer] Ending imputation round 20/20, elapsed time 0.38\n", - "[IterativeImputer] Change: 0.008884981830621363, scaled tolerance: 0.001036 \n", + "[IterativeImputer] Ending imputation round 1/20, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.18\n", + "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.20\n", + "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.22\n", + "[IterativeImputer] Ending imputation round 5/20, elapsed time 0.24\n", + "[IterativeImputer] Ending imputation round 6/20, elapsed time 0.26\n", + "[IterativeImputer] Ending imputation round 7/20, elapsed time 0.29\n", + "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.37\n", + "[IterativeImputer] Ending imputation round 9/20, elapsed time 0.39\n", + "[IterativeImputer] Ending imputation round 10/20, elapsed time 0.41\n", + "[IterativeImputer] Ending imputation round 11/20, elapsed time 0.43\n", + "[IterativeImputer] Ending imputation round 12/20, elapsed time 0.45\n", + "[IterativeImputer] Ending imputation round 13/20, elapsed time 0.47\n", + "[IterativeImputer] Ending imputation round 14/20, elapsed time 0.49\n", + "[IterativeImputer] Ending imputation round 15/20, elapsed time 0.54\n", + "[IterativeImputer] Ending imputation round 16/20, elapsed time 0.59\n", + "[IterativeImputer] Ending imputation round 17/20, elapsed time 0.62\n", + 
"[IterativeImputer] Ending imputation round 18/20, elapsed time 0.64\n", + "[IterativeImputer] Ending imputation round 19/20, elapsed time 0.66\n", + "[IterativeImputer] Ending imputation round 20/20, elapsed time 0.68\n", "[IterativeImputer] Completing matrix with shape (195, 11)\n", "[IterativeImputer] Ending imputation round 1/20, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 2/20, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 3/20, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 4/20, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 5/20, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 6/20, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 7/20, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 8/20, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 9/20, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 10/20, elapsed time 0.02\n", "[IterativeImputer] Ending imputation round 11/20, elapsed time 0.02\n", @@ -1124,29 +1085,29 @@ "[IterativeImputer] Ending imputation round 15/20, elapsed time 0.03\n", "[IterativeImputer] Ending imputation round 16/20, elapsed time 0.03\n", "[IterativeImputer] Ending imputation round 17/20, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 18/20, elapsed time 0.04\n", + "[IterativeImputer] Ending imputation round 18/20, elapsed time 0.03\n", "[IterativeImputer] Ending imputation round 19/20, elapsed time 0.04\n", "[IterativeImputer] Ending imputation round 20/20, elapsed time 0.04\n", - 
"C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" + "/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " \" reached.\", ConvergenceWarning)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 19 + "execution_count": 14 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:26.720961\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", - "image/png": "\n" + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAy4AAANSCAYAAACDU5oRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nOzde5SuZ1kn6N9dlcSEBKIcDEhwoMfYSuuokGEQZsBuDsZuNdijA9g9Rpe6naV46MN0p4dZOuqsHunVytCrtcctAvHQIkRto82IiortiMLWFiQEBWmEyCEiSiQEQqru+WMX3Zvtrl077971vE9VXddatfZX36He+3srh++37/t53uruAAAAzGxj7QIAAAD2IrgAAADTE1wAAIDpCS4AAMD0BBcAAGB6ggsAADA9wQUAAJjeRXs9oao+I8n1SR6epJO8K8kt3X3bPtcGAACQZI+OS1X90yQvTVJJXpvkdTu3f6Kqbtz/8gAAAJLq7t0frPrDJH+juz962v2XJLm1u6/Z5XXHkhxLkh/8jm947LGveNqFq/gA+IG7P33tEoa65KJau4Th7vrI9tolMMClFx+9f7avuHRz7RKGO/7Ke9YuYbhjX3jJ2iUMt7W9++edw+jue47m/6e+4TGffTD+w33rT839D+Tf+B+nPI97jYptJ/mUJH982v0P23nsjLr7eJLjSeb/xQAAANPbK7h8W5JXVdVbkrxz575PTfJpSZ6zn4UBAAB8zFmDS3f/QlV9epLH5eTi/Epye5LXdffWgPoAAAD23lWsu7eT/NaAWgAA4NDrrbn//n/KBS5xHRcAAOAAEFwAAIDp7TkqBgAAXEBb965dwYGk4wIAAExPcAEAAKZnVAwAAAbq7blHxewqBgAAsJDgAgAATE9wAQAApmeNCwAAjLS1tXYFB5KOCwAAMD3BBQAAmJ5RMQAAGKi35t4OeVY6LgAAwPQEFwAAYHpGxQAAYCSjYovouAAAANMTXAAAgOkZFQMAgIF626jYEjouAADA9AQXAABgekbFAABgpK2ttSs4kHRcAACA6QkuAADA9AQXAABgeta4AADAQL1lO+QldFwAAIDpCS4AAMD0jIoBAMBIRsUW0XEBAACmJ7gAAADTMyoGAAAD9fbW2iUcSDouAADA9AQXAABgekbFAABgIBegXEbHBQAAmN6+d1x+4O5P3+9DTOcbL/vDtUsY6oO/+XNrlzDc1p+/b+0Shtv8pAevXcJwl1/79LVLGG7rvXesXcJwTzzxY2uXMNzVj71h7RKG67vvWruEoT7yjjevXcI6HvOStStgHxkVAwCAkYyKLWJUDAAAmJ7gAgAATM+oGAAADOQClMvouAAAANMTXAAAgOkJLgAAwPSscQEAgJFsh7yIjgsAADA9wQUAAJieUTEAABiojYotouMCAABMT3ABAACmZ1QMAABGMiq2iI4LAAAwPcEFAACYnlExAAAYqLe31i7hQNJxAQAApie4AAAA0zMqBgAAI9lVbBEdFwAAYHqCCwAAMD3BBQAAmJ41LgAAMFBv2Q55CR0XAABgeoILAAAwPaNiAAAwUNsOeREdFwAAYHqCCwAAMD2jYgAAMNK2UbEldFwAAIDpCS4AAMD0jIoBAMBALkC5jI4LAAAwPcEFAACYnlExAAAYyajYIjouAADA9AQXAADgPqmq66rqD6rqrVV14xke/9Sq+tWq+o9V9Yaq+tvne0zBBQAAOGdVtZnk+5N8UZJHJ3l2VT36tKf970le1t2fl+RZSX7gfI+7eI1LVX1Nd7/4fAsAAICjpLfuXbuE8/W4JG/t7rclSVW9NMn1Sd50ynM6yQN2bl+Z5F3ne9Dz6bh8524PVNWxqjpRVSd+46dvPo9DAAAAI536WX7n69hpT3l4knee8v3tO/ed6v9I8ver6vYkr0jyzedb11k7LlX1ht0eSnLVbq/r7uNJjifJD5z4/V5cHQAAMNSpn+V3UWd62WnfPzv
JS7r7e6vq85P8aFV9VndvL61rr1Gxq5J8YZI/P+3+SvKbSw8KAABH1sHfDvn2JI845fur81dHwb42yXVJ0t2vqapLkzw4yR1LD7rXqNjPJ7miu//4tK+3J/m1pQcFAAAOrNcluaaqHlVVl+Tk4vtbTnvOO5I8JUmq6jOTXJrkT8/noGftuHT3157lsa88nwMDAAAHT3ffW1XPSfLKJJtJXtTdt1bVdyU50d23JPlHSX6oqv5BTo6RfXV3n9cSksW7igEAAPddH/xRsXT3K3Jy0f2p9337KbfflOSJF/KYruMCAABMT3ABAACmZ1QMAAAG6u2DPyq2Bh0XAABgeoILAAAwPaNiAAAw0iHYVWwNOi4AAMD0BBcAAGB6RsUAAGCgw3AByjXouAAAANMTXAAAgOkJLgAAwPSscQEAgIF6a3vtEg4kHRcAAGB6ggsAADA9o2IAADCSUbFFdFwAAIDpCS4AAMD0jIoBAMBAvbW1dgkHko4LAAAwPcEFAACYnlExAAAYqLd67RIOJB0XAABgeoILAAAwPaNiAAAwULsA5SI6LgAAwPQEFwAAYHqCCwAAMD1rXAAAYCBrXJbRcQEAAKYnuAAAANMzKgYAAAP1dq9dwoGk4wIAAExPcAEAAKa376Nil1xU+32I6XzwN39u7RKGuuIJX7J2CcP11tbaJQx39xtevXYJwx3F93zxw69Zu4Th3vPOS9cuYbhPuevOtUsYri7+hLVLGGrjssvXLoGz6C2jYkvouAAAANMTXAAAgOnZVQwAAAbqozdxfkHouAAAANMTXAAAgOkZFQMAgIHsKraMjgsAADA9wQUAAJieUTEAABhoe3vtCg4mHRcAAGB6ggsAADA9wQUAAJieNS4AADBQb61dwcGk4wIAAExPcAEAAKZnVAwAAAYyKraMjgsAADA9wQUAAJieUTEAABhoe3vtCg4mHRcAAGB6ggsAADA9o2IAADCQXcWW0XEBAACmJ7gAAADTMyoGAAADbW/X2iUcSDouAADA9AQXAABgeoILAAAwPWtcAABgoO3ttSs4mHRcAACA6QkuAADA9IyKAQDAQL21dgUHk44LAAAwPcEFAACYnlExAAAYaHu71i7hQNJxAQAApie4AAAA0zMqBgAAA23bVWwRHRcAAGB6ewaXqvqMqnpKVV1x2v3X7V9ZAAAA/8VZg0tVfUuSn03yzUneWFXXn/LwPz/L645V1YmqOvHrP/XyC1MpAAAcAtvbNfXXrPZa4/L1SR7b3R+sqkcmubmqHtndL0iy67vq7uNJjifJC3/vjX2BagUAAI6ovYLLZnd/MEm6++1V9QU5GV7+q5wluAAAAFxIe61xeU9Vfe7HvtkJMV+c5MFJPns/CwMAAPiYvTouX5Xk3lPv6O57k3xVVf3gvlUFAACHVE+8jmRmZw0u3X37WR77/y58OQAAAH+V67gAAADT22tUDAAAuIC2t9eu4GDScQEAAKYnuAAAANMzKgYAAAPNfHX6mem4AAAA0xNcAACA6RkVAwCAgYyKLaPjAgAATE9wAQAApmdUDAAABtoyKraIjgsAADA9wQUAAJieUTEAABjIrmLL6LgAAADTE1wAAIDpCS4AAMD0rHEBAICBttsalyV0XAAAgOkJLgAAwPSMigEAwEDb22tXcDDpuAAAANMTXAAAgOkZFQMAgIG27Cq2iI4LAAAwPcEFAACYnlExAAAYaHvbqNgSOi4AAMD0BBcAAGB6RsUAAGAgu4oto+MCAABMT3ABAACmJ7gAAADTs8YFAAAG2rbGZREdFwAAYHr73nG56yPb+32I6Wz9+fvWLmGo3tpau4ThanNz7RKG2777rrVLGK4//KG1Sxhu80FH7/f8wY8cveGD7bs+sHYJw21cfuXaJQx1FP+bzeF39P5rDQAAK7Id8jJGxQAAgOkJLgAAwPSMigEAwEBbvXYFB5OOCwAAMD3BBQAAmJ5RMQAAGMgFKJfRcQEAAKYnuAAAANMzKgYAAAO5AOU
yOi4AAMD0BBcAAGB6RsUAAGAgF6BcRscFAACYnuACAABMT3ABAACmZ40LAAAMtBXbIS+h4wIAAExPcAEAAKZnVAwAAAayHfIyOi4AAMD0BBcAAGB6RsUAAGCgrbULOKB0XAAAgOkJLgAAwPSMigEAwEBGxZbRcQEAAKYnuAAAANMzKgYAAANtpdYu4UDScQEAAKYnuAAAANMTXAAAgOlZ4wIAAANtda9dwoGk4wIAAExPcAEAAKZnVAwAAAbaWruAA0rHBQAAmJ7gAgAA3CdVdV1V/UFVvbWqbjzL8768qrqqrj3fY+45KlZVj0vS3f26qnp0kuuSvLm7X3G+BwcAgKPmoI+KVdVmku9P8rQktyd5XVXd0t1vOu1590/yLUl++0Ic96wdl6r6jiT/Ksm/qar/K8m/TnJFkhur6rkXogAAAOBAeVySt3b327r7niQvTXL9GZ733Un+RZIPX4iD7jUq9uVJnpjkSUm+Kckzuvu7knxhkmfu9qKqOlZVJ6rqxG/+u5svRJ0AAMAAp36W3/k6dtpTHp7knad8f/vOfaf+jM9L8oju/vkLVddeo2L3dvdWkg9V1R91951J0t13V9X2bi/q7uNJjifJC377Da6wAwAAO2YfFTv1s/wu6kwv+88PVm0keX6Sr76Qde3Vcbmnqu63c/uxpxRzZZJdgwsAAHBo3Z7kEad8f3WSd53y/f2TfFaSX6uqtyd5fJJbzneB/l7B5Und/aEk6e5Tg8rFSW44nwMDAAAH0uuSXFNVj6qqS5I8K8ktH3uwuz/Q3Q/u7kd29yOT/FaSL+3uE+dz0LOOinX3R3a5/31J3nc+BwYAgKNoKwd7JUV331tVz0nyyiSbSV7U3bdW1XclOdHdt5z9Jyyz53bIAAAAp9q5NMorTrvv23d57hdciGO6ACUAADA9wQUAAJieUTEAABho9u2QZ6XjAgAATE9wAQAApmdUDAAABtrqg70d8lp0XAAAgOkJLgAAwPSMigEAwEB2FVtGxwUAAJie4AIAAEzPqBgAAAy0FbuKLaHjAgAATE9wAQAApmdUDAAABjIqtoyOCwAAMD3BBQAAmJ5RMQAAGMgFKJfRcQEAAKYnuAAAANMTXAAAgOlZ4wIAAANtte2Ql9BxAQAApie4AAAA0zMqBgAAA23FqNgSOi4AAMD0BBcAAGB6RsUAAGAgo2LL6LgAAADTE1wAAIDpGRUDAICBtl2AchEdFwAAYHqCCwAAMD2jYgAAMJBdxZYRXPbB5ic9eO0Shrr7Da9eu4Thtu++a+0ShrviCV+ydgnD3f36o/fP9kUP+pS1SxjuIVd+eO0Shrv4oY9au4TxNo/WR55LL7187RLggjMqBgAATE9wAQAApne0+qYAALAya1yW0XEBAACmJ7gAAADTMyoGAAADbbVRsSV0XAAAgOkJLgAAwPSMigEAwEB2FVtGxwUAAJie4AIAAEzPqBgAAAy0bVexRXRcAACA6QkuAADA9IyKAQDAQHYVW0bHBQAAmJ7gAgAATM+oGAAADGRUbBkdFwAAYHqCCwAAMD3BBQAAmJ41LgAAMNB2W+OyhI4LAAAwPcEFAACYnlExAAAYyHbIy+i4AAAA0xNcAACA6RkVAwCAgbbsKraIjgsAADA9wQUAAJieUTEAABho265ii+i4AAAA0xNcAACA6RkVAwCAgewqtoyOCwAAMD3BBQAAmJ7gAgAATM8aFwAAGGjbGpdFdFwAAIDp3efgUlU/sh+FAAAA7Oaso2JVdcvpdyX5m1X1iUnS3V+6X4UBAMBhtBWjYkvs1XG5OsmdSb4vyffufP3lKbfPqKqOVdWJqjrxm//u5gtVKwAAcETtFVyuTfI7SZ6b5APd/WtJ7u7uV3f3q3d7UXcf7+5ru/vaJzzjyy9ctQAAwJF01lGx7t5O8vyqevnOn+/d6zUAAMDutnt77RIOpHMKId19e5KvqKq/k5OjYwAAAMPcp+5Jd//7JP9+n2oBAAA4I2NfAAAw0LZ
dxRZxAUoAAGB6ggsAADA9o2IAADDQVhsVW0LHBQAAmJ7gAgAATE9wAQAApmeNCwAADGQ75GV0XAAAgOkJLgAAwPSMigEAwEDbtkNeRMcFAACYnuACAABMz6gYAAAMtL12AQeUjgsAADA9wQUAAJieUTEAABjIrmLL6LgAAADTE1wAAIDpGRUDAICBtmNUbAkdFwAAYHqCCwAAMD2jYgAAMJBdxZbRcQEAAKYnuAAAANMTXAAAgOlZ4wIAAAPZDnkZHRcAAGB6ggsAADA9o2IAADCQUbFldFwAAIDpCS4AAMD0jIoBAMBA2ybFFtFxAQAApie4AAAA09v3UbFLL679PsR0Lr/26WuXMNTdb3j12iUM1x/+0NolDHf364/e7/myz3ny2iUMt3Xn+9cuYbgrH3Lv2iWMt3kEJ8W3jtbv+Z533LZ2Cas4KP9k21VsGR0XAABgeoILAAAwvYPSUQMAgEPBqNgyOi4AAMD0BBcAAGB6ggsAADA9a1wAAGCgtsRlER0XAABgeoILAAAwPaNiAAAwkO2Ql9FxAQAApie4AAAA0zMqBgAAAxkUW0bHBQAAmJ7gAgAATM+oGAAADGRXsWV0XAAAgOkJLgAAwPSMigEAwEAGxZbRcQEAAKYnuAAAANMzKgYAAAMZFVtGxwUAAJie4AIAAExPcAEAAKZnjQsAAAy0bZXLIjouAADA9AQXAABgekbFAABgIINiy+i4AAAA0xNcAACA6RkVAwCAgYyKLaPjAgAATE9wAQAApmdUDAAABjIqtoyOCwAAMD3BBQAAmJ7gAgAAA/XkX+eiqq6rqj+oqrdW1Y1nePwTquondx7/7ap65Dn+6F3dp+BSVf99Vf3Dqnr6+R4YAAA4eKpqM8n3J/miJI9O8uyqevRpT/vaJH/e3Z+W5PlJnne+xz1rcKmq155y++uT/Osk90/yHWdKVgAAwKH3uCRv7e63dfc9SV6a5PrTnnN9kpt2bt+c5ClVVedz0L06LhefcvtYkqd193cmeXqSv7fbi6rqWFWdqKoT/+Gnbz6f+gAAgIFO/Sy/83XstKc8PMk7T/n+9p37zvic7r43yQeSPOh86tprO+SNqvqknAw41d1/unPwu6rq3t1e1N3HkxxPkh/83d+34xsAABwQp36W38WZOienf+Y/l+fcJ3sFlyuT/M7OgbuqHtrd76mqK3YpBgAAONxuT/KIU76/Osm7dnnO7VV1UU7mivefz0HPGly6+5G7PLSd5MvO58AAAMCB9Lok11TVo5L8SZJnJfnK055zS5IbkrwmyZcn+ZXu3teOyxl194eS/KfzOTAAABxNB3twqbvvrarnJHllks0kL+ruW6vqu5Kc6O5bkvxwkh+tqrfmZKflWed73EXBBQAAOLq6+xVJXnHafd9+yu0PJ/mKC3lMF6AEAACmp+MCAABDHexRsbXouAAAANMTXAAAgOkZFQMAgKGMii2h4wIAAExPcAEAAKZnVAwAAEYyKbaIjgsAADA9wQUAAJie4AIAAEzPGhcAABhK72AJZw0AAJie4AIAAEzPqBgAAAxU9kNeRMcFAACYnuACAABMz6gYAACMVEbFltBxAQAApie4AAAA0zMqBgAAA9lVbBkdFwAAYHqCCwAAMD2jYgAAMJTewRLOGgAAMD3BBQAAmJ5RMQAAGKhcgHIRHRcAAGB6ggsAADA9wQUAAJieNS4AADBS6R0s4awBAADTE1wAAIDpGRUDAICBSu9gkX0PLldcurnfh5jO1nvvWLuEoS5++DVrlzDc5oPuWruE4S560KesXcJwW3e+f+0Shtt8wAPXLmG4j37k6F1PYePyK9cuYbjaPFqfR47i/5s5/MQ9AABgekbFAABgoKqj1+m9EHRcAACA6QkuAADA9IyKAQDASC5AuYizBgAATE9wAQAApmdUDAAABiqjYos4awAAwPQEFwAAYHqCCwAAMD1rXAAAYKDSO1jEWQMAAKYnuAAAANMzKgYAAAPZDnkZZw0AAJi
e4AIAAEzPqBgAAAxUtbl2CQeSjgsAADA9wQUAAJieUTEAABjIrmLLOGsAAMD0BBcAAGB6RsUAAGAgo2LLOGsAAMD0BBcAAGB6RsUAAGAgF6BcRscFAACYnuACAABMT3ABAACmZ40LAAAMZDvkZZw1AABgeoILAAAwPaNiAAAwkO2Ql9FxAQAApie4AAAA0zMqBgAAAxkVW0bHBQAAmJ7gAgAATO+so2JV9d8lua2776yqy5LcmOQxSd6U5J939wcG1AgAAIfGhgtQLrLXWXtRkg/t3H5BkiuTPG/nvhfvY10AAAD/2V7BZaO77925fW13f1t3/0Z3f2eSv7bbi6rqWFWdqKoTv/Kyl12wYgEAgKNpr13F3lhVX9PdL07y+qq6trtPVNWnJ/nobi/q7uNJjifJj7/pTX3hygUAgIPNrmLL7NVx+bokT66qP0ry6CSvqaq3JfmhnccAAAD23Vk7LjuL77+6qu6fk6NhFyW5vbvfO6I4AACA5BwvQNndf5nk9ftcCwAAwBmdU3ABAAAuDGtclrGJNAAAMD3BBQAAmJ5RMQAAGMio2DI6LgAAwPQEFwAAYHpGxQAAYKDaMCq2hI4LAAAwPcEFAACYnlExAAAYaMOuYovouAAAANMTXAAAgOkZFQMAgIFcgHIZHRcAAGB6ggsAADA9wQUAAJieNS4AADCQNS7L6LgAAADTE1wAAIDpGRUDAICBqnwEX0LHBQAAmJ7gAgAATE+fCgAABtqwq9giOi4AAMD0BBcAAGB6RsUAAGCg2jAqtoSOCwAAMD3BBQAAmJ5RMQAAGMgFKJfRcQEAAKYnuAAAANPTpwIAgIHKBSgX0XEBAACmJ7gAAADTE1wAAIDp7fsal+OvvGe/DzGdJ574sbVLGOo977x07RKG++BHjt7ysIdc+eG1Sxjuyofcu3YJw330I7V2CcNd8503rl3CcO+76fvWLmG4t7127QrG+pM7L1u7hFV82e/+47VLOCe2Q15GxwUAAJie4AIAAExPnwoAAAbasB3yIjouAADA9AQXAABgekbFAABgoNrwEXwJHRcAAGB6ggsAADA9fSoAABjIBSiX0XEBAACmJ7gAAADT06cCAICBygUoF9FxAQAApie4AAAA0xNcAACA6VnjAgAAA9kOeRkdFwAAYHqCCwAAMD19KgAAGKg2fARfQscFAACYnuACAABMT58KAAAGsqvYMjouAADA9AQXAABgevpUAAAwklGxRXRcAACA6QkuAADA9PSpAABgIBegXEbHBQAAmJ7gAgAATE+fCgAABnIBymV0XAAAgOkJLgAAwPQEFwAAYHoG7AAAYCTbIS+i4wIAAExPcAEAAKanTwUAACPV5toV7KuqemCSn0zyyCRvT/I/dfef7/LcByS5LcnPdPdzzvZzz9pxqapvqapHLCkYAAA4km5M8qruvibJq3a+3813J3n1ufzQvUbFvjvJb1fVf6iqb6yqh5xTqQAAwFF1fZKbdm7flOQZZ3pSVT02yVVJfvFcfuheweVtSa7OyQDz2CRvqqpfqKobqur+u72oqo5V1YmqOvHu1/zUudQBAABHQm1cNPfXKZ/ld76O3ce3eFV3vztJdv785L9yDqo2knxvkv/1XH/oXmtcuru3czIF/WJVXZzki5I8O8m/THLGDkx3H09yPEme/Pzf63MtBgAAWNepn+V3U1W/nOShZ3joued4mG9M8orufmdVndML9gouH/dTuvujSW5JcktVXXaORQEAAIdIdz91t8eq6r1V9bDufndVPSzJHWd42ucn+R+q6huTXJHkkqr6YHfvuh5mr+DyzLMUe/cerwUAAE5Xh35j31uS3JDke3b+/NnTn9Ddf+9jt6vqq5Nce7bQkuyxxqW7/3BJpQAAwJH1PUmeVlVvSfK0ne9TVddW1QuX/tBDH/cAAIBxuvvPkjzlDPefSPJ1Z7j/JUlestfPFVwAAGCg3vARfIm9tkMGAABYneACAABMT3ABAACmZ8AOAAB
G2thcu4IDSccFAACYnuACAABMz6gYAACMZFRsER0XAABgeoILAAAwPaNiAAAwUBsVW0THBQAAmJ7gAgAATM+oGAAADGRUbBkdFwAAYHqCCwAAMD2jYgAAMJJRsUV0XAAAgOkJLgAAwPQEFwAAYHrWuAAAwEC9oXewhLMGAABMT3ABAACmZ1QMAAAGatshL6LjAgAATE9wAQAApmdUDAAABtre1DtYwlkDAACmJ7gAAADTMyoGAAADuQDlMs4aAAAwPcEFAACYnlExAAAYyKjYMs4aAAAwvX3vuBz7wkv2+xDTufqxN6xdwlBXJ9m+6861yxhq+64PrF3CcBc/9FFrlzDe5tFrSm9cfuXaJQz3vpu+b+0ShnvwDf9w7RKGe+Az71q7hKGuTfLR29+ydhlwQR29/ytzwR210AIAsxNa5rZtVGwRZw0AAJie4AIAAExPcAEAAKZnjQsAAAzUm3oHSzhrAADA9AQXAABgekbFAABgoN6otUs4kHRcAACA6QkuAADA9IyKAQDAQNubRsWW0HEBAACmJ7gAAADTMyoGAAAD2VVsGR0XAABgeoILAAAwPaNiAAAwkFGxZXRcAACA6QkuAADA9AQXAABgeta4AADAQL25dgUHk44LAAAwPcEFAACYnlExAAAYyHbIy+i4AAAA0xNcAACA6RkVAwCAkbQOFnHaAACA6QkuAADA9IyKAQDASC5AuYiOCwAAMD3BBQAAmJ5RMQAAGEnrYBGnDQAAmJ7gAgAATM+oGAAAjKR1sIjTBgAATE9wAQAApie4AAAA0zvrGpequiTJs5K8q7t/uaq+MskTktyW5Hh3f3RAjQAAcGiU1sEiey3Of/HOc+5XVTckuSLJTyd5SpLHJblhf8sDAADYe1Tss7v7mUm+LMnTk3x5d/9okq9J8nm7vaiqjlXViao68Ssve9mFqxYAADiS9uq4bOyMi12e5H5Jrkzy/iSfkOTi3V7U3ceTHE+SH3/Tm/rClAoAAAdfbfh4vMReweWHk7w5yWaS5yZ5eVW9Lcnjk7x0n2sDAABIskdw6e7nV9VP7tx+V1X9SJKnJvmh7n7tiAIBAAD26riku991yu2/SHLzvlYEAACHmF3FlnHaAIa8cyoAAA2kSURBVACA6QkuAADA9PYcFQMAAC6cjc21KziYdFwAAIDpCS4AAMD0jIoBAMBAG1oHizhtAADA9AQXAABgeoILAAAwPWtcAABgoNrotUs4kHRcAACA6QkuAADA9IyKAQDAQLZDXsZpAwAApie4AAAA0zMqBgAAAxkVW8ZpAwAApie4AAAA0zMqBgAAAxkVW8ZpAwAApie4AAAA0zMqBgAAAxkVW8ZpAwAApie4AAAA0xNcAACA6VnjAgAAA1njsozTBgAATE9wAQAApmdUDAAABtrc6LVLOJB0XAAAgOkJLgAAwPSMigEAwEB2FVvGaQMAAKYnuAAAANMzKgYAAAMZFVtm34PL1vbR2+6t775r7RKGqos/Ye0Shtu4/Mq1Sxhv8wj+PcfWvWtXMFxtbq5dwnBve+3aFYz3wGcerf9PJcnGpZevXcJQmw940NolwAUn7wEAANM7gn+FCgAA69nUOljEaQMAAKYnuAAAANMzKgYAAANt1NoVHEw6LgAAwPQEFwAAYHqCCwAAMD1rXAAAYCDbIS/jtAEAANMTXAAAgOkZFQMAgIE2tA4WcdoAAIDpCS4AAMD0jIoBAMBAdhVbxmkDAACmJ7gAAADTMyoGAAADGRVbxmkDAACmJ7gAAADTMyoGAAADGRVbxmkDAACmJ7gAAADTE1wAAIDpWeMCAAADbWgdLOK0AQAA0xNcAACA6RkVAwCAgTZr7QoOJh0XAABgeoILAAAwPaNiAAAw0KbWwSJOGwAAMD3BBQAAmJ5RMQAAGMio2DJOGwAAMD3BBQAAmJ5RMQAAGOiiDVegXELHBQAAuGCq6oFV9UtV9ZadPz9pl+f9i6q6tapuq6p/VVVnTXR7Bpeq+q+r6h9
X1Quq6nur6n+pqiuXvhEAAOBQuzHJq7r7miSv2vn+41TVE5I8Mcl/k+Szkvy3SZ58th961uBSVd+S5P9JcunOD7ssySOSvKaqvuA+vwUAADjiNjfm/roArk9y087tm5I84wzP6ZzMGJck+YQkFyd579l+6F6lfX2S67r7/0zy1CSP7u7nJrkuyfN3e1FVHauqE1V14ldf/vI9DgEAAMzi1M/yO1/H7uOPuKq7350kO39+8ulP6O7XJPnVJO/e+Xpld992th96LovzL0qylZNJ6P47B3pHVV282wu6+3iS40nyI2+8tc/hGAAAwARO/Sy/m6r65SQPPcNDzz2XY1TVpyX5zCRX79z1S1X1pO7+9d1es1dweWGS11XVbyV5UpLn7RzoIUnefy5FAQAAh0t3P3W3x6rqvVX1sO5+d1U9LMkdZ3jalyX5re7+4M5r/t8kj0+ya3A566hYd78gybOT/GKSZ3T3i3fu/9PuftJebwgAAPh4mzX31wVwS5Ibdm7fkORnz/CcdyR5clVdtDPJ9eQkZx0V23P5TXff2t03d/eb72PBAADA0fM9SZ5WVW9J8rSd71NV11bVC3eec3OSP0ry+0len+T13f1zZ/uhLkAJAABcMN39Z0mecob7TyT5up3bW0m+4b78XMEFAAAGukBbDh85ThsAADA9wQUAAJieUTEAABjIqNgyThsAADA9wQUAAJieUTEAABhoc+PCXOXxqNFxAQAApie4AAAA0zMqBgAAA9lVbBmnDQAAmJ7gAgAATE9wAQAApmeNCwAADLRpN+RFdFwAAIDpCS4AAMD0jIoBAMBAmxtmxZbQcQEAAKYnuAAAANMzKgYAAANtah0s4rQBAADTE1wAAIDpGRUDAICB7Cq2jI4LAAAwPcEFAACYnlExAAAYyK5iyzhtAADA9AQXAABgeoILAAAwPWtcAABgoI2yHfISOi4AAMD0BBcAAGB6RsUAAGAg2yEv47QBAADTE1wAAIDp7fuo2N33bO/3IabzkXe8ee0Shtq47PK1Sxhu++671i5huEsvPXq/53vecdvaJQx38cOvWbuE4f7kzsvWLmG4z7n9LWuXMNzmAx60dglDXfTJj1i7BM5ic8OuYkvouAAAANMTXAAAgOnZVQwAAAayq9gyThsAADA9wQUAAJieUTEAABjIrmLL6LgAAADTE1wAAIDpGRUDAICBjIoto+MCAABMT3ABAACmJ7gAAADTs8YFAAAG2tQ6WMRpAwAApie4AAAA0zMqBgAAA23YDnkRHRcAAGB6ggsAADA9o2IAADDQplGxRXRcAACA6QkuAADA9IyKAQDAQC5AuYzTBgAATE9wAQAApmdUDAAABrKr2DI6LgAAwPQEFwAAYHqCCwAAMD1rXAAAYKANa1wW0XEBAACmJ7gAAADTMyoGAAADbWodLOK0AQAA0xNcAACA6RkVAwCAgTbtKraIjgsAADA9wQUAAJieUTEAABjIqNgyOi4AAMD0BBcAAGB61d27P1h1ZZJ/luQZSR6yc/cdSX42yfd091/s8rpjSY7tfHu8u49fsIrvg6o6ttax13LU3vNRe7+J93xUeM9Hg/d8+B2195sczffMGHsFl1cm+ZUkN3X3e3bue2iSG5I8tbufNqTKharqRHdfu3YdIx2193zU3m/iPR8V3vPR4D0ffkft/SZH8z0zxl6jYo/s7ud9LLQkSXe/p7ufl+RT97c0AACAk/YKLn9cVf+kqq762B1VdVVV/dMk79zf0gAAAE7aK7g8M8mDkry6qt5fVe9P8mtJHpjkK/a5tgvhKM5XHrX3fNTeb+I9HxXe89HgPR9+R+39JkfzPTPAWde4nPWFVV/T3S++wPUAAAD8FecTXN7R3da5AAAA++6isz1YVW/Y7aEkV+3yGAAAwAW11xqXq5J8VZIvOcPXn+1vactV1XVV9QdV9daqunHtevZbVb2oqu6oqjeuXcsoVfWIqvrVqrqtqm6tqm9du6b9VlWXVtVrq+r
1O+/5O9euaYSq2qyq/1hVP792LaNU1dur6ver6veq6sTa9ey3qvrEqrq5qt688+/0569d036qqr++87v92NedVfVta9e136rqH+z8t+uNVfUTVXXp2jXtt6r61p33e+th/R2f6TNIVT2wqn6pqt6y8+cnrVkjh8deweXnk1zR3X982tfbc3KR/nSqajPJ9yf5oiSPTvLsqnr0ulXtu5ckuW7tIga7N8k/6u7PTPL4JN90BH7PH0nyt7r7c5J8bpLrqurxK9c0wrcmuW3tIlbwN7v7c4/ItRBekOQXuvszknxODvnvu7v/YOd3+7lJHpvkQ0l+ZuWy9lVVPTzJtyS5trs/K8lmkmetW9X+qqrPSvL1SR6Xk/9cf3FVXbNuVfviJfmrn0FuTPKq7r4myat2vofzdtbg0t1f292/sctjX7k/JZ23xyV5a3e/rbvvSfLSJNevXNO+6u5fT/L+tesYqbvf3d2/u3P7L3Pyg87D161qf/VJH9z59uKdr2WL1A6Iqro6yd9J8sK1a2F/VNUDkjwpyQ8nSXff091/sW5VQz0lyR919x+vXcgAFyW5rKouSnK/JO9auZ799plJfqu7P9Td9yZ5dZIvW7mmC26XzyDXJ7lp5/ZNSZ4xtCgOrb06LgfRw/Px15i5PYf8A+1RV1WPTPJ5SX573Ur2387Y1O8luSPJL3X3YX/P/3eSf5Jke+1CBuskv1hVv1NVx9YuZp/9tSR/muTFOyOBL6yqy9cuaqBnJfmJtYvYb939J0n+ZZJ3JHl3kg909y+uW9W+e2OSJ1XVg6rqfkn+dpJHrFzTKFd197uTk3/RmOSTV66HQ+IwBpc6w32H+m+lj7KquiLJTyX5tu6+c+169lt3b+2Ml1yd5HE7owiHUlV9cZI7uvt31q5lBU/s7sfk5MjrN1XVk9YuaB9dlOQxSf5Nd39ekrtyRMZKquqSJF+a5OVr17LfdtY4XJ/kUUk+JcnlVfX3161qf3X3bUmel+SXkvxCktfn5JgzsNBhDC635+P/RuPqHP529JFUVRfnZGj58e7+6bXrGWlnlObXcrjXNj0xyZdW1dtzcuTzb1XVj61b0hjd/a6dP+/IybUPj1u3on11e5LbT+ke3pyTQeYo+KIkv9vd7127kAGemuQ/dfefdvdHk/x0kiesXNO+6+4f7u7HdPeTcnKc6i1r1zTIe6vqYUmy8+cdK9fDIXEYg8vrklxTVY/a+dusZyW5ZeWauMCqqnJyJv627v6+tesZoaoeUlWfuHP7spz8IPDmdavaP939z7r76u5+ZE7+e/wr3X2o/4Y2Sarq8qq6/8duJ3l6To6cHErd/Z4k76yqv75z11OSvGnFkkZ6do7AmNiOdyR5fFXdb+e/30/JId+EIUmq6pN3/vzUJH83R+f3fUuSG3Zu35DkZ1eshUPkrNdxOYi6+96qek6SV+bkriUv6u5bVy5rX1XVTyT5giQPrqrbk3xHd//wulXtuycm+Z+T/P7Omo8k+d+6+xUr1rTfHpbkpp2d8zaSvKy7j8wWwUfIVUl+5uRnu1yU5N929y+sW9K+++bk/2/fjo0IDKIojN5Xg5qUoAAjEGhEDXKJoRAJHWhBAYIVEAqxO/+cU8EN99vZzf592XRLsuy85+fefx7mSda9t/xDa+1cVcckl7yeS12T7Pqu+otTVc2SPJJsWmv33oO+7dMZJMk2yaGqVnlF66LfQqakWvP9AwAAGNsUn4oBAAATI1wAAIDhCRcAAGB4wgUAABiecAEAAIYnXAAAgOEJFwAAYHhPCkWEMFchH0YAAAAASUVORK5CYII=\n" }, "metadata": { "needs_background": "light" @@ -1154,6 +1115,9 @@ } ], "source": [ + "if max3 < 10:\n", + " max3 = 10\n", + "\n", "idf3 = impute_df(split_df3, max_iter= int(max3), 
verbose=2)\n", "size = idf3.shape[1]\n", "corr = idf3.corr()\n", @@ -1163,7 +1127,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -1171,26 +1135,16 @@ "name": "stdout", "text": [ "[IterativeImputer] Completing matrix with shape (195, 9)\n", - "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.02\n", - "[IterativeImputer] Change: 24.70630011052294, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.04\n", - "[IterativeImputer] Change: 3.081030825722686, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.05\n", - "[IterativeImputer] Change: 1.7974084139622217, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.07\n", - "[IterativeImputer] Change: 1.2013063098503434, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 5/10, elapsed time 0.08\n", - "[IterativeImputer] Change: 0.8709381898134296, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.09\n", - "[IterativeImputer] Change: 0.6848017337664789, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.10\n", - "[IterativeImputer] Change: 0.5789561706855952, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.11\n", - "[IterativeImputer] Change: 0.5398675991534301, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.12\n", - "[IterativeImputer] Change: 0.5228675076225393, scaled tolerance: 0.087747 \n", - "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.13\n", - "[IterativeImputer] Change: 0.5054232450128762, scaled tolerance: 0.087747 \n", + "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.06\n", + "[IterativeImputer] Ending imputation round 2/10, 
elapsed time 0.08\n", + "[IterativeImputer] Ending imputation round 3/10, elapsed time 0.09\n", + "[IterativeImputer] Ending imputation round 4/10, elapsed time 0.11\n", + "[IterativeImputer] Ending imputation round 5/10, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 6/10, elapsed time 0.13\n", + "[IterativeImputer] Ending imputation round 7/10, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.18\n", "[IterativeImputer] Completing matrix with shape (195, 9)\n", "[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00\n", "[IterativeImputer] Ending imputation round 2/10, elapsed time 0.00\n", @@ -1202,26 +1156,26 @@ "[IterativeImputer] Ending imputation round 8/10, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 9/10, elapsed time 0.01\n", "[IterativeImputer] Ending imputation round 10/10, elapsed time 0.01\n", - "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" + "/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", + " \" reached.\", ConvergenceWarning)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 20 + "execution_count": 15 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:27.310512\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", - "image/png": "\n" + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1229,6 +1183,9 @@ } ], "source": [ + "if max4 < 10:\n", + " max4 = 10\n", + "\n", "idf4 = impute_df(split_df4, max_iter= int(max4), verbose=2)\n", "size = idf4.shape[1]\n", "corr = idf4.corr()\n", @@ -1238,101 +1195,32 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 16, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "[IterativeImputer] Completing matrix with shape (195, 13)\n", - "[IterativeImputer] Ending imputation round 1/22, elapsed time 0.03\n", - "[IterativeImputer] Change: 87.8605447830152, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 2/22, elapsed time 0.05\n", - "[IterativeImputer] Change: 28.586268171421256, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 3/22, elapsed time 0.07\n", - "[IterativeImputer] Change: 11.593324359484521, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 4/22, elapsed time 0.10\n", - "[IterativeImputer] Change: 5.237507958945714, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 5/22, elapsed time 0.12\n", - "[IterativeImputer] Change: 3.9889516059898855, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 6/22, elapsed time 0.13\n", - "[IterativeImputer] Change: 4.3327704567826295, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 7/22, elapsed time 0.15\n", - "[IterativeImputer] 
Change: 4.743884302961482, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 8/22, elapsed time 0.16\n", - "[IterativeImputer] Change: 4.9041296773902205, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 9/22, elapsed time 0.18\n", - "[IterativeImputer] Change: 4.8867183949047215, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 10/22, elapsed time 0.20\n", - "[IterativeImputer] Change: 4.74618090912474, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 11/22, elapsed time 0.22\n", - "[IterativeImputer] Change: 4.597447396969357, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 12/22, elapsed time 0.24\n", - "[IterativeImputer] Change: 4.383379000147137, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 13/22, elapsed time 0.25\n", - "[IterativeImputer] Change: 4.120519036033551, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 14/22, elapsed time 0.27\n", - "[IterativeImputer] Change: 3.829399094168947, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 15/22, elapsed time 0.28\n", - "[IterativeImputer] Change: 3.5249997877409696, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 16/22, elapsed time 0.30\n", - "[IterativeImputer] Change: 3.218127903389302, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 17/22, elapsed time 0.32\n", - "[IterativeImputer] Change: 2.9164496052546856, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 18/22, elapsed time 0.34\n", - "[IterativeImputer] Change: 2.625268955987952, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 19/22, elapsed time 0.36\n", - "[IterativeImputer] 
Change: 2.3497527779462066, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 20/22, elapsed time 0.38\n", - "[IterativeImputer] Change: 2.0912457219816023, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 21/22, elapsed time 0.39\n", - "[IterativeImputer] Change: 1.8498070813863305, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Ending imputation round 22/22, elapsed time 0.41\n", - "[IterativeImputer] Change: 1.6257610843838977, scaled tolerance: 1.4337840000000002 \n", - "[IterativeImputer] Completing matrix with shape (195, 13)\n", - "[IterativeImputer] Ending imputation round 1/22, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 2/22, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 3/22, elapsed time 0.00\n", - "[IterativeImputer] Ending imputation round 4/22, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 5/22, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 6/22, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 7/22, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 8/22, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 9/22, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 10/22, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 11/22, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 12/22, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 13/22, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 14/22, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 15/22, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 16/22, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 17/22, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 18/22, elapsed time 0.04\n", - "[IterativeImputer] 
Ending imputation round 19/22, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 20/22, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 21/22, elapsed time 0.04\n", - "[IterativeImputer] Ending imputation round 22/22, elapsed time 0.05\n", - "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n" + "[IterativeImputer] Completing matrix with shape (195, 4)\n[IterativeImputer] Ending imputation round 1/10, elapsed time 0.03\n[IterativeImputer] Ending imputation round 2/10, elapsed time 0.04\n[IterativeImputer] Ending imputation round 3/10, elapsed time 0.04\n[IterativeImputer] Ending imputation round 4/10, elapsed time 0.04\n[IterativeImputer] Ending imputation round 5/10, elapsed time 0.05\n[IterativeImputer] Ending imputation round 6/10, elapsed time 0.05\n[IterativeImputer] Ending imputation round 7/10, elapsed time 0.06\n[IterativeImputer] Ending imputation round 8/10, elapsed time 0.08\n[IterativeImputer] Early stopping criterion reached.\n[IterativeImputer] Completing matrix with shape (195, 4)\n[IterativeImputer] Ending imputation round 1/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 2/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 3/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 4/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 5/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 6/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 7/8, elapsed time 0.00\n[IterativeImputer] Ending imputation round 8/8, elapsed time 0.00\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 21 + "execution_count": 16 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:28.158122\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", - "image/png": "\n" + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1340,6 +1228,9 @@ } ], "source": [ + "if max5 < 10:\n", + " max5 = 10\n", + "\n", "idf5 = impute_df(split_df5, max_iter= int(max5), verbose=2)\n", "size = idf5.shape[1]\n", "corr = idf5.corr()\n", @@ -1347,171 +1238,132 @@ "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))" ] }, - { - "source": [ - "# Imputation of the whole dataset " - ], - "cell_type": "markdown", - "metadata": {} - }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 17, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "24.0\n" + "[IterativeImputer] Completing matrix with shape (195, 9)\n", + "[IterativeImputer] Ending imputation round 1/22, elapsed time 0.12\n", + "[IterativeImputer] Ending imputation round 2/22, elapsed time 0.14\n", + "[IterativeImputer] Ending imputation round 3/22, elapsed time 0.15\n", + "[IterativeImputer] Ending imputation round 4/22, elapsed time 0.16\n", + "[IterativeImputer] Ending imputation round 5/22, elapsed time 0.17\n", + "[IterativeImputer] Ending imputation round 6/22, elapsed time 0.18\n", + "[IterativeImputer] Ending 
imputation round 7/22, elapsed time 0.20\n", + "[IterativeImputer] Early stopping criterion reached.\n", + "[IterativeImputer] Completing matrix with shape (195, 9)\n", + "[IterativeImputer] Ending imputation round 1/7, elapsed time 0.00\n", + "[IterativeImputer] Ending imputation round 2/7, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 3/7, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 4/7, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 5/7, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 6/7, elapsed time 0.01\n", + "[IterativeImputer] Ending imputation round 7/7, elapsed time 0.02\n" ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "" + ] + }, + "metadata": {}, + "execution_count": 17 + }, + { + "output_type": "display_data", + "data": { + "text/plain": "
", + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } } ], "source": [ - "iter_number = (max_missing + min_missing) // 2\n", - "print(iter_number)" + "if max6 < 10:\n", + " max6 = 10\n", + "\n", + "idf6 = impute_df(split_df6, max_iter= int(max6), verbose=2)\n", + "size = idf6.shape[1]\n", + "corr = idf6.corr()\n", + "plt.subplots(figsize=(20,20))\n", + "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))" ] }, + { + "source": [ + "## Dataframe is merged and displayed below" + ], + 
"cell_type": "markdown", + "metadata": {} + }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 18, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "[IterativeImputer] Completing matrix with shape (195, 67)\n", - "[IterativeImputer] Ending imputation round 1/24, elapsed time 0.62\n", - "[IterativeImputer] Change: 373997.8709645349, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 2/24, elapsed time 1.24\n", - "[IterativeImputer] Change: 29511.1061871246, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 3/24, elapsed time 1.85\n", - "[IterativeImputer] Change: 17873.188347295738, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 4/24, elapsed time 2.47\n", - "[IterativeImputer] Change: 11459.888077535514, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 5/24, elapsed time 3.09\n", - "[IterativeImputer] Change: 5390.9739547168065, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 6/24, elapsed time 3.71\n", - "[IterativeImputer] Change: 3757.0867210105503, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 7/24, elapsed time 4.34\n", - "[IterativeImputer] Change: 3514.840674151647, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 8/24, elapsed time 4.96\n", - "[IterativeImputer] Change: 3272.008783191113, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 9/24, elapsed time 5.59\n", - "[IterativeImputer] Change: 3038.1248408404294, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 10/24, elapsed time 6.21\n", - "[IterativeImputer] Change: 2816.9578324964855, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 11/24, elapsed time 6.86\n", - "[IterativeImputer] Change: 2609.6887758823086, scaled tolerance: 131.03159 \n", - 
"[IterativeImputer] Ending imputation round 12/24, elapsed time 7.53\n", - "[IterativeImputer] Change: 2416.3188696420248, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 13/24, elapsed time 8.14\n", - "[IterativeImputer] Change: 2236.3250712345057, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 14/24, elapsed time 8.77\n", - "[IterativeImputer] Change: 2069.007986775061, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 15/24, elapsed time 9.40\n", - "[IterativeImputer] Change: 2002.417721664179, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 16/24, elapsed time 10.03\n", - "[IterativeImputer] Change: 1982.0451872189394, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 17/24, elapsed time 10.66\n", - "[IterativeImputer] Change: 1961.971235914504, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 18/24, elapsed time 11.30\n", - "[IterativeImputer] Change: 1942.2299581335687, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 19/24, elapsed time 11.93\n", - "[IterativeImputer] Change: 1922.8595087315427, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 20/24, elapsed time 12.56\n", - "[IterativeImputer] Change: 1903.8819537004383, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 21/24, elapsed time 13.19\n", - "[IterativeImputer] Change: 1885.3076371898812, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 22/24, elapsed time 13.83\n", - "[IterativeImputer] Change: 1867.1381917331032, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 23/24, elapsed time 14.46\n", - "[IterativeImputer] Change: 1849.3691382747977, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Ending imputation round 24/24, elapsed time 15.09\n", - "[IterativeImputer] 
Change: 1831.9916662386954, scaled tolerance: 131.03159 \n", - "[IterativeImputer] Completing matrix with shape (195, 67)\n", - "[IterativeImputer] Ending imputation round 1/24, elapsed time 0.01\n", - "[IterativeImputer] Ending imputation round 2/24, elapsed time 0.02\n", - "[IterativeImputer] Ending imputation round 3/24, elapsed time 0.03\n", - "[IterativeImputer] Ending imputation round 4/24, elapsed time 0.05\n", - "[IterativeImputer] Ending imputation round 5/24, elapsed time 0.06\n", - "[IterativeImputer] Ending imputation round 6/24, elapsed time 0.07\n", - "[IterativeImputer] Ending imputation round 7/24, elapsed time 0.08\n", - "[IterativeImputer] Ending imputation round 8/24, elapsed time 0.09\n", - "[IterativeImputer] Ending imputation round 9/24, elapsed time 0.10\n", - "[IterativeImputer] Ending imputation round 10/24, elapsed time 0.11\n", - "[IterativeImputer] Ending imputation round 11/24, elapsed time 0.12\n", - "[IterativeImputer] Ending imputation round 12/24, elapsed time 0.13\n", - "[IterativeImputer] Ending imputation round 13/24, elapsed time 0.14\n", - "[IterativeImputer] Ending imputation round 14/24, elapsed time 0.16\n", - "[IterativeImputer] Ending imputation round 15/24, elapsed time 0.17\n", - "[IterativeImputer] Ending imputation round 16/24, elapsed time 0.18\n", - "C:\\Users\\joach\\.conda\\envs\\wsenv\\lib\\site-packages\\sklearn\\impute\\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.\n", - " warnings.warn(\"[IterativeImputer] Early stopping criterion not\"\n", - "[IterativeImputer] Ending imputation round 17/24, elapsed time 0.19\n", - "[IterativeImputer] Ending imputation round 18/24, elapsed time 0.21\n", - "[IterativeImputer] Ending imputation round 19/24, elapsed time 0.22\n", - "[IterativeImputer] Ending imputation round 20/24, elapsed time 0.23\n", - "[IterativeImputer] Ending imputation round 21/24, elapsed time 0.24\n", - "[IterativeImputer] Ending imputation round 22/24, 
elapsed time 0.25\n", - "[IterativeImputer] Ending imputation round 23/24, elapsed time 0.26\n", - "[IterativeImputer] Ending imputation round 24/24, elapsed time 0.27\n" + "(195, 34)\n(195, 45)\n(195, 54)\n(195, 58)\n(195, 67)\n" ] - } - ], - "source": [ - "imputed_df = impute_df(df_inicator_values, max_iter=int(iter_number), verbose=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ + }, { "output_type": "display_data", "data": { - "text/plain": " Population with at least some secondary education (% ages 25 and older) \\\nAFG 26.080 \nAGO 30.232 \nALB 93.174 \nAND 72.327 \nARG 57.158 \n.. ... \nWSM 74.942 \nYEM 28.020 \nZAF 75.478 \nZMB 44.440 \nZWE 64.935 \n\n Population with at least some secondary education, female (% ages 25 and older) \\\nAFG 13.220 \nAGO 23.133 \nALB 93.700 \nAND 71.484 \nARG 59.161 \n.. ... \nWSM 79.127 \nYEM 19.920 \nZAF 74.977 \nZMB 38.488 \nZWE 59.792 \n\n Population with at least some secondary education, male (% ages 25 and older) \\\nAFG 36.920 \nAGO 38.056 \nALB 92.497 \nAND 73.327 \nARG 54.828 \n.. ... \nWSM 71.583 \nYEM 36.918 \nZAF 78.207 \nZMB 54.068 \nZWE 70.783 \n\n Mean years of schooling, female (years) \\\nAFG 1.94800 \nAGO 4.02300 \nALB 9.70200 \nAND 10.43900 \nARG 11.12300 \n.. ... \nWSM 11.16287 \nYEM 2.88000 \nZAF 10.03100 \nZMB 6.28300 \nZWE 8.06600 \n\n Mean years of schooling, male (years) \\\nAFG 6.006000 \nAGO 6.359000 \nALB 10.614000 \nAND 10.564000 \nARG 10.729000 \n.. ... \nWSM 10.415249 \nYEM 5.146000 \nZAF 10.291000 \nZMB 8.176000 \nZWE 8.923000 \n\n Share of seats in parliament (% held by women) \\\nAFG 27.244 \nAGO 30.000 \nALB 29.508 \nAND 46.429 \nARG 39.877 \n.. ... \nWSM 10.000 \nYEM 0.971 \nZAF 45.333 \nZMB 17.964 \nZWE 34.571 \n\n Adolescent birth rate (births per 1,000 women ages 15-19) \\\nAFG 68.957000 \nAGO 150.526000 \nALB 19.642000 \nAND 18.266334 \nARG 62.782000 \n.. ... 
\nWSM 23.886000 \nYEM 60.352000 \nZAF 67.908000 \nZMB 120.112000 \nZWE 86.135000 \n\n Vulnerable employment (% of total employment) \\\nAFG 79.726000 \nAGO 65.995000 \nALB 52.852000 \nAND 4.461035 \nARG 21.805000 \n.. ... \nWSM 29.983000 \nYEM 45.627000 \nZAF 10.298000 \nZMB 78.134000 \nZWE 64.739000 \n\n Total population (millions) Urban population (%) ... \\\nAFG 38.042 25.8 ... \nAGO 31.825 66.2 ... \nALB 2.881 61.2 ... \nAND 0.077 88.0 ... \nARG 44.781 92.0 ... \n.. ... ... ... \nWSM 0.197 18.1 ... \nYEM 29.162 37.3 ... \nZAF 58.558 66.9 ... \nZMB 17.861 44.1 ... \nZWE 14.645 32.2 ... \n\n Gender Development Index (GDI) \\\nAFG 0.659000 \nAGO 0.903000 \nALB 0.967000 \nAND 1.058332 \nARG 0.993000 \n.. ... \nWSM 0.949613 \nYEM 0.488000 \nZAF 0.986000 \nZMB 0.958000 \nZWE 0.931000 \n\n Estimated gross national income per capita, female (2017 PPP $) \\\nAFG 819.385000 \nAGO 5205.049000 \nALB 11004.455000 \nAND 43647.171247 \nARG 14872.167000 \n.. ... \nWSM 4054.375000 \nYEM 186.041000 \nZAF 9247.751000 \nZMB 3379.549000 \nZWE 2374.612000 \n\n Estimated gross national income per capita, male (2017 PPP $) \\\nAFG 3565.86500 \nAGO 7022.23100 \nALB 16884.66700 \nAND 66636.70046 \nARG 27825.75700 \n.. ... \nWSM 8410.10900 \nYEM 2980.03500 \nZAF 15094.54600 \nZMB 3270.42200 \nZWE 2984.89600 \n\n Human Development Index (HDI), female \\\nAFG 0.391000 \nAGO 0.552000 \nALB 0.780000 \nAND 0.888833 \nARG 0.835000 \n.. ... \nWSM 0.689133 \nYEM 0.270000 \nZAF 0.702000 \nZMB 0.569000 \nZWE 0.550000 \n\n Human Development Index (HDI), male Inequality-adjusted income index \\\nAFG 0.593000 0.590646 \nAGO 0.611000 0.442000 \nALB 0.807000 0.648000 \nAND 0.848604 0.657202 \nARG 0.840000 0.606000 \n.. ... ... 
\nWSM 0.723468 0.388513 \nYEM 0.553000 0.327000 \nZAF 0.712000 0.312000 \nZMB 0.593000 0.292000 \nZWE 0.590000 0.353000 \n\n Overall loss in HDI due to inequality (%) Inequality in income (%) \\\nAFG 19.599171 -14.312061 \nAGO 31.670000 28.900000 \nALB 10.943000 13.179000 \nAND 17.410515 36.120212 \nARG 13.728000 25.159000 \n.. ... ... \nWSM 18.988401 40.384439 \nYEM 31.702000 21.800000 \nZAF 33.992000 56.996000 \nZMB 31.336000 44.840000 \nZWE 22.767000 28.769000 \n\n Coefficient of human inequality Inequality-adjusted HDI (IHDI) \nAFG 19.795336 0.423465 \nAGO 31.733000 0.397000 \nALB 10.893000 0.708000 \nAND 16.938070 0.735618 \nARG 13.238000 0.729000 \n.. ... ... \nWSM 18.427962 0.575112 \nYEM 30.867000 0.321000 \nZAF 31.163000 0.468000 \nZMB 30.592000 0.401000 \nZWE 22.525000 0.441000 \n\n[195 rows x 67 columns]", - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
AFG26.08013.22036.9201.948006.00600027.24468.95700079.72600038.04225.8...0.659000819.3850003565.865000.3910000.5930000.59064619.599171-14.31206119.7953360.423465
AGO30.23223.13338.0564.023006.35900030.000150.52600065.99500031.82566.2...0.9030005205.0490007022.231000.5520000.6110000.44200031.67000028.90000031.7330000.397000
ALB93.17493.70092.4979.7020010.61400029.50819.64200052.8520002.88161.2...0.96700011004.45500016884.667000.7800000.8070000.64800010.94300013.17900010.8930000.708000
AND72.32771.48473.32710.4390010.56400046.42918.2663344.4610350.07788.0...1.05833243647.17124766636.700460.8888330.8486040.65720217.41051536.12021216.9380700.735618
ARG57.15859.16154.82811.1230010.72900039.87762.78200021.80500044.78192.0...0.99300014872.16700027825.757000.8350000.8400000.60600013.72800025.15900013.2380000.729000
..................................................................
WSM74.94279.12771.58311.1628710.41524910.00023.88600029.9830000.19718.1...0.9496134054.3750008410.109000.6891330.7234680.38851318.98840140.38443918.4279620.575112
YEM28.02019.92036.9182.880005.1460000.97160.35200045.62700029.16237.3...0.488000186.0410002980.035000.2700000.5530000.32700031.70200021.80000030.8670000.321000
ZAF75.47874.97778.20710.0310010.29100045.33367.90800010.29800058.55866.9...0.9860009247.75100015094.546000.7020000.7120000.31200033.99200056.99600031.1630000.468000
ZMB44.44038.48854.0686.283008.17600017.964120.11200078.13400017.86144.1...0.9580003379.5490003270.422000.5690000.5930000.29200031.33600044.84000030.5920000.401000
ZWE64.93559.79270.7838.066008.92300034.57186.13500064.73900014.64532.2...0.9310002374.6120002984.896000.5500000.5900000.35300022.76700028.76900022.5250000.441000
\n

195 rows × 67 columns

\n
" + "text/plain": " Population with at least some secondary education (% ages 25 and older) \\\nAFG 26.080 \nAGO 30.232 \nALB 93.174 \nAND 72.327 \nARG 57.158 \n.. ... \nWSM 74.942 \nYEM 28.020 \nZAF 75.478 \nZMB 44.440 \nZWE 64.935 \n\n Population with at least some secondary education, female (% ages 25 and older) \\\nAFG 13.220 \nAGO 23.133 \nALB 93.700 \nAND 71.484 \nARG 59.161 \n.. ... \nWSM 79.127 \nYEM 19.920 \nZAF 74.977 \nZMB 38.488 \nZWE 59.792 \n\n Population with at least some secondary education, male (% ages 25 and older) \\\nAFG 36.920 \nAGO 38.056 \nALB 92.497 \nAND 73.327 \nARG 54.828 \n.. ... \nWSM 71.583 \nYEM 36.918 \nZAF 78.207 \nZMB 54.068 \nZWE 70.783 \n\n Share of seats in parliament (% held by women) \\\nAFG 27.244 \nAGO 30.000 \nALB 29.508 \nAND 46.429 \nARG 39.877 \n.. ... \nWSM 10.000 \nYEM 0.971 \nZAF 45.333 \nZMB 17.964 \nZWE 34.571 \n\n Vulnerable employment (% of total employment) Urban population (%) \\\nAFG 79.72600 25.8 \nAGO 65.99500 66.2 \nALB 52.85200 61.2 \nAND 15.42884 88.0 \nARG 21.80500 92.0 \n.. ... ... \nWSM 29.98300 18.1 \nYEM 45.62700 37.3 \nZAF 10.29800 66.9 \nZMB 78.13400 44.1 \nZWE 64.73900 32.2 \n\n Labour force participation rate (% ages 15 and older), female \\\nAFG 21.595000 \nAGO 76.136000 \nALB 46.712000 \nAND 54.351364 \nARG 50.721000 \n.. ... \nWSM 31.104000 \nYEM 5.834000 \nZAF 49.610000 \nZMB 70.370000 \nZWE 78.106000 \n\n Labour force participation rate (% ages 15 and older), male \\\nAFG 74.658000 \nAGO 78.913000 \nALB 64.568000 \nAND 72.232983 \nARG 72.730000 \n.. ... \nWSM 55.456000 \nYEM 70.183000 \nZAF 62.749000 \nZMB 79.076000 \nZWE 88.993000 \n\n Remittances, inflows (% of GDP) \\\nAFG 4.542000 \nAGO 0.002000 \nALB 9.640000 \nAND 1.703573 \nARG 0.119000 \n.. ... \nWSM 17.254000 \nYEM 7.999177 \nZAF 0.253000 \nZMB 0.551000 \nZWE 8.068000 \n\n Foreign direct investment, net inflows (% of GDP) ... \\\nAFG 0.123000 ... \nAGO -4.331000 ... \nALB 7.912000 ... \nAND 3.286625 ... \nARG 1.389000 ... \n.. ... 
... \nWSM 3.114609 ... \nYEM -4.382862 ... \nZAF 1.316000 ... \nZMB 2.087000 ... \nZWE 4.005312 ... \n\n Population under age 5 (millions) \\\nAFG 5.639000 \nAGO 5.670000 \nALB 0.169000 \nAND -7.122259 \nARG 3.742000 \n.. ... \nWSM 0.027000 \nYEM 4.099000 \nZAF 5.786000 \nZMB 2.902000 \nZWE 2.138000 \n\n Adolescent birth rate (births per 1,000 women ages 15-19) \\\nAFG 68.957000 \nAGO 150.526000 \nALB 19.642000 \nAND 14.976928 \nARG 62.782000 \n.. ... \nWSM 23.886000 \nYEM 60.352000 \nZAF 67.908000 \nZMB 120.112000 \nZWE 86.135000 \n\n Sex ratio at birth (male to female births) \\\nAFG 1.060000 \nAGO 1.030000 \nALB 1.090000 \nAND 1.058547 \nARG 1.040000 \n.. ... \nWSM 1.080000 \nYEM 1.050000 \nZAF 1.030000 \nZMB 1.030000 \nZWE 1.020000 \n\n Young age (0-14) dependency ratio (per 100 people ages 15-64) \\\nAFG 77.346000 \nAGO 91.097000 \nALB 25.439000 \nAND 25.017928 \nARG 38.334000 \n.. ... \nWSM 66.194000 \nYEM 67.773000 \nZAF 44.148000 \nZMB 83.229000 \nZWE 76.845000 \n\n Old-age (65 and older) dependency ratio (per 100 people ages 15-64) \\\nAFG 4.764000 \nAGO 4.297000 \nALB 20.764000 \nAND 21.992987 \nARG 17.523000 \n.. ... \nWSM 8.623000 \nYEM 5.015000 \nZAF 8.253000 \nZMB 3.960000 \nZWE 5.433000 \n\n HDI rank Total unemployment rate (female to male ratio) \\\nAFG 169.0 1.356000 \nAGO 148.0 1.016000 \nALB 69.0 0.903000 \nAND 36.0 1.788414 \nARG 46.0 1.222000 \n.. ... ... \nWSM 111.0 1.297000 \nYEM 179.0 2.088000 \nZAF 114.0 1.149000 \nZMB 146.0 1.147000 \nZWE 150.0 1.231000 \n\n Youth unemployment rate (female to male ratio) \\\nAFG 1.308000 \nAGO 0.906000 \nALB 0.799000 \nAND 1.412073 \nARG 1.291000 \n.. ... \nWSM 1.492000 \nYEM 1.467000 \nZAF 1.161000 \nZMB 1.079000 \nZWE 1.269000 \n\n Coefficient of human inequality Inequality-adjusted HDI (IHDI) \nAFG 29.772732 0.360280 \nAGO 31.733000 0.397000 \nALB 10.893000 0.708000 \nAND 10.066536 0.786988 \nARG 13.238000 0.729000 \n.. ... ... 
\nWSM 21.104700 0.551524 \nYEM 30.867000 0.321000 \nZAF 31.163000 0.468000 \nZMB 30.592000 0.401000 \nZWE 22.525000 0.441000 \n\n[195 rows x 67 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Share of seats in parliament (% held by women)Vulnerable employment (% of total employment)Urban population (%)Labour force participation rate (% ages 15 and older), femaleLabour force participation rate (% ages 15 and older), maleRemittances, inflows (% of GDP)Foreign direct investment, net inflows (% of GDP)...Population under age 5 (millions)Adolescent birth rate (births per 1,000 women ages 15-19)Sex ratio at birth (male to female births)Young age (0-14) dependency ratio (per 100 people ages 15-64)Old-age (65 and older) dependency ratio (per 100 people ages 15-64)HDI rankTotal unemployment rate (female to male ratio)Youth unemployment rate (female to male ratio)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
AFG26.08013.22036.92027.24479.7260025.821.59500074.6580004.5420000.123000...5.63900068.9570001.06000077.3460004.764000169.01.3560001.30800029.7727320.360280
AGO30.23223.13338.05630.00065.9950066.276.13600078.9130000.002000-4.331000...5.670000150.5260001.03000091.0970004.297000148.01.0160000.90600031.7330000.397000
ALB93.17493.70092.49729.50852.8520061.246.71200064.5680009.6400007.912000...0.16900019.6420001.09000025.43900020.76400069.00.9030000.79900010.8930000.708000
AND72.32771.48473.32746.42915.4288488.054.35136472.2329831.7035733.286625...-7.12225914.9769281.05854725.01792821.99298736.01.7884141.41207310.0665360.786988
ARG57.15859.16154.82839.87721.8050092.050.72100072.7300000.1190001.389000...3.74200062.7820001.04000038.33400017.52300046.01.2220001.29100013.2380000.729000
..................................................................
WSM74.94279.12771.58310.00029.9830018.131.10400055.45600017.2540003.114609...0.02700023.8860001.08000066.1940008.623000111.01.2970001.49200021.1047000.551524
YEM28.02019.92036.9180.97145.6270037.35.83400070.1830007.999177-4.382862...4.09900060.3520001.05000067.7730005.015000179.02.0880001.46700030.8670000.321000
ZAF75.47874.97778.20745.33310.2980066.949.61000062.7490000.2530001.316000...5.78600067.9080001.03000044.1480008.253000114.01.1490001.16100031.1630000.468000
ZMB44.44038.48854.06817.96478.1340044.170.37000079.0760000.5510002.087000...2.902000120.1120001.03000083.2290003.960000146.01.1470001.07900030.5920000.401000
ZWE64.93559.79270.78334.57164.7390032.278.10600088.9930008.0680004.005312...2.13800086.1350001.02000076.8450005.433000150.01.2310001.26900022.5250000.441000
\n

195 rows × 67 columns

\n
" }, "metadata": {} } ], "source": [ - "display(imputed_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "#scatter_matrix(imputed_df, figsize=(size, size)) Takes a lot of time to work, visualization is not that great. But it can stay, just in case" + "# 29 - 5 - 11 - 9 - 4 - 9\n", + "final_df = idf1.merge(idf2, left_index=True, right_index=True)\n", + "print(final_df.shape)\n", + "final_df = final_df.merge(idf3, left_index=True, right_index=True)\n", + "print(final_df.shape)\n", + "final_df = final_df.merge(idf4, left_index=True, right_index=True)\n", + "print(final_df.shape)\n", + "final_df = final_df.merge(idf5, left_index=True, right_index=True)\n", + "print(final_df.shape)\n", + "final_df = final_df.merge(idf6, left_index=True, right_index=True)\n", + "print(final_df.shape)\n", + "\n", + "display(final_df)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 19, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "" + "" ] }, "metadata": {}, - "execution_count": 26 + "execution_count": 19 }, { "output_type": "display_data", "data": { "text/plain": "
", - "image/svg+xml": "\r\n\r\n\r\n \r\n \r\n \r\n \r\n 2021-05-12T09:28:44.813663\r\n image/svg+xml\r\n \r\n \r\n Matplotlib v3.4.1, https://matplotlib.org/\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n 
\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n\r\n", - "image/png": "\n" + "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n", + "image/png": "\n" }, "metadata": { "needs_background": "light" @@ -1519,130 +1371,64 @@ } ], "source": [ - "#sns.heatmap(corr, annot = True, vmin=-1, vmax=1, center= 0, fmt='.1g', cmap= 'coolwarm', linewidths=1, linecolor='black', square=True, yticklabels=False, xticklabels=False)\n", - "size = imputed_df.shape[1]\n", - "corr = imputed_df.corr()\n", + "size = final_df.shape[1]\n", + "corr = final_df.corr()\n", "plt.subplots(figsize=(20,20))\n", - "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size),cmap='mako')" + "sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))" ] }, { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "{0: 'Population with at least some secondary education (% ages 25 and older)', 1: 'Population with at least some secondary education, female (% ages 25 and older)', 2: 'Population with at least some secondary education, male (% ages 25 and older)', 3: 'Mean years of schooling, female (years)', 4: 'Mean years of schooling, male (years)', 5: 'Share of seats in parliament (% held by women)', 6: 'Adolescent birth rate (births per 1,000 women ages 15-19)', 7: 'Vulnerable employment (% of total employment)', 
8: 'Total population (millions)', 9: 'Urban population (%)', 10: 'Labour force participation rate (% ages 15 and older), female', 11: 'Labour force participation rate (% ages 15 and older), male', 12: 'Sex ratio at birth (male to female births)', 13: 'Remittances, inflows (% of GDP)', 14: 'Foreign direct investment, net inflows (% of GDP)', 15: 'Population ages 15?64 (millions)', 16: 'Infants lacking immunization, measles (% of one-year-olds)', 17: 'Infants lacking immunization, DTP (% of one-year-olds)', 18: 'Gross fixed capital formation (% of GDP)', 19: 'Gender Inequality Index (GII)', 20: 'Life expectancy at birth (years)', 21: 'Expected years of schooling (years)', 22: 'Inequality-adjusted education index', 23: 'Inequality-adjusted life expectancy index', 24: 'Inequality in education (%)', 25: 'Inequality in life expectancy (%)', 26: 'Mean years of schooling (years)', 27: 'Life expectancy index', 28: 'Income index', 29: 'Education index', 30: 'Unemployment, youth (% ages 15?24)', 31: 'Private capital flows (% of GDP)', 32: 'Life expectancy at birth, female (years)', 33: 'Life expectancy at birth, male (years)', 34: 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 35: 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 36: 'Expected years of schooling, female (years)', 37: 'Expected years of schooling, male (years)', 38: 'Population ages 65 and older (millions)', 39: 'Population under age 5 (millions)', 40: 'Exports and imports (% of GDP)', 41: 'Human Development Index (HDI)', 42: 'Unemployment, total (% of labour force)', 43: 'HDI rank', 44: 'Youth not in school or employment (% ages 15-24)', 45: 'Labour force participation rate (% ages 15 and older)', 46: 'Employment to population ratio (% ages 15 and older)', 47: 'Employment in agriculture (% of total employment)', 48: 'Employment in services (% of total employment)', 49: 'Working poor at PPP$3.20 a day (% of total employment)', 50: 'Total unemployment rate (female to male 
ratio)', 51: 'Youth unemployment rate (female to male ratio)', 52: 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 53: 'Gross capital formation (% of GDP)', 54: 'Gross domestic product (GDP), total (2017 PPP $ billions)', 55: 'GDP per capita (2017 PPP $)', 56: 'Gross national income (GNI) per capita (constant 2017 PPP$)', 57: 'Gender Development Index (GDI)', 58: 'Estimated gross national income per capita, female (2017 PPP $)', 59: 'Estimated gross national income per capita, male (2017 PPP $)', 60: 'Human Development Index (HDI), female', 61: 'Human Development Index (HDI), male', 62: 'Inequality-adjusted income index', 63: 'Overall loss in HDI due to inequality (%)', 64: 'Inequality in income (%)', 65: 'Coefficient of human inequality', 66: 'Inequality-adjusted HDI (IHDI)'}\n" - ] - } - ], "source": [ - "map_columns = {}\n", - "count = 0\n", - "for col in imputed_df.columns:\n", - " map_columns[count] = col\n", - " count += 1\n", - "\n", - "print(map_columns)\n", - "\n", - "#Makes it easier to check the correlations" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[[ 26.08 13.22 36.92 ... -14.31206116 19.79533568\n 0.42346498]\n [ 30.232 23.133 38.056 ... 28.9 31.733\n 0.397 ]\n [ 93.174 93.7 92.497 ... 13.179 10.893\n 0.708 ]\n ...\n [ 75.478 74.977 78.207 ... 56.996 31.163\n 0.468 ]\n [ 44.44 38.488 54.068 ... 44.84 30.592\n 0.401 ]\n [ 64.935 59.792 70.783 ... 
28.769 22.525\n 0.441 ]]\n" - ] - } + "## PCA of the Final Dataset" ], - "source": [ - "x = imputed_df.loc[:, imputed_df.columns].values\n", - "print(x)\n", - "x = StandardScaler().fit_transform(x)" - ] + "cell_type": "markdown", + "metadata": {} }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 20, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "(195, 67)\n(195, 67)\n" + "[[26.08 13.22 36.92 ... 1.308 29.77273216\n 0.36028022]\n [30.232 23.133 38.056 ... 0.906 31.733\n 0.397 ]\n [93.174 93.7 92.497 ... 0.799 10.893\n 0.708 ]\n ...\n [75.478 74.977 78.207 ... 1.161 31.163\n 0.468 ]\n [44.44 38.488 54.068 ... 1.079 30.592\n 0.401 ]\n [64.935 59.792 70.783 ... 1.269 22.525\n 0.441 ]]\n(195, 67)\n(195, 67)\n" ] } ], "source": [ - "print(imputed_df.shape)\n", + "x = final_df.loc[:, final_df.columns].values\n", + "print(x)\n", + "x = StandardScaler().fit_transform(x)\n", + "\n", + "print(final_df.shape)\n", "print(x.shape)" ] }, { "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": "array([[-1.213211 , -1.49949287, -0.95026927, ..., -3.68962647,\n 0.08912291, -0.98754125],\n [-1.06825946, -1.17214393, -0.90913571, ..., 0.45316976,\n 1.47092771, -1.13989063],\n [ 1.12912485, 1.15813273, 1.06212486, ..., -1.05402286,\n -0.94133748, 0.65042431],\n ...,\n [ 0.51133525, 0.53985833, 0.54469657, ..., 3.14676964,\n 1.40494925, -0.73116921],\n [-0.57224033, -0.66508825, -0.32935532, ..., 1.98135819,\n 1.33885503, -1.11686407],\n [ 0.14326588, 0.03841642, 0.27588008, ..., 0.44061062,\n 0.40508616, -0.88659848]])" - }, - "metadata": {} - } - ], - "source": [ - "display(x)" - ] - }, - { - "source": [ - "### We want to check if the mean of the normalized dataset is 0 and std is 1\n", - "### It looks like it" - ], - "cell_type": "markdown", - "metadata": {} - }, - { - "cell_type": "code", - "execution_count": 31, + "execution_count": 
21, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ - "(-6.274693313304368e-17, 1.0)" + "(-1.713134035701734e-17, 1.0)" ] }, "metadata": {}, - "execution_count": 31 + "execution_count": 21 } ], "source": [ "np.mean(x), np.std(x)" ] }, - { - "source": [ - "### To show the normalized data" - ], - "cell_type": "markdown", - "metadata": {} - }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1650,167 +1436,167 @@ "data": { "text/plain": [ " Population with at least some secondary education (% ages 25 and older) \\\n", - "0 -1.213211 \n", - "1 -1.068259 \n", - "2 1.129125 \n", - "3 0.401330 \n", - "4 -0.128239 \n", + "0 -1.242745 \n", + "1 -1.094493 \n", + "2 1.152920 \n", + "3 0.408555 \n", + "4 -0.133071 \n", "\n", " Population with at least some secondary education, female (% ages 25 and older) \\\n", - "0 -1.499493 \n", - "1 -1.172144 \n", - "2 1.158133 \n", - "3 0.424512 \n", - "4 0.017579 \n", + "0 -1.537407 \n", + "1 -1.202607 \n", + "2 1.180706 \n", + "3 0.430388 \n", + "4 0.014194 \n", "\n", " Population with at least some secondary education, male (% ages 25 and older) \\\n", - "0 -0.950269 \n", - "1 -0.909136 \n", - "2 1.062125 \n", - "3 0.367996 \n", - "4 -0.301836 \n", + "0 -0.972184 \n", + "1 -0.930083 \n", + "2 1.087538 \n", + "3 0.377085 \n", + "4 -0.308501 \n", "\n", - " Mean years of schooling, female (years) \\\n", - "0 -1.941779 \n", - "1 -1.318987 \n", - "2 0.385514 \n", - "3 0.606718 \n", - "4 0.812015 \n", + " Share of seats in parliament (% held by women) \\\n", + "0 0.366696 \n", + "1 0.601335 \n", + "2 0.559447 \n", + "3 2.000054 \n", + "4 1.442235 \n", "\n", - " Mean years of schooling, male (years) \\\n", - "0 -1.112677 \n", - "1 -0.984083 \n", - "2 0.565966 \n", - "3 0.547752 \n", - "4 0.607859 \n", + " Vulnerable employment (% of total employment) Urban population (%) \\\n", + "0 1.554740 -1.443911 \n", + "1 1.044887 0.299618 \n", + "2 
0.556868 0.083835 \n", + "3 -0.832710 1.240433 \n", + "4 -0.595953 1.413060 \n", "\n", - " Share of seats in parliament (% held by women) \\\n", - "0 0.361897 \n", - "1 0.596509 \n", - "2 0.554626 \n", - "3 1.995072 \n", - "4 1.437315 \n", + " Labour force participation rate (% ages 15 and older), female \\\n", + "0 -2.035680 \n", + "1 1.612659 \n", + "2 -0.355562 \n", + "3 0.155448 \n", + "4 -0.087393 \n", "\n", - " Adolescent birth rate (births per 1,000 women ages 15-19) \\\n", - "0 0.532975 \n", - "1 2.562666 \n", - "2 -0.694136 \n", - "3 -0.728367 \n", - "4 0.379322 \n", + " Labour force participation rate (% ages 15 and older), male \\\n", + "0 0.233312 \n", + "1 0.733974 \n", + "2 -0.953924 \n", + "3 -0.052027 \n", + "4 0.006454 \n", + "\n", + " Remittances, inflows (% of GDP) \\\n", + "0 -0.027719 \n", + "1 -0.796618 \n", + "2 0.835683 \n", + "3 -0.508438 \n", + "4 -0.776802 \n", "\n", - " Vulnerable employment (% of total employment) Total population (millions) \\\n", - "0 1.556274 -0.009236 \n", - "1 1.051012 -0.051786 \n", - "2 0.567386 -0.249884 \n", - "3 -1.213266 -0.269075 \n", - "4 -0.575057 0.036887 \n", + " Foreign direct investment, net inflows (% of GDP) ... \\\n", + "0 -0.441756 ... \n", + "1 -0.915527 ... \n", + "2 0.386758 ... \n", + "3 -0.105242 ... \n", + "4 -0.307092 ... \n", "\n", - " Urban population (%) ... Gender Development Index (GDI) \\\n", - "0 -1.443911 ... -3.764694 \n", - "1 0.299618 ... -0.482479 \n", - "2 0.083835 ... 0.378430 \n", - "3 1.240433 ... 1.606995 \n", - "4 1.413060 ... 
0.728174 \n", + " Population under age 5 (millions) \\\n", + "0 0.225085 \n", + "1 0.227833 \n", + "2 -0.259851 \n", + "3 -0.906248 \n", + "4 0.056909 \n", "\n", - " Estimated gross national income per capita, female (2017 PPP $) \\\n", - "0 -0.880471 \n", - "1 -0.605187 \n", - "2 -0.241164 \n", - "3 1.807789 \n", - "4 0.001609 \n", + " Adolescent birth rate (births per 1,000 women ages 15-19) \\\n", + "0 0.534851 \n", + "1 2.593772 \n", + "2 -0.709932 \n", + "3 -0.827685 \n", + "4 0.378985 \n", + "\n", + " Sex ratio at birth (male to female births) \\\n", + "0 0.462524 \n", + "1 -1.216161 \n", + "2 2.141209 \n", + "3 0.381230 \n", + "4 -0.656599 \n", "\n", - " Estimated gross national income per capita, male (2017 PPP $) \\\n", - "0 -0.858135 \n", - "1 -0.720907 \n", - "2 -0.329338 \n", - "3 1.645966 \n", - "4 0.105056 \n", + " Young age (0-14) dependency ratio (per 100 people ages 15-64) \\\n", + "0 1.513894 \n", + "1 2.161911 \n", + "2 -0.932226 \n", + "3 -0.952069 \n", + "4 -0.324548 \n", "\n", - " Human Development Index (HDI), female Human Development Index (HDI), male \\\n", - "0 -1.898855 -1.037358 \n", - "1 -0.901924 -0.907477 \n", - "2 0.509877 0.506782 \n", - "3 1.183786 0.806977 \n", - "4 0.850444 0.744897 \n", + " Old-age (65 and older) dependency ratio (per 100 people ages 15-64) \\\n", + "0 -0.950705 \n", + "1 -0.999777 \n", + "2 0.730564 \n", + "3 0.859705 \n", + "4 0.390002 \n", "\n", - " Inequality-adjusted income index \\\n", - "0 0.259496 \n", - "1 -0.644363 \n", - "2 0.608240 \n", - "3 0.664195 \n", - "4 0.352855 \n", + " HDI rank Total unemployment rate (female to male ratio) \\\n", + "0 1.364735 -0.115960 \n", + "1 0.976870 -0.308338 \n", + "2 -0.482239 -0.372276 \n", + "3 -1.091741 0.128707 \n", + "4 -0.907043 -0.191780 \n", "\n", - " Overall loss in HDI due to inequality (%) Inequality in income (%) \\\n", - "0 0.022060 -3.689626 \n", - "1 1.394523 0.453170 \n", - "2 -0.962154 -1.054023 \n", - "3 -0.226792 1.145381 \n", - "4 -0.645497 
0.094515 \n", + " Youth unemployment rate (female to male ratio) \\\n", + "0 -0.049773 \n", + "1 -0.485075 \n", + "2 -0.600939 \n", + "3 0.062921 \n", + "4 -0.068182 \n", "\n", " Coefficient of human inequality Inequality-adjusted HDI (IHDI) \n", - "0 0.089123 -0.987541 \n", - "1 1.470928 -1.139891 \n", - "2 -0.941337 0.650424 \n", - "3 -0.241610 0.809411 \n", - "4 -0.669900 0.771314 \n", + "0 1.192044 -1.315801 \n", + "1 1.408239 -1.110134 \n", + "2 -0.890168 0.631779 \n", + "3 -0.981317 1.074191 \n", + "4 -0.631542 0.749400 \n", "\n", "[5 rows x 67 columns]" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
0-1.213211-1.499493-0.950269-1.941779-1.1126770.3618970.5329751.556274-0.009236-1.443911...-3.764694-0.880471-0.858135-1.898855-1.0373580.2594960.022060-3.6896260.089123-0.987541
1-1.068259-1.172144-0.909136-1.318987-0.9840830.5965092.5626661.051012-0.0517860.299618...-0.482479-0.605187-0.720907-0.901924-0.907477-0.6443631.3945230.4531701.470928-1.139891
21.1291251.1581331.0621250.3855140.5659660.554626-0.6941360.567386-0.2498840.083835...0.378430-0.241164-0.3293380.5098770.5067820.608240-0.962154-1.054023-0.9413370.650424
30.4013300.4245120.3679960.6067180.5477521.995072-0.728367-1.213266-0.2690751.240433...1.6069951.8077891.6459661.1837860.8069770.664195-0.2267921.145381-0.2416100.809411
4-0.1282390.017579-0.3018360.8120150.6078591.4373150.379322-0.5750570.0368871.413060...0.7281740.0016090.1050560.8504440.7448970.352855-0.6454970.094515-0.6699000.771314
\n

5 rows × 67 columns

\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Share of seats in parliament (% held by women)Vulnerable employment (% of total employment)Urban population (%)Labour force participation rate (% ages 15 and older), femaleLabour force participation rate (% ages 15 and older), maleRemittances, inflows (% of GDP)Foreign direct investment, net inflows (% of GDP)...Population under age 5 (millions)Adolescent birth rate (births per 1,000 women ages 15-19)Sex ratio at birth (male to female births)Young age (0-14) dependency ratio (per 100 people ages 15-64)Old-age (65 and older) dependency ratio (per 100 people ages 15-64)HDI rankTotal unemployment rate (female to male ratio)Youth unemployment rate (female to male ratio)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
0-1.242745-1.537407-0.9721840.3666961.554740-1.443911-2.0356800.233312-0.027719-0.441756...0.2250850.5348510.4625241.513894-0.9507051.364735-0.115960-0.0497731.192044-1.315801
1-1.094493-1.202607-0.9300830.6013351.0448870.2996181.6126590.733974-0.796618-0.915527...0.2278332.593772-1.2161612.161911-0.9997770.976870-0.308338-0.4850751.408239-1.110134
21.1529201.1807061.0875380.5594470.5568680.083835-0.355562-0.9539240.8356830.386758...-0.259851-0.7099322.141209-0.9322260.730564-0.482239-0.372276-0.600939-0.8901680.631779
30.4085550.4303880.3770852.000054-0.8327101.2404330.155448-0.052027-0.508438-0.105242...-0.906248-0.8276850.381230-0.9520690.859705-1.0917410.1287070.062921-0.9813171.074191
4-0.1330710.014194-0.3085011.442235-0.5959531.413060-0.0873930.006454-0.776802-0.307092...0.0569090.378985-0.656599-0.3245480.390002-0.907043-0.191780-0.068182-0.6315420.749400
\n

5 rows × 67 columns

\n
" }, "metadata": {}, - "execution_count": 32 + "execution_count": 22 } ], "source": [ - "feat_cols = imputed_df.columns.values.tolist()\n", + "feat_cols = final_df.columns.values.tolist()\n", "#print(feat_cols)\n", - "normalized_imputed_df = pd.DataFrame(x, columns=feat_cols)\n", - "normalized_imputed_df.head()" + "normalized_final_df = pd.DataFrame(x, columns=feat_cols)\n", + "normalized_final_df.head()" ] }, - { - "source": [ - "### Now we start with the PCA Part" - ], - "cell_type": "markdown", - "metadata": {} - }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 23, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "['PC1', 'PC2', 'PC3']\n['49.70', '7.85', '7.20']\n" + "['PC1', 'PC2', 'PC3']\n['49.73', '7.93', '7.18']\n" ] } ], "source": [ "num_components = 3\n", - "pca_imputed = PCA(n_components=num_components)\n", - "pComponents_imputed = pca_imputed.fit_transform(x)\n", + "pca_final = PCA(n_components=num_components)\n", + "pComponents_final = pca_final.fit_transform(x)\n", "component_col = ['PC'+str(i+1) for i in range(num_components)]\n", "print(component_col)\n", "\n", - "percentage_list = [element * 100 for element in pca_imputed.explained_variance_ratio_]\n", + "percentage_list = [element * 100 for element in pca_final.explained_variance_ratio_]\n", "percentage_list = ['%.2f' % elem for elem in percentage_list]\n", "print(percentage_list)" ] }, - { - "source": [ - "### PC stands for principal components" - ], - "cell_type": "markdown", - "metadata": {} - }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1825,61 +1611,61 @@ "data": { "text/plain": [ " PC1 PC2 PC3\n", - "0 7.815705 3.771093 -0.133255\n", - "1 7.339138 -2.606812 0.272919\n", - "2 -3.214204 2.176000 -0.611012\n", - "3 -5.807156 -1.477608 -0.284501\n", - "4 -3.974595 0.524419 -0.239346" + "0 8.660925 3.855080 -0.080563\n", + "1 7.369314 -2.616149 0.028787\n", + "2 -3.287324 
2.085875 -0.449373\n", + "3 -5.811149 -0.738707 -0.166838\n", + "4 -3.917583 0.676241 -0.271113" ], - "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PC1PC2PC3
07.8157053.771093-0.133255
17.339138-2.6068120.272919
2-3.2142042.176000-0.611012
3-5.807156-1.477608-0.284501
4-3.9745950.524419-0.239346
\n
" + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PC1PC2PC3
08.6609253.855080-0.080563
17.369314-2.6161490.028787
2-3.2873242.085875-0.449373
3-5.811149-0.738707-0.166838
4-3.9175830.676241-0.271113
\n
" }, "metadata": {}, - "execution_count": 34 + "execution_count": 24 } ], "source": [ - "pc_imputed_df = pd.DataFrame(data = pComponents_imputed, columns = component_col)\n", - "print(pc_imputed_df.shape)\n", - "pc_imputed_df.head()" + "pc_final_df = pd.DataFrame(data = pComponents_final, columns = component_col)\n", + "print(pc_final_df.shape)\n", + "pc_final_df.head()" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 25, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "Explained variation percentage per principal component: ['49.70', '7.85', '7.20']\nTotal percentage of the explained data by 3 components is: 64.75\nPercentage of the information that is lost for using 3 components is: 35.25\n" + "Explained variation percentage per principal component: ['49.73', '7.93', '7.18']\nTotal percentage of the explained data by 3 components is: 64.84\nPercentage of the information that is lost for using 3 components is: 35.16\n" ] } ], "source": [ "print('Explained variation percentage per principal component: {}'.format(percentage_list))\n", - "total_explained_percentage = (sum(pca_imputed.explained_variance_ratio_)*100)\n", - "print('Total percentage of the explained data by',pca_imputed.n_components,'components is: %.2f' %total_explained_percentage)\n", - "print('Percentage of the information that is lost for using',pca_imputed.n_components,'components is: %.2f' %(100-total_explained_percentage))" + "total_explained_percentage = (sum(pca_final.explained_variance_ratio_)*100)\n", + "print('Total percentage of the explained data by',pca_final.n_components,'components is: %.2f' %total_explained_percentage)\n", + "print('Percentage of the information that is lost for using',pca_final.n_components,'components is: %.2f' %(100-total_explained_percentage))" ] }, { "source": [ - "### Outliers are a big problem as it can be seen from the graph" + "## 3 Main Principle Component is presented" ], "cell_type": "markdown", 
"metadata": {} }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 26, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ - "{'0': 'PC1 49.70%', '1': 'PC2 7.85%', '2': 'PC3 7.20%'}\n" + "{'0': 'PC1 49.73%', '1': 'PC2 7.93%', '2': 'PC3 7.18%'}\n" ] }, { @@ -1891,7 +1677,7 @@ }, "data": [ { - "hovertemplate": "PC1 49.70%=%{x}
PC2 7.85%=%{y}
PC3 7.20%=%{z}", + "hovertemplate": "PC1 49.73%=%{x}
PC2 7.93%=%{y}
PC3 7.18%=%{z}", "legendgroup": "", "marker": { "color": "#636efa", @@ -1903,595 +1689,595 @@ "showlegend": false, "type": "scatter3d", "x": [ - 7.815705391742462, - 7.339137705703802, - -3.214203916269259, - -5.80715643866871, - -3.9745954619497295, - -2.890966092672859, - -1.1134671652005752, - -9.02976272407463, - -8.089903395153193, - -1.9800675811561705, - 9.527366508949298, - -8.814781431817284, - 8.057536998694976, - 9.389458414018907, - 3.8049608938917885, - -4.5411540540591, - -4.23920966194199, - -3.1588756354356065, - -2.6858306608095894, - -5.136335371331304, - 0.12810416951246514, - 1.267042524569538, - -0.6885217978448994, - -3.65835391674737, - -4.182557988623389, - 3.7408392194080093, - 0.20400898125735614, - 12.56978154058819, - -8.050563036925059, - -4.288361890597987, - -2.3582248937799273, - 8.327761469360418, - 7.2752883396901575, - 8.856710810591405, - 5.533077430318204, - -0.6019520590967266, - 7.294069988360144, - 1.8970704667026417, - -2.788007359243617, - -3.0580619971246326, - -6.588294978619638, - -7.573929299906972, - -8.902789615997523, - 5.372849242136768, - -0.5339264068916624, - -9.082419175322016, - -0.3654413763172631, - -0.11787503537257654, - -0.26730624222407656, - 1.6301659735629148, - 7.904739443603567, - -7.157833186894553, - 8.762975653218588, - -9.194067425167436, - -0.3051455870997747, - -7.3806651712145515, - 2.1534605695235087, - -3.1876558033358795, - 4.47778550921977, - 7.862744607195441, - 5.901182830324904, - -6.354542916764469, - -0.4067551835511576, - -5.970468538879629, - 4.566984752150551, - 1.9292708308075317, - 4.772592533434519, - 13.069788552035853, - 4.367579870283229, - 10.18771836172564, - 9.546609925194428, - 3.268280023564391, - 1.3641510634203449, - -8.959144626756714, - 3.5239748566941724, - 8.09574152733288, - -5.997380117994976, - 1.0110406969635937, - 3.785140734566966, - -9.092556100865856, - -2.093224510897799, - 2.313017712713081, - -9.083314846698999, - -7.18731043401891, - -6.72232805979924, - 
-0.6159066258325677, - -0.9248226691956368, - -8.241936643448538, - -4.3512784393720105, - 5.314985585514629, - -0.634269111601688, - 3.6412780541396295, - -7.021759781450271, - -2.7879093551636958, - 5.21499904167344, - -0.5048660224959203, - 8.250222773015107, - -0.028671322512146755, - -10.239720758557572, - 6.6673450911698335, - -6.599811015887725, - -9.075382520720181, - -6.347740337271529, - 1.5171468658736802, - -1.362051712582059, - -2.037529218145667, - 7.7171716974342415, - -0.32339256579989906, - -1.1282922477603508, - 0.21885241143556144, - -2.31846231910023, - 11.205339947133302, - -7.361576598086409, - 4.790180540615756, - -4.5948425662985315, - -1.0509994004013095, - 9.471266868062813, - 7.273411428415602, - -2.89776118464564, - 7.513645783446732, - -2.9246343517899582, - 4.019324064053343, - 11.673093122705474, - 8.071012495061858, - 2.0699265091720696, - -8.997003865902158, - -9.445122207161843, - 5.101055040274665, - -0.21329078140762486, - -8.094356897681944, - -3.150516903237161, - 6.676900922868478, - -3.9195141665897246, - 1.8540679921789573, - -0.15376162236217705, - -5.648809454506782, - -9.218137325881434, - -6.979610366647846, - -7.885858153911095, - -1.4386898053967137, - -0.6233359470292887, - -1.7593857873642393, - -2.344725313882094, - -1.0534229727076467, - 0.6654668206170398, - 7.887735069516859, - -6.38882597077141, - -5.6397559950521154, - 0.8455120890389446, - -5.250593981881587, - -4.112425219809245, - -4.681161480300002, - 6.522331966196904, - -4.023280194965856, - 7.742324860246996, - 7.352758852805935, - -8.962687133514889, - 4.31975979988239, - 9.705167181253548, - -2.484810312171407, - 7.561131630567262, - -4.017490652651401, - 11.093618100157807, - 2.9909862541842602, - 0.28649873104569173, - -6.073720961255064, - -8.067662923582365, - -9.228762918666252, - -2.572977914669187, - 4.497489633099259, - 7.555490434805937, - -1.3302502498349542, - 0.889150114494581, - 0.6492983535956304, - 5.372302012623104, - 
-0.3836415766204878, - -2.3372990413558163, - -0.7046133426963144, - -2.7672631482209042, - 0.7779732705161404, - 7.679315506536789, - 7.212648012351597, - -3.717171194809398, - -3.443218965335672, - -7.9884355813861525, - -0.15437542776681207, - -0.3252988281038899, - -0.04736873525040834, - -0.06581490480457773, - 4.876186643841398, - 0.36241169157713116, - 8.959685526258205, - 1.3513180314927358, - 6.150885694615564, - 5.4789655277814875 + 8.660925373030112, + 7.36931441039187, + -3.287323774612212, + -5.811149454434806, + -3.917583415567073, + -2.9338836176267127, + -1.3634938862156527, + -8.95719329714002, + -8.061102744290617, + -2.0143966716041626, + 9.518730559247157, + -8.83584022021243, + 8.101931475789897, + 9.390221152113469, + 3.7920355348972397, + -4.533593406252421, + -4.360984547988361, + -3.512033805407493, + -2.693074770407761, + -5.187274735018256, + 0.15634895125033405, + 1.327148550243007, + -0.5915289853603273, + -3.651639000535378, + -4.506113576087406, + 3.628606254985032, + 0.4220843585147365, + 12.680295441880574, + -7.967091555964078, + -4.194571416724602, + -2.275633082655154, + 8.366996344735632, + 7.300290963343796, + 8.926587475681194, + 5.557030223889865, + -0.5358221359930567, + 7.389569574497576, + 2.0766840364109256, + -2.7277717561457733, + -3.4448467607081747, + -6.890528980649567, + -7.618664623703192, + -8.793588332529188, + 5.030759542043407, + -0.8133041827675904, + -9.048869551995287, + -0.3224865164826, + -0.11270737710548943, + -0.21558223396562975, + 1.706906391115579, + 8.221535016841335, + -7.165904482034793, + 8.698629888933718, + -9.324042146819695, + -0.4363534601027277, + -7.403491382198684, + 2.137625968690034, + -3.2522807297427105, + 4.437806353565892, + 7.79742829320645, + 6.402924375558027, + -6.254361937358284, + -0.24840011661343397, + -5.9743490272615185, + 4.43695230324791, + 1.9170572823945042, + 4.8116043036306495, + 13.0309792014796, + 4.56048084557989, + 10.124601509654775, + 9.464432864991926, + 
3.332273020710495, + 1.334235774731984, + -8.677829019013167, + 3.566237278455438, + 8.102761520239902, + -5.990221066605703, + 1.0212879422538246, + 3.799763414682114, + -8.961571182924617, + -2.0116371496465018, + 2.3452054287494515, + -8.998425604718578, + -7.192573125538015, + -6.69926394386997, + -0.6197240202964007, + -0.8871561941125945, + -8.160398493492213, + -4.375289152115088, + 5.384390076964414, + -0.7368806438860154, + 3.8542782035866976, + -6.959926349684004, + -2.912873268210257, + 5.11979306543985, + -0.8804355671773069, + 8.221737810503074, + -0.2959305965655109, + -6.745582778644443, + 6.607815336068809, + -6.665402163818996, + -10.22624722126282, + -6.383178963211472, + 1.6061607812886278, + -0.923072842836434, + -2.1045430816121162, + 7.742286990664149, + -0.3249499652865133, + -1.0545194081043436, + -0.8793745013263952, + -2.315936291337086, + 11.171093857013355, + -7.8586969724826385, + 5.10063014743016, + -4.671798219956093, + -1.1803364178532467, + 9.495409105780801, + 7.274862403400485, + -2.911851692298899, + 7.592600143178789, + -3.3239892133995954, + 4.158783210879166, + 11.659981452804312, + 8.106398056165082, + 2.082527374112786, + -8.882946614749645, + -9.40254395453483, + 4.960564659314305, + -0.45350878877708417, + -8.014766635562616, + -3.0901261913689666, + 6.6922476770592, + -4.707503064696859, + 0.7571221305201962, + -0.1676971430788005, + -5.956312959493098, + -9.131681154878528, + -6.951574928918852, + -7.842286950536407, + -1.0862985842844837, + -0.553407667927873, + -1.7756904913436278, + -2.284128602738151, + -1.0306200985471243, + 0.686392192277255, + 7.906495025661678, + -6.386655189132161, + -5.633462144999635, + 0.9379578630417558, + -5.340853869141102, + -4.103711688500217, + -4.6722331884107575, + 6.607486409903617, + -4.249172805924318, + 7.790891416947143, + 7.429333092490025, + -8.868267693969555, + 4.2617616621659735, + 9.739984460067737, + -1.2982756071941874, + 6.6997542096048734, + -4.044533181810401, + 
11.37384943365761, + 2.9989471218939494, + 0.43928328385974, + -6.105665614117344, + -8.093367975526451, + -9.18598504844754, + -2.4745554817040665, + 4.837127138188524, + 7.551378070484332, + -1.2843924193698983, + 0.820800722755169, + 0.7473458450490823, + 5.365815568843352, + -0.7597514742681404, + -2.4373413206171173, + -0.7138582435099253, + -2.705881390776994, + 0.31476642032113766, + 7.71333296545075, + 7.186259568291081, + -3.7978657071531776, + -3.3979525762581626, + -7.75882743273823, + -0.5968403419780313, + -0.5838730219221859, + -0.007916107619517623, + -0.2041579547749093, + 4.88102780317078, + 0.015884491794824422, + 9.204539209440739, + 1.4973101808646454, + 6.220218114546498, + 5.442054166264446 ], "y": [ - 3.7710930700377343, - -2.6068122483972225, - 2.1759998382097714, - -1.477608137355973, - 0.5244193644395958, - 3.2222414860008306, - -0.6243507824918618, - -1.4525585842358641, - -1.2454320204471736, - -0.3197983036265329, - -4.237515714493149, - -0.1745838558446209, - -2.3382043958696803, - -1.0001205825719757, - 1.111163226050274, - 0.36914800662604874, - -2.0102061441495627, - -0.9596191176361444, - 4.114508502610506, - -1.1129139648683155, - -0.04650566248863399, - -2.1837767842915867, - 1.59071944797144, - 0.47363682832027093, - -0.3555564643680984, - -0.9772141578971821, - 0.6748364196510189, - -1.852551303827457, - -1.094111964537084, - -0.13869719837063346, - 4.666867362889493, - 0.3930464320109389, - -2.5179622696437893, - -0.32093333784864697, - -0.7806349080444681, - -0.32532378405232726, - 2.3955915410128226, - 1.141869680698887, - 0.5613822279535107, - 0.6199184188077461, - -0.7912863453361185, - -1.170416996964333, - -1.0513991436296977, - 0.9590765521071616, - -0.27242128263812415, - -1.5085111318643667, - -0.3085735129738893, - 4.131216809952003, - -1.3260495297428727, - 4.055202391554322, - -2.2912905440280102, - -1.047719714753937, - -3.561210590917475, - -0.7534647588303485, - 0.521686291212891, - 0.5153572670991243, - 
3.2982843117596703, - 0.5439050810776036, - -0.6139123293485069, - 1.24966754387615, - 0.08283426661537716, - 2.4205953671128966, - -0.8620360299059099, - 1.218415315069224, - -3.702394948803829, - 0.44234956852966806, - 3.479352348848602, - -2.0858220432044217, - 0.825736813216072, - -1.019524143668798, - -2.2098267746146685, - -0.18289127807047462, - 1.783105542226986, - -1.584742368267455, - -0.7885358100669981, - 1.4064754574794345, - -0.06121314024465169, - 0.20217916802341582, - 7.474788743257434, - -1.7492366112861004, - 4.134103049603261, - 5.069571363427267, - -3.3111111011242764, - -1.1911420440298752, - 2.085456665139465, - 0.3894734380967889, - 5.668793789144949, - -0.558134535111894, - -1.6454439632996396, - -2.4859943981787693, - 1.00599629341969, - 0.3529129239948134, - -0.6009508210767537, - -1.4789193673035914, - -3.7748892487898007, - 3.100974039846159, - -2.897771044300527, - 4.298572957046066, - -1.4505126708965623, - 1.3700733845045165, - -0.8428440174556316, - -1.4758330493933198, - -0.6967325849637968, - 3.1668800161854556, - -0.23873747865098688, - 2.4856432233399466, - -4.9921256886164125, - 0.16518106042801775, - -0.11171481571152728, - 1.0949247613992086, - 2.3302094506334567, - -1.0266218714485222, - -0.5290480011503546, - -0.8344495211845591, - 2.2091109582233983, - -0.10605596618334574, - -3.553147300917237, - 2.8083552413960424, - 0.6479697790399467, - -3.1489644344238026, - -0.7947752862465971, - 1.8864930596369995, - -2.5018749242658975, - 2.1472048210075987, - -1.066223004796494, - -2.0100362941854204, - -2.0854082137719283, - -3.6505235452230527, - 0.26715344146391684, - -2.0767724469085764, - -0.4691622797649216, - 3.3168574113875895, - 0.1298244177503368, - -3.4670042691428264, - 5.789549947812651, - -3.3078602462065954, - -2.569035394856519, - 1.0984202420704408, - -0.8037305700455658, - -0.6636828835248264, - 1.6051363516612027, - 1.671485719666504, - -1.518616085313642, - -2.5393250747189424, - 0.3120517966570482, - 
1.787750467826509, - -0.19724141628178163, - -0.4943330634356602, - -1.4935380283703719, - -4.649245770451785, - 0.4345804417117196, - 0.3501036381156643, - -4.949826018260202, - 1.5072756647611225, - 3.7003818677917777, - 2.1195330631670433, - -3.4129919044965176, - -4.081080733037543, - -0.8384739520601534, - -0.7090983114822332, - 2.5751290648726224, - 1.735506551700908, - -0.39572799630852773, - 1.2958850189263693, - 1.8970870955716783, - -0.308620040638708, - -0.7589333585377616, - -1.4816789461586213, - -0.904686467685462, - 4.104837364351266, - -3.1929672847548205, - -1.8159181491685787, - 4.114620300186373, - -0.7798587119233383, - -1.5540163204104063, - -0.44774452331140946, - -0.42143364507485415, - 3.9104734934031127, - 2.3841210940811215, - 0.3755125442647771, - -4.223587424909779, - -2.4465322252575294, - 1.6310274245445033, - -0.0028246323915299227, - 0.6216299027178394, - -0.014010365301508051, - 1.594650154053963, - 1.0192261832505012, - -2.4447386976502656, - -1.2992800194951994, - 3.5555081366304244, - 6.260046043530518, - 4.073331971606124, - -1.6167691506707658, - -3.5674654661302934 + 3.855079906757912, + -2.6161491630776537, + 2.0858745119674817, + -0.7387070741223745, + 0.6762411291710205, + 3.2612679019976785, + -0.7597612819340497, + -1.5132518573724385, + -1.3141138830707781, + -0.4603812837704453, + -4.176798797673366, + -0.17145939483571848, + -2.3552322867774333, + -1.0701328964463008, + 0.8702761364071849, + 0.44236870815169077, + -1.9358357839167768, + -0.875340692809248, + 4.195432845157893, + -1.2376211881520474, + 0.12409870870908377, + -2.1090907082786456, + 1.685357352839291, + 0.6758843835084245, + -0.4501378746479654, + -1.066802136251232, + 0.6634853407176996, + -1.7623622324432309, + -1.1554207697853431, + -0.06063224652875297, + 3.3758883351636406, + 0.4512264379143446, + -2.456638459417957, + -0.43679824747548973, + -0.7382688818718947, + -0.2520532911701617, + 2.750530427456509, + 1.125146783490973, + 0.7892791006156633, + 
0.5765882273665656, + -0.6854056696141374, + -1.279685771976567, + -1.1825978137443862, + 1.3222286500907285, + 0.11745983009345888, + -1.542437895261929, + -0.2782375674209586, + 3.937745783870189, + -1.3320803207099212, + 4.169856066791524, + -2.2477669326690903, + -1.150485868378693, + -3.8712181278625093, + -0.7022335549777545, + 0.5420963052877399, + 0.4678926303972273, + 3.4403713919822887, + 0.4863806254463764, + -0.6238378308545871, + 1.343829136369984, + 0.14409084055530824, + 2.5343927756276208, + -0.5526272432620005, + 1.2248475756753234, + -3.7675068565379526, + 0.5204095710567854, + 3.8376607993496474, + -2.0760363685139835, + 0.5644581810204278, + -1.1459815564721405, + -2.041770332342056, + -0.004542519610788543, + 1.7031900127353492, + -1.3742218685131904, + -0.7147485799669284, + 1.4881169379618266, + -0.19050682488166057, + -0.14475243097201565, + 6.360968776083738, + -2.078243330810698, + 4.14095498498866, + 5.227145165733215, + -3.3612305563899025, + -1.1255274387743974, + 2.1625635432281007, + 0.4396840547361396, + 5.751134848691572, + -0.9000703677554964, + -1.7793354355241953, + -2.4139451622163732, + 0.8205060831759692, + 0.7039545268141139, + -0.7726894334348547, + -1.5249528470778462, + -3.868128583078392, + 3.202644291798853, + -2.962651113308873, + 4.430985736188797, + -2.4314062342671723, + 1.3807182968551648, + -0.7449036382521768, + -0.8069094474868895, + -0.6759332084903706, + 3.0748583207932327, + 0.1405897640815465, + 2.392095650546612, + -5.076137879528213, + 0.1097660154101488, + -0.09293666247089191, + 0.6138002511135776, + 2.3161173720426596, + -1.071210529898813, + -0.19711844410400842, + -0.9021406931148502, + 2.1673739587776044, + -0.19060429757994993, + -3.685341829885408, + 2.7174995622045883, + 0.7284784869926412, + -3.0190587027490117, + -0.7989147485071739, + 2.285982156684738, + -2.7164474192473183, + 2.0824407561550218, + -0.9369710311952276, + -2.092283307281979, + -2.184152854419849, + -4.04801007971819, + 
0.6121619616567823, + -2.096903142825676, + -0.449370857527864, + 3.246313330481864, + 0.2072632677881261, + -3.526650756993172, + 5.912620412600382, + -3.2069462495949463, + -2.6355919352137627, + 1.1560393798870965, + -0.810113928458289, + -0.6400475866863665, + 1.6073144480858566, + 1.6113479644274307, + -1.4330355400833183, + -2.5206657362497764, + 0.1919453712673838, + 1.7782666634371787, + -0.22981003650282733, + -0.4233650947364542, + -1.3739336823800556, + -4.744448285298656, + 0.41078286576938844, + 0.2198503546716565, + -4.949990753643417, + 1.599525194878205, + 3.9230026623695577, + 2.094233447191595, + -3.308099746818885, + -4.3619148663696246, + -0.8069372215208568, + -0.31619288705894766, + 2.2622864147208737, + 1.7822106604828154, + -0.3317218431250816, + 1.3483469394442205, + 2.043981599768457, + -0.3321920606619467, + -0.7869185545988505, + -1.5467856892585568, + -0.25708760611913994, + 4.164638837124573, + -3.2659992541243765, + -1.9281826961005137, + 4.136589214412604, + -0.6703021414154839, + -1.5635311694465044, + -0.44951947244297247, + -0.3761386627777358, + 3.9764711009886002, + 2.3419627959783003, + 0.6089978678769945, + -4.338425696323253, + -2.5225599257667737, + 1.6428508719923456, + 0.12007001975983786, + 0.24402194312458406, + -0.2773245069485185, + 1.628382866958211, + 1.0067797556961153, + -2.66536832367383, + -1.4735063838523634, + 3.620964936225569, + 6.36265096697167, + 4.446035444782243, + -1.7217799443553965, + -3.5869356337268425 ], "z": [ - -0.1332549867679698, - 0.27291870991937145, - -0.611012267273753, - -0.2845008401372673, - -0.23934607177095424, - -0.989885897143346, - -0.5913310286751665, - 0.4918869311562419, - 0.02152328028019099, - 0.6527665776802173, - 1.1779896925604951, - -0.4708636708683358, - 0.2956680935739928, - 0.12663008610763415, - 1.6942631052609094, - -0.5648214463106873, - -0.6120798399280499, - -0.8126412115199241, - -1.5597933259979462, - -0.20172561562931726, - -1.047714000388964, - 
0.2833283572551736, - 1.947988206124562, - -1.0940250957737738, - -0.700773597471764, - 0.08586194539976243, - -1.4115772304017518, - -0.08515973093916311, - 0.4719010931574096, - -0.34883138810793957, - 20.966696914227896, - -0.3274459280580112, - 0.29271254763675336, - 0.6196062751105513, - -0.8265694284581329, - 0.19528142811908988, - -1.0421850982750887, - -1.4945598964235005, - -0.7987533916470302, - -0.4414013258569815, - -0.9833877041475038, - -0.014107758956231299, - 1.6975210715833469, - -1.3432928236773465, - -0.5691741516819883, - -0.016253383951449877, - -0.5609881949423667, - -0.7751208351141133, - 0.14824692386359914, - 0.00927794382893188, - 0.3179320083242132, - -0.3576194520490325, - 2.211293354678765, - -0.40672738166071587, - -0.6075844208483971, - 0.6765252189015973, - -2.020671173119792, - -0.8020750071696471, - 0.04334994483721389, - -1.086057747558172, - -0.8670786486384499, - -0.7475883165995749, - -0.4853487655958365, - -0.83531533811805, - 0.734179483116577, - -0.6696378871532909, - -2.387871871639611, - 0.2056780509902696, - -0.6271404340494551, - -0.3105679935654806, - 0.15434282400106256, - -0.041509387748856236, - -1.3518728132750355, - -0.30979810793800244, - -0.4076506800725293, - -1.4368858634288142, - -0.439837150288455, - 3.284033180624785, - 16.222883878857836, - -0.2875562816754254, - -0.3592141552110312, - -0.881347200571578, - 0.3237967022487916, - -0.299888668387499, - 0.3693021525982787, - -1.0364700904846793, - -2.1255576816295476, - 2.884194216499615, - 0.41893252327431185, - 0.7829805728736471, - -0.9766590654787115, - -0.6126549175381664, - 0.8963086398057337, - -0.06684147199340751, - 0.9059223522651572, - -1.166783909012498, - 0.2518120483509488, - -1.9813558678292322, - 0.5081446050076046, - -2.179479213880696, - -0.521918288574282, - -1.5747977920674756, - -0.43605984926764313, - -0.2761240963499135, - -0.6427173988235734, - -1.2010568896745912, - 1.0894778343477631, - -0.10654044827049912, - 1.4019091347689008, - 
-1.0265730834270628, - -1.3601266111839339, - 0.15646587786945004, - -0.9382090527097158, - 0.5053523595060782, - -1.4667694835270888, - -1.1604490104069942, - 0.5150624787802507, - -1.1027292782221167, - -0.839105415019863, - 0.2776553132752387, - 0.21503674198584916, - -2.0704733945969522, - 0.9887373441404025, - 1.5217215798291597, - -0.3220752306439535, - 0.28679952660295216, - 0.1587864411478722, - 1.290438072221053, - -0.6496490293445591, - 0.002632923994493077, - -0.3760188151867507, - 2.7115844513505634, - -0.9828777983239214, - 1.083015524634079, - -2.2842817622522524, - 0.4199335852091797, - 0.2738786451283277, - 0.10490493644303588, - 1.1314337861119779, - -0.35609718357504794, - -1.5997477704369565, - -0.5515669205914375, - -0.3926802265739144, - 0.701690271753913, - 0.7841817274062157, - -0.2575142832338854, - 0.3640003794240824, - -0.05399275040865746, - -0.06743258867823183, - 0.3163775826699158, - -0.1462798928758705, - 1.9942716803820362, - 0.9359999525922797, - -0.5623556831453945, - -0.8469774592591988, - -0.8779543598166668, - -0.011611440250646143, - 1.0550222920744383, - -0.3923020976890576, - -0.7590866947217605, - -0.9289791128963573, - -0.9967515617595287, - -0.47678452948726496, - -1.2313850212238613, - -1.0910139483733312, - -0.6840759561480423, - -0.31184363442804364, - 0.004357646405543191, - -0.7426386538844539, - -1.3054675039254935, - 0.1566801829898895, - 1.368288604939323, - -1.3662589846026323, - -0.3096134232154807, - 0.1770769632986913, - -0.5471472073762823, - -0.6466621016673111, - -1.248041765770726, - 0.3939036866698287, - -0.6813283958170979, - 1.2742176082427117, - 0.744638907265581, - -0.39433976596142706, - -0.5799619618503732, - 7.060467356342127, - -0.2211897015322298, - -1.5809316826536888, - -0.80120105935382, - 2.0811505851585417, - 0.26892275492564527, - -1.480500416081267, - -1.3618537976781397, - -1.970376263321043, - -0.34348196426888905, - 0.18144894292221464 + -0.08056349942375234, + 0.028786945680939334, + 
-0.44937341234933376, + -0.16683803717768994, + -0.27111339571903237, + -0.8468634775668962, + -0.5502370446162456, + 0.4632032199948351, + 0.017798805062920902, + 0.571825806803118, + 0.8610933028242622, + -0.41718466981031516, + 0.16298126000548457, + 0.0792910898343927, + 1.8128567388748213, + -0.5915769084573904, + -0.6600713773871396, + -0.8618537879404934, + -1.3168602103635203, + -0.22386892528755858, + -1.2014952503028953, + 0.07455281400208881, + 1.9166641224580574, + -1.2135979612511367, + -0.6447006133013446, + -0.12282338586990073, + -1.3674628062622514, + -0.20112813577814573, + 0.4490646679359641, + -0.30546512881185595, + 21.075170142106238, + -0.3461901436442329, + 0.028559610619205766, + 0.7183639705212421, + -0.8787618828044577, + 0.12710832268882694, + -0.9921767250704747, + -1.4080096325967255, + -0.8219680893706439, + -0.16275604933559068, + -1.1533739954894404, + -0.0337917509503916, + 1.6799185317306151, + -1.6512330639034185, + -0.7302482647171719, + -0.057800768168225665, + -0.5793990807734664, + -0.3083473792952449, + 0.06495870174368194, + 0.26669955368531123, + 0.47673235544920756, + -0.36120354829237655, + 2.041100499830188, + -0.4143178336544228, + -0.6015274112600257, + 0.7478337195452976, + -1.8828288126503987, + -0.7500800314342261, + -0.12798166920024825, + -1.1867701805068653, + -1.1196282252328504, + -0.6493267239833703, + -0.43756308949814743, + -0.7533933663839528, + 0.4302699784022649, + -0.7319465680265883, + -2.2780987950134697, + -0.06682395650749673, + -0.4419775297297648, + -0.3445492341786709, + -0.11254660017123147, + -0.18043721696072984, + -1.2245532604994218, + -0.4508568352724152, + -0.5245332955884419, + -1.4928953735994406, + -0.3800223951921673, + 3.3336995724742122, + 16.58065950845859, + -0.10654620050939036, + 0.008192649556087934, + -0.6423735209945476, + 0.152297292659392, + -0.36560878774545547, + 0.49191207289471833, + -1.1099575355724403, + -1.7319271224030202, + 2.7873230415457813, + 0.3265442671574655, 
+ 0.5945942363898596, + -0.9086831018342412, + -0.5682675113500351, + 0.9809557365077258, + -0.2955124194307041, + 0.5430900659914154, + -0.875669448080304, + 0.015598845436188625, + -1.7110254542689507, + 0.7159377765562558, + -2.110361788242772, + -0.6140117208816129, + -2.2213300253252046, + -0.4855754948258507, + -0.020397403392057183, + -0.6674547258338281, + -1.0322834653250168, + 0.841765444160154, + -0.16573915738325531, + 1.3532932568239346, + -1.0306742402186815, + -1.1673487760360945, + 0.055627548134166156, + -1.0807593074388993, + 0.2489197605289283, + -1.2548555213743704, + -1.1222118522352336, + 0.37543854283410066, + -0.8582102171406314, + -0.8972184590567043, + 0.0557980004106488, + 0.2075716873589323, + -2.118355272391502, + 0.8960991584668245, + 1.619249911125445, + -0.48425138660513256, + 0.20873183469256704, + 0.13649392700008095, + 1.1043508356549243, + -0.7750278052214509, + -0.03990788295694306, + -0.3456984462829911, + 2.7893599209321964, + -0.8224626080103721, + 1.0510801949597657, + -1.8906202128098901, + 0.1765606034112846, + 0.19411942215412678, + 0.1833261337225062, + 1.0667397058802544, + -0.2867439193164209, + -1.4817313055181802, + -0.43374751726861893, + -0.4787152619003991, + 0.48739304026159463, + 0.7960149868538892, + -0.1506706339034871, + 0.342128657741293, + -0.10838198808233662, + -0.2324437265816887, + 0.23903962222116307, + -0.10779761283468126, + 1.9882259122761288, + 0.7137857571409595, + -0.38100084383652005, + -0.7369267139576294, + -0.5866833474499575, + -0.2601928237466296, + 0.7355547951926357, + -0.3915917226406725, + -0.6608095025394163, + -0.4820656942062738, + -0.890245666833216, + -0.2379812125363602, + -1.1282432906633666, + -0.9714088903315417, + -0.6977684963639371, + -0.3478849704694565, + -0.01431592220793677, + -0.9684387291859726, + -0.6535085737776317, + -0.06077323296566774, + 1.2253400465197497, + -1.088695983798872, + -0.3074576335207782, + 0.06497871050632115, + -0.5866301760000734, + 
-0.6938107871003306, + -0.9784676321572428, + 0.5843442289245169, + -0.7814587484817813, + 1.068832535600227, + 0.5516405641878677, + -0.37564916203542914, + -0.6494838123648803, + 7.139715614122555, + 0.003818061511962325, + -1.5702626196716343, + -0.6436689069476974, + 1.7452512841104453, + 0.21842118139393515, + -1.1804007080469503, + -0.48422171683216453, + -1.8277115375834077, + -0.3770607910856339, + -0.14827461968716107 ] } ], @@ -2512,17 +2298,17 @@ }, "xaxis": { "title": { - "text": "PC1 49.70%" + "text": "PC1 49.73%" } }, "yaxis": { "title": { - "text": "PC2 7.85%" + "text": "PC2 7.93%" } }, "zaxis": { "title": { - "text": "PC3 7.20%" + "text": "PC3 7.18%" } } }, @@ -3333,7 +3119,7 @@ } }, "title": { - "text": "Total Explained Variance: 64.75%" + "text": "Total Explained Variance: 64.84%" } } } @@ -3349,7 +3135,7 @@ "print(l_dict)\n", "\n", "fig = px.scatter_3d(\n", - " pComponents_imputed, x=0, y=1, z=2,\n", + " pComponents_final, x=0, y=1, z=2,\n", " title=f'Total Explained Variance: {total_explained_percentage:.2f}%',\n", " labels=l_dict\n", ")\n", @@ -3358,26 +3144,26 @@ ] }, { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], "source": [ - "imputed_df.to_csv(\"../data/unlabeled/preprocessed/hdro_preprocessed.csv\")" - ] + "## Conversion of the Dataset to CSV" + ], + "cell_type": "markdown", + "metadata": {} }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "final_df.to_csv(\"../data/unlabeled/preprocessed/hdro_preprocessed.csv\")" + ] } ], "metadata": { "kernelspec": { - "name": "python394jvsc74a57bd01e002e48d41ce7b93ab532133c559bfbfa167161e2109b258903f10473d9f54b", - "display_name": "Python 3.9.4 64-bit ('wsenv': conda)" + "name": "python374jvsc74a57bd0dca0ade3e726a953b501b15e8e990130d2b7799f14cfd9f4271676035ebe5511", + "display_name": "Python 3.7.4 64-bit ('base': conda)" }, "language_info": { "codemirror_mode": { @@ -3389,7 
+3175,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.4" + "version": "3.7.4" } }, "nbformat": 4, From 12eb7eef99c821d7882ef0b890f4350038055f58 Mon Sep 17 00:00:00 2001 From: Joachim Bache-Mathiesen Date: Fri, 21 May 2021 11:39:46 +0200 Subject: [PATCH 5/6] Generate and link to notebooks --- documentation/WaterSecurity/index.html | 5 +- .../notebooks/Cities Test Set Processing.html | 14373 +++ .../Dataset Normalization and Imputation.html | 19070 ++++ .../notebooks/Merge Unlabeled to Labeled.html | 15192 +++ .../WaterSecurity/notebooks/app.html | 13269 +++ .../notebooks/combine_unlabeled.html | 13712 +++ .../notebooks/prep_aquastat-checkpoint.html | 15521 +++ .../notebooks/prep_aquastat.html | 15522 +++ .../notebooks/prep_economic_v2.html | 14474 +++ .../notebooks/prep_edstats-checkpoint.html | 16922 +++ .../WaterSecurity/notebooks/prep_edstats.html | 16665 +++ .../notebooks/prep_hdro_v2-checkpoint.html | 85672 ++++++++++++++++ .../WaterSecurity/notebooks/prep_hdro_v2.html | 85511 +++++++++++++++ .../unlabeled_preprocessing/index.html | 30 +- generate_documentation.py | 16 + unlabeled_preprocessing/__init__.py | 8 + 16 files changed, 325959 insertions(+), 3 deletions(-) create mode 100644 documentation/WaterSecurity/notebooks/Cities Test Set Processing.html create mode 100644 documentation/WaterSecurity/notebooks/Dataset Normalization and Imputation.html create mode 100644 documentation/WaterSecurity/notebooks/Merge Unlabeled to Labeled.html create mode 100644 documentation/WaterSecurity/notebooks/app.html create mode 100644 documentation/WaterSecurity/notebooks/combine_unlabeled.html create mode 100644 documentation/WaterSecurity/notebooks/prep_aquastat-checkpoint.html create mode 100644 documentation/WaterSecurity/notebooks/prep_aquastat.html create mode 100644 documentation/WaterSecurity/notebooks/prep_economic_v2.html create mode 100644 documentation/WaterSecurity/notebooks/prep_edstats-checkpoint.html 
create mode 100644 documentation/WaterSecurity/notebooks/prep_edstats.html create mode 100644 documentation/WaterSecurity/notebooks/prep_hdro_v2-checkpoint.html create mode 100644 documentation/WaterSecurity/notebooks/prep_hdro_v2.html diff --git a/documentation/WaterSecurity/index.html b/documentation/WaterSecurity/index.html index 9503205..5b34c57 100644 --- a/documentation/WaterSecurity/index.html +++ b/documentation/WaterSecurity/index.html @@ -36,7 +36,10 @@

Sub-modules

WaterSecurity.unlabeled_preprocessing
-
+

Notebooks below +* Combining datasets +* Econ preprocessing +* [Education …

WaterSecurity.utils
diff --git a/documentation/WaterSecurity/notebooks/Cities Test Set Processing.html b/documentation/WaterSecurity/notebooks/Cities Test Set Processing.html new file mode 100644 index 0000000..06b3d02 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/Cities Test Set Processing.html @@ -0,0 +1,14373 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
+

Retrieve the list of the world's most populous cities

+
+
+
+
+
+
In [1]:
+
+
+
import pandas as pd
+import sys
+sys.path.append('..')
+from data.unlabeled import WORLD_CITIES as wct
+
+ +
+
+
+ +
+
+
+
In [2]:
+
+
+
wct.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citycity_asciilatlngcountryiso2iso3admin_namecapitalpopulationid
0TokyoTokyo35.6897139.6922JapanJPJPNTōkyōprimary37977000.01392685764
1JakartaJakarta-6.2146106.8451IndonesiaIDIDNJakartaprimary34540000.01360771077
2DelhiDelhi28.660077.2300IndiaININDDelhiadmin29617000.01356872604
3MumbaiMumbai18.966772.8333IndiaININDMahārāshtraadmin23355000.01356226629
4ManilaManila14.5958120.9772PhilippinesPHPHLManilaprimary23088000.01608618140
+
+
+ +
+ +
+
+ +
+
+
+
+

Dataset normalization

+
+
+
+
+
+
In [3]:
+
+
+
wct.isnull().sum()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[3]:
+ + + + +
+
city              0
+city_ascii        0
+lat               0
+lng               0
+country           0
+iso2             31
+iso3              0
+admin_name       76
+capital       18943
+population      973
+id                0
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
In [4]:
+
+
+
len(wct[wct.columns].drop_duplicates()), len(wct[wct.columns]) # no duplicates
+
+ +
+
+
+ +
+
+ + +
+ +
Out[4]:
+ + + + +
+
(26569, 26569)
+
+ +
+ +
+
+ +
+
+
+
In [5]:
+
+
+
wct = wct.drop(columns=["city", "iso2", "iso3", "admin_name", "capital", "id"])
+
+ +
+
+
+ +
+
+
+
In [6]:
+
+
+
wct.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + + +
+
Index(['city_ascii', 'lat', 'lng', 'country', 'population'], dtype='object')
+
+ +
+ +
+
+ +
+
+
+
In [7]:
+
+
+
wct = wct.rename(columns={'city_ascii':'city'})
+
+ +
+
+
+ +
+
+
+
In [8]:
+
+
+
wct.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[8]:
+ + + + +
+
Index(['city', 'lat', 'lng', 'country', 'population'], dtype='object')
+
+ +
+ +
+
+ +
+
+
+
+

Missing values

+
+
+
+
+
+
In [9]:
+
+
+
to_drop = wct[wct.population.isnull()]
+to_drop
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citylatlngcountrypopulation
824Al Quds31.776435.2269West BankNaN
827Ngerulmud7.5006134.6242PalauNaN
6255Un'goofaaru5.668173.0302MaldivesNaN
6393Banqiao25.0143121.4672TaiwanNaN
7568Naifaru5.444273.3662MaldivesNaN
..................
9469We-20.9000167.2667New CaledoniaNaN
9470Presevo42.306721.6500SerbiaNaN
9471Bujanovac42.466721.7667SerbiaNaN
9472Kitamilo0.222233.2061UgandaNaN
9473Tarrafal15.2833-23.7667Cabo VerdeNaN
+

973 rows × 5 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [10]:
+
+
+
# dropping missing values from the dataset
+wctc = wct.copy(deep=False) 
+wctc.dropna(inplace=True)
+
+ +
+
+
+ +
+
+
+
In [11]:
+
+
+
wctc.isnull().sum() # cleaned dataset
+
+ +
+
+
+ +
+
+ + +
+ +
Out[11]:
+ + + + +
+
city          0
+lat           0
+lng           0
+country       0
+population    0
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
+

Look at the positions of the cities with missing data on a map to see whether any country is not represented

+
+
+
+
+
+
+

We need to verify whether the null values that have been dropped are randomly distributed or follow a hidden pattern. This way we can see whether all geographic areas are represented.

+ +
+
+
+
+
+
In [12]:
+
+
+
import geojson
+import folium
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
+---------------------------------------------------------------------------
+ModuleNotFoundError                       Traceback (most recent call last)
+<ipython-input-12-6a09eb20f703> in <module>
+----> 1 import geojson
+      2 import folium
+
+ModuleNotFoundError: No module named 'geojson'
+
+
+ +
+
+ +
+
+
+
In [14]:
+
+
+
data_to_plot = list(wctc.columns[3:5])
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
+    """
+    Turn a dataframe containing point data into a geojson formatted python dictionary
+    
+    df : the dataframe to convert to geojson
+    properties : a list of columns in the dataframe to turn into geojson feature properties
+    lat : the name of the column in the dataframe that contains latitude data
+    lon : the name of the column in the dataframe that contains longitude data
+    """
+    
+    # create a new python dict to contain our geojson data, using geojson format
+    geojson = {'type':'FeatureCollection', 'features':[]}
+
+    # loop through each row in the dataframe and convert each row to geojson format
+    for _, row in df.iterrows():
+        # create a feature template to fill in
+        feature = {'type':'Feature',
+                   'properties':{},
+                   'geometry':{'type':'Point',
+                               'coordinates':[]}}
+
+        # fill in the coordinates
+        feature['geometry']['coordinates'] = [row[lon],row[lat]]
+
+        # for each column, get the value and add it as a new feature property
+        for prop in properties:
+            feature['properties'][prop] = row[prop]
+        
+        # add this feature (aka, converted dataframe row) to the list of features inside our dict
+        geojson['features'].append(feature)
+    
+    return geojson
+
+ +
+
+
+ +
+
+
+
In [16]:
+
+
+
geo = df_to_geojson(to_drop, data_to_plot, lat = "lat", lon = "lng")
+
+ +
+
+
+ +
+
+
+
In [17]:
+
+
+
m = folium.Map([9,9], zoom_start=2)
+
+folium.GeoJson(geo).add_to(m)
+
+# uncomment below to see the map
+# m
+
+ +
+
+
+ +
+
+ + +
+ +
Out[17]:
+ + + + +
+
<folium.features.GeoJson at 0x7f905d316a30>
+
+ +
+ +
+
+ +
+
+
+
In [18]:
+
+
+
to_drop[to_drop["country"]== "Malta"] # all null values
+
+ +
+
+
+ +
+
+ + +
+ +
Out[18]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citylatlngcountrypopulation
7901Sliema35.912514.5019MaltaNaN
8144Fgura35.870314.5133MaltaNaN
8151Hamrun35.884714.4844MaltaNaN
8238Senglea35.887514.5169MaltaNaN
8264Tarxien35.865814.5150MaltaNaN
..................
8883Santa Lucija36.043114.2172MaltaNaN
8935Zebbug36.072214.2358MaltaNaN
8946Imgarr35.920614.3664MaltaNaN
8971Gharb36.060014.2089MaltaNaN
9035San Lawrenz36.055614.2036MaltaNaN
+

65 rows × 5 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

It looks like some countries have more missing information than others.

+ +
+
+
+
+
+
+

Retrieve the world biggest cities

A big city is defined as one with more than 500,000 residents in the given year, 2020.

+ +
+
+
+
+
+
In [19]:
+
+
+
big_cities = wctc.sort_values(by=["population"], ascending=False).head(1000)
+
+ +
+
+
+ +
+
+
+
In [20]:
+
+
+
big_cities.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[20]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citylatlngcountrypopulation
0Tokyo35.6897139.6922Japan37977000.0
1Jakarta-6.2146106.8451Indonesia34540000.0
2Delhi28.660077.2300India29617000.0
3Mumbai18.966772.8333India23355000.0
4Manila14.5958120.9772Philippines23088000.0
+
+
+ +
+ +
+
+ +
+
+
+
In [21]:
+
+
+
big_cities.to_csv('bigcities.csv',index=False)
+
+ +
+
+
+ +
+
+
+
In [22]:
+
+
+
geo2 = df_to_geojson(big_cities, data_to_plot, lat = "lat", lon = "lng")
+m2 = folium.Map([9,9], zoom_start=2)
+folium.GeoJson(geo2).add_to(m2)
+
+# uncomment below to see the map
+# m2
+
+ +
+
+
+ +
+
+ + +
+ +
Out[22]:
+ + + + +
+
<folium.features.GeoJson at 0x7f905e015a30>
+
+ +
+ +
+
+ +
+
+
+
+

We can see that some countries are not represented with this approach, for example the African state of Namibia.

+ +
+
+
+
+
+
+

Retrieve the most populated cities (6 max) for every country

+
+
+
+
+
+
In [23]:
+
+
+
countries = list(wctc["country"].unique())
+top_cities = pd.DataFrame(columns = wctc.columns)
+
+ +
+
+
+ +
+
+
+
In [24]:
+
+
+
top_cities = top_cities.append([wct[wct["country"] == country].sort_values(by=["population"], ascending=False).head(6) for country in countries], ignore_index = True)
+
+ +
+
+
+ +
+
+
+
In [25]:
+
+
+
top_cities
+
+ +
+
+
+ +
+
+ + +
+ +
Out[25]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citylatlngcountrypopulation
0Tokyo35.6897139.6922Japan37977000.0
1Osaka34.6936135.5019Japan14977000.0
2Nagoya35.1167136.9333Japan9113000.0
3Yokohama35.4333139.6333Japan3748781.0
4Fukuoka33.5903130.4019Japan2128000.0
..................
1131Grand Turk21.4664-71.1360Turks And Caicos Islands5801.0
1132Avarua-21.2070-159.7710Cook Islands5445.0
1133Vatican City41.900012.4478Vatican City825.0
1134Stanley-51.7000-57.8500Falkland Islands (Islas Malvinas)2213.0
1135Grytviken-54.2806-36.5080South Georgia And South Sandwich Islands99.0
+

1136 rows × 5 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [26]:
+
+
+
top_cities.to_csv('bigcities_allcountries.csv',index=False)
+
+ +
+
+
+ +
+
+
+
In [27]:
+
+
+
geo3 = df_to_geojson(top_cities, data_to_plot, lat = "lat", lon = "lng")
+m3 = folium.Map([9, 9], zoom_start=2)
+folium.GeoJson(geo3).add_to(m3)
+
+# uncomment below to see the map
+m3
+
+ +
+
+
+ +
+
+ + +
+ +
Out[27]:
+ + + +
+
Make this Notebook Trusted to load map: File -> Trust Notebook
+
+ +
+ +
+
+ +
+
+
+
+

In this way every country should be represented, probably including those with many cities that have been dropped.

+ +
+
+
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/Dataset Normalization and Imputation.html b/documentation/WaterSecurity/notebooks/Dataset Normalization and Imputation.html new file mode 100644 index 0000000..17715d2 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/Dataset Normalization and Imputation.html @@ -0,0 +1,19070 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
import pandas as pd
+import numpy as np
+from langdetect import detect as langdetect
+import random
+seed = 42
+np.random.seed(seed)
+random.seed(seed)
+import sys
+sys.path.append("..")
+from data.labeled.raw import CWA as cwa, CWR as cwr
+from data.labeled.preprocessed import LABELED_CITIES_PATH, RISKS_MAPPING_PATH, SEVERITY_MAPPING_PATH, IMPUTATION_REPORT_PATH
+
+ +
+
+
+ +
+
+
+
+

Preprocess whole dataset

+
+
+
+
+
+
In [2]:
+
+
+
cwa.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Account NoOrganisationCityCountryRegionAccessC40Reporting YearRisks to city's water supplyAdaptation actionAction descriptionCity Location
01093City of AtlantaAtlantaUnited States of AmericaNorth AmericapublicNaN2018Increased water stress or scarcityInvestment in existing water supply infrastruc...The city is building a new reservoir, fixing t...(33.749, -84.388)
11184City of AustinAustinUnited States of AmericaNorth AmericapublicC402018Increased water stress or scarcityConservation awareness and educationNaN(30.2672, -97.7431)
21184City of AustinAustinUnited States of AmericaNorth AmericapublicC402018Increased water stress or scarcityConservation incentivesNaN(30.2672, -97.7431)
31184City of AustinAustinUnited States of AmericaNorth AmericapublicC402018Increased water stress or scarcityWater use restrictionsNaN(30.2672, -97.7431)
41184City of AustinAustinUnited States of AmericaNorth AmericapublicC402018Increased water stress or scarcityEfficiency regulations or standardsNaN(30.2672, -97.7431)
+
+
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
cwr.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[3]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Account NoOrganisationCityCountryRegionAccessC40Reporting yearRisks to city's water supplyTimescaleMagnitudeRisk descriptionCurrent populationPopulation yearCity Location
01093City of AtlantaAtlantaUnited States of AmericaNorth AmericapublicNaN2018Increased water stress or scarcityShort-termSeriousThe city depends on one river for water supply...450182.02017(33.749, -84.388)
11093City of AtlantaAtlantaUnited States of AmericaNorth AmericapublicNaN2018Higher water pricesCurrentSeriousThe water cost burden is felt across all ZIP c...450182.02017(33.749, -84.388)
21184City of AustinAustinUnited States of AmericaNorth AmericapublicC402018Increased water stress or scarcityMedium-termExtremely seriousNaN1265974.02017(30.2672, -97.7431)
31499Ajuntament de BarcelonaBarcelonaSpainEuropepublicC402018Increased water stress or scarcityCurrentExtremely seriousThe downscaling of the climate projections at ...1604555.02015(41.3823, 2.1775)
41499Ajuntament de BarcelonaBarcelonaSpainEuropepublicC402018Declining water qualityMedium-termSeriousA reduction in water resources availability c...1604555.02015(41.3823, 2.1775)
+
+
+ +
+ +
+
+ +
+
+
+
+

Normalize column names

+
+
+
+
+
+
In [4]:
+
+
+
# Convert columns names to lowercase
+cwa.columns = cwa.columns.str.lower().str.replace(" ", "_").str.replace("'","")
+cwr.columns = cwr.columns.str.lower().str.replace(" ", "_").str.replace("'","")
+
+ +
+
+
+ +
+
+
+
+

Find counts of nulls per column

+
+
+
+
+
+
In [5]:
+
+
+
cwr.isnull().sum()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[5]:
+ + + + +
+
account_no                       0
+organisation                     0
+city                             0
+country                          0
+region                           0
+access                           0
+c40                            504
+reporting_year                   0
+risks_to_citys_water_supply     40
+timescale                       11
+magnitude                      159
+risk_description               107
+current_population               0
+population_year                  0
+city_location                    0
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
In [6]:
+
+
+
cwa.isnull().sum()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + + +
+
account_no                       0
+organisation                     0
+city                             0
+country                          0
+region                           0
+access                           0
+c40                            536
+reporting_year                   0
+risks_to_citys_water_supply     75
+adaptation_action               19
+action_description             146
+city_location                    0
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
+

Find columns that exist in both datasets

+
+
+
+
+
+
In [7]:
+
+
+
id_columns = list(set(cwr.columns) & set(cwa.columns))
+id_columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[7]:
+ + + + +
+
['region',
+ 'city',
+ 'access',
+ 'country',
+ 'risks_to_citys_water_supply',
+ 'c40',
+ 'organisation',
+ 'city_location',
+ 'account_no',
+ 'reporting_year']
+
+ +
+ +
+
+ +
+
+
+
+

Check how many nulls exist in these columns

+
+
+
+
+
+
In [8]:
+
+
+
cwr[id_columns].isnull().sum()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[8]:
+ + + + +
+
region                           0
+city                             0
+access                           0
+country                          0
+risks_to_citys_water_supply     40
+c40                            504
+organisation                     0
+city_location                    0
+account_no                       0
+reporting_year                   0
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
In [9]:
+
+
+
cwa[id_columns].isnull().sum()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + + +
+
region                           0
+city                             0
+access                           0
+country                          0
+risks_to_citys_water_supply     75
+c40                            536
+organisation                     0
+city_location                    0
+account_no                       0
+reporting_year                   0
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
+

Replace nulls there with "nan" strings, so that merging is done correctly

+
+
+
+
+
+
In [10]:
+
+
+
cwr['c40'] = cwr['c40'] == 'C40'
+cwr[id_columns] = cwr[id_columns].fillna('nan')
+
+ +
+
+
+ +
+
+
+
In [11]:
+
+
+
cwa['c40'] = cwa['c40'] == 'C40'
+cwa[id_columns] = cwa[id_columns].fillna('nan')
+
+ +
+
+
+ +
+
+
+
+

Check id_columns uniqueness

+
+
+
+
+
+
In [12]:
+
+
+
len(cwa[id_columns]), len(cwa[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[12]:
+ + + + +
+
(685, 484)
+
+ +
+ +
+
+ +
+
+
+
In [13]:
+
+
+
len(cwr[id_columns]), len(cwr[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[13]:
+ + + + +
+
(608, 578)
+
+ +
+ +
+
+ +
+
+
+
+

Aggregate by grouping by id_columns and applying set operations to the other columns

+
+
+
+
+
+
In [14]:
+
+
+
cwa_agg = cwa.groupby(id_columns).agg(set).reset_index()
+cwr_agg = cwr.groupby(id_columns).agg(set).reset_index()
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
len(cwa_agg[id_columns]), len(cwa_agg[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[15]:
+ + + + +
+
(484, 484)
+
+ +
+ +
+
+ +
+
+
+
In [16]:
+
+
+
len(cwr_agg[id_columns]), len(cwr_agg[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[16]:
+ + + + +
+
(578, 578)
+
+ +
+ +
+
+ +
+
+
+
+

Make sure that the risk magnitude (which is going to be used as an entry label) has a single value

+
+
+
+
+
+
In [17]:
+
+
+
cwr_agg.magnitude = cwr_agg.magnitude.apply(lambda x: 'Extremely serious' if 'Extremely serious' in x  else ('Serious' if 'Serious' in x else list(x)[0]))
+
+ +
+
+
+ +
+
+
+
In [18]:
+
+
+
cwr_agg.magnitude.unique()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[18]:
+ + + + +
+
array([nan, 'Serious', 'Extremely serious', 'Less Serious'], dtype=object)
+
+ +
+ +
+
+ +
+
+
+
+

Merge (outer) the datasets and find the columns that hold sets (list_columns)

filling the non-merged rows with {np.nan} on list_columns to make sure the values in each column stay consistent

+ +
+
+
+
+
+
In [19]:
+
+
+
cw_data = pd.merge(cwa_agg, cwr_agg, on=id_columns, how='outer')
+list_columns = cw_data.iloc[0].apply(lambda x: isinstance(x, set))
+list_columns = list_columns[list_columns].index
+cw_data[list_columns] = cw_data[list_columns].applymap(lambda x: {np.nan} if pd.isna(x) else x)
+
+ +
+
+
+ +
+
+
+
In [20]:
+
+
+
len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[20]:
+ + + + +
+
(677, 677)
+
+ +
+ +
+
+ +
+
+
+
In [21]:
+
+
+
cw_data.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[21]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
regioncityaccesscountryrisks_to_citys_water_supplyc40organisationcity_locationaccount_noreporting_yearadaptation_actionaction_descriptiontimescalemagnituderisk_descriptioncurrent_populationpopulation_year
0AfricaAbujapublicNigeriaHigher water pricesFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018{Water metering}{nan}{Short-term}NaN{nan}{2440000.0}{2017}
1AfricaAbujapublicNigeriaInadequate or aging infrastructureFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018{Investment in existing water supply infrastru...{nan}{nan}NaN{nan}{nan}{nan}
2AfricaAbujapublicNigeriaIncreased water stress or scarcityFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018{Efficiency regulations or standards}{nan}{Short-term}NaN{nan}{2440000.0}{2017}
3AfricaBouaképublicCôte d'IvoireDeclining water qualityFalseCommune de Bouaké(7.4137, -5.0149)8316172018{Diversifying water supply (including new sour...{Approvisionnement en eau potable par la const...{Current}Extremely serious{La ville manque d'eau depuis plus de 5 mois}{715435.0}{2013}
4AfricaCape TownpublicSouth AfricaIncreased water stress or scarcityTrueCity of Cape Town(-33.9253, 18.4239)358582018{Water use restrictions, Water metering, Use o...{In terms of diversifying water supply (includ...{Current}Extremely serious{The City has generally been able to successfu...{4174510.0}{2017}
+
+
+ +
+ +
+
+ +
+
+
+
+

Extract longitude and latitude from city_location

+
+
+
+
+
+
In [22]:
+
+
+
cw_data['city_location'] = cw_data['city_location'].apply(eval)  # parse the "(lat, lon)" strings into tuples; NOTE(review): eval executes arbitrary code -- ast.literal_eval would be the safer parser for these literals
+cw_data['latitude'] = cw_data['city_location'].apply(lambda x: x[0])  # first element of the parsed tuple
+cw_data['longitude'] = cw_data['city_location'].apply(lambda x: x[1])  # second element of the parsed tuple
+id_columns = [x for x in id_columns] + ['latitude','longitude']  # new list: the identifying columns now include the coordinates
+len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())  # row count vs. number of distinct id rows
+
+ +
+
+
+ +
+
+ + +
+ +
Out[22]:
+ + + + +
+
(677, 677)
+
+ +
+ +
+
+ +
+
+
+
+

Find list columns that are singular (every entry holds exactly one element)

+
+
+
+
+
+
In [23]:
+
+
+
cw_value_columns = [ x for x in cw_data.columns if x not in id_columns]
+# Values that can be converted back to strings from sets
+
+singular_cols = cw_data.set_index(id_columns)[list_columns].applymap(lambda x: len(x)==1).product() > 0
+singular_cols
+len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[23]:
+ + + + +
+
(677, 677)
+
+ +
+ +
+
+ +
+
+
+
+

Convert singular columns to non list ones

+
+
+
+
+
+
In [24]:
+
+
+
# magnitude is nicely singular per id
+cw_data.loc[:, singular_cols[singular_cols].index] = cw_data.loc[:, singular_cols[singular_cols].index].applymap(lambda x: list(x)[0])
+list_columns = [x for x in list_columns if x not in singular_cols[singular_cols].index]
+
+ +
+
+
+ +
+
+
+
+

Remove entries that are invalid or written in a language other than English from the list columns

also convert sets to lists

+ +
+
+
+
+
+
In [25]:
+
+
+
def try_lang_detect_except(x):
+    try:
+        return (langdetect(x)=='en')
+    except:
+        return False
+cw_data = cw_data.applymap(lambda x: [t for t in x if not pd.isnull(t)  and  (t!='') and (t!='ERROR: #NAME?') and try_lang_detect_except(t)] if isinstance(x,set) else x)
+len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[25]:
+ + + + +
+
(677, 677)
+
+ +
+ +
+
+ +
+
+
+
+

Fix specific risks annotations by inspection

+
+
+
+
+
+
In [26]:
+
+
+
cw_data.risks_to_citys_water_supply.unique()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[26]:
+ + + + +
+
array(['Higher water prices', 'Inadequate or aging infrastructure',
+       'Increased water stress or scarcity', 'Declining water quality',
+       'nan', 'Increased water demand', 'Regulatory',
+       'Energy supply issues', 'Inadequate or ageing infrastructure',
+       'Declining water quality: Declining water quality'], dtype=object)
+
+ +
+ +
+
+ +
+
+
+
In [27]:
+
+
+
cw_data.risks_to_citys_water_supply = cw_data.risks_to_citys_water_supply.apply(lambda x: x.replace('Inadequate or ageing infrastructure','Inadequate or aging infrastructure'))
+cw_data['risks_to_citys_water_supply'] = cw_data['risks_to_citys_water_supply'].apply(lambda x: x.replace('Declining water quality: Declining water quality', 'Declining water quality'))
+cw_data.risks_to_citys_water_supply.unique()
+len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[27]:
+ + + + +
+
(677, 598)
+
+ +
+ +
+
+ +
+
+
+
In [28]:
+
+
+
cw_data.drop_duplicates(subset=id_columns,inplace=True)
+
+ +
+
+
+ +
+
+
+
In [29]:
+
+
+
cw_data.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[29]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
regioncityaccesscountryrisks_to_citys_water_supplyc40organisationcity_locationaccount_noreporting_yearadaptation_actionaction_descriptiontimescalemagnituderisk_descriptioncurrent_populationpopulation_yearlatitudelongitude
0AfricaAbujapublicNigeriaHigher water pricesFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018[][][Short-term]NaN[]2440000.02017.09.076477.39857
1AfricaAbujapublicNigeriaInadequate or aging infrastructureFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018[Investment in existing water supply infrastru...[][]NaN[]NaNNaN9.076477.39857
2AfricaAbujapublicNigeriaIncreased water stress or scarcityFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018[Efficiency regulations or standards][][Short-term]NaN[]2440000.02017.09.076477.39857
3AfricaBouaképublicCôte d'IvoireDeclining water qualityFalseCommune de Bouaké(7.4137, -5.0149)8316172018[Diversifying water supply (including new sour...[][]Extremely serious[]715435.02013.07.41370-5.01490
4AfricaCape TownpublicSouth AfricaIncreased water stress or scarcityTrueCity of Cape Town(-33.9253, 18.4239)358582018[Water use restrictions, Use of non-potable wa...[In terms of diversifying water supply (includ...[]Extremely serious[The City has generally been able to successfu...4174510.02017.0-33.9253018.42390
+
+
+ +
+ +
+
+ +
+
+
+
In [30]:
+
+
+
to_drop = ['timescale','current_population','population_year']
+cw_data['population_year'] = cw_data['population_year'].fillna(0)
+t = cw_data.sort_values('population_year' ).groupby([x for x in cw_data.columns if (x !='population_year') and x not in list_columns],dropna=False).last().reset_index()
+
+ +
+
+
+ +
+
+
+
In [31]:
+
+
+
cw_data
+
+ +
+
+
+ +
+
+ + +
+ +
Out[31]:
+ + + +
+

regioncityaccesscountryrisks_to_citys_water_supplyc40organisationcity_locationaccount_noreporting_yearadaptation_actionaction_descriptiontimescalemagnituderisk_descriptioncurrent_populationpopulation_yearlatitudelongitude
0AfricaAbujapublicNigeriaHigher water pricesFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018[][][Short-term]NaN[]2440000.02017.09.076477.39857
1AfricaAbujapublicNigeriaInadequate or aging infrastructureFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018[Investment in existing water supply infrastru...[][]NaN[]NaN0.09.076477.39857
2AfricaAbujapublicNigeriaIncreased water stress or scarcityFalseAbuja Federal Capital Territory(9.07647, 7.39857)360432018[Efficiency regulations or standards][][Short-term]NaN[]2440000.02017.09.076477.39857
3AfricaBouaképublicCôte d'IvoireDeclining water qualityFalseCommune de Bouaké(7.4137, -5.0149)8316172018[Diversifying water supply (including new sour...[][]Extremely serious[]715435.02013.07.41370-5.01490
4AfricaCape TownpublicSouth AfricaIncreased water stress or scarcityTrueCity of Cape Town(-33.9253, 18.4239)358582018[Water use restrictions, Use of non-potable wa...[In terms of diversifying water supply (includ...[]Extremely serious[The City has generally been able to successfu...4174510.02017.0-33.9253018.42390
............................................................
669Southeast Asia and OceaniaCity of SydneypublicAustraliaRegulatoryTrueCity of Sydney(-33.8675, 151.207)311142018[][][]Serious[Regulatory, pricing and institutional arrange...233219.02017.0-33.86750151.20700
670Southeast Asia and OceaniaHo Chi MinhpublicViet NamDeclining water qualityTrueHo Chi Minh City(10.7626, 106.66)311642018[][][]Serious[]8611000.02017.010.76260106.66000
671Southeast Asia and OceaniaHo Chi MinhpublicViet NamHigher water pricesTrueHo Chi Minh City(10.7626, 106.66)311642018[][][Short-term]Less Serious[]8611000.02017.010.76260106.66000
672Southeast Asia and OceaniaHo Chi MinhpublicViet NamInadequate or aging infrastructureTrueHo Chi Minh City(10.7626, 106.66)311642018[][][Short-term]Less Serious[]8611000.02017.010.76260106.66000
673Southeast Asia and OceaniaHo Chi MinhpublicViet NamIncreased water demandTrueHo Chi Minh City(10.7626, 106.66)311642018[][][]Less Serious[]8611000.02017.010.76260106.66000
+

598 rows × 19 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Drop metadata columns that will not be used

+
+
+
+
+
+
In [32]:
+
+
+
cw_data.drop(columns=['timescale','population_year'],inplace=True)
+cw_data.rename(columns={'current_population':'population'},inplace=True)
+
+ +
+
+
+ +
+
+
+
In [33]:
+
+
+
len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[33]:
+ + + + +
+
(598, 598)
+
+ +
+ +
+
+ +
+
+
+
+

Generate for each of the list columns a new one that contains the number of elements

Create value_columns that contains the names of all the list columns + the created ones

+ +
+
+
+
+
+
In [34]:
+
+
+
value_columns = set([x for x in cw_value_columns if x not in to_drop] + ['risks_to_citys_water_supply'])
+list_columns = cw_data.iloc[0].apply(lambda x: isinstance(x, list))
+list_columns = list_columns[list_columns].index
+len_columns = set()
+for x in value_columns.copy():
+    if x in list_columns:
+        cw_data[x+'_n'] = cw_data[x].apply(lambda x: len(x) if x else 1)
+        len_columns.add(x+'_n')
+value_columns = value_columns.union(len_columns)
+value_columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[34]:
+ + + + +
+
{'action_description',
+ 'action_description_n',
+ 'adaptation_action',
+ 'adaptation_action_n',
+ 'magnitude',
+ 'risk_description',
+ 'risk_description_n',
+ 'risks_to_citys_water_supply'}
+
+ +
+ +
+
+ +
+
+
+
In [35]:
+
+
+
len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[35]:
+ + + + +
+
(598, 598)
+
+ +
+ +
+
+ +
+
+
+
+

Filter by C40

We are making the assumption that, given that those cities participate in a specific program, they will also have better structured data. So this subset will be used as a reference for the imputation

+ +
+
+
+
+
+
In [36]:
+
+
+
c40_data = cw_data[cw_data['c40']].copy()
+c40_data.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[36]:
+ + + + +
+
(97, 20)
+
+ +
+ +
+
+ +
+
+
+
+

Find the null percentage of list columns

+
+
+
+
+
+
In [37]:
+
+
+
list_nulls = c40_data[list_columns].applymap(lambda x: len(x) == 0)
+list_nulls.mean()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[37]:
+ + + + +
+
adaptation_action     0.268041
+action_description    0.381443
+risk_description      0.402062
+dtype: float64
+
+ +
+ +
+
+ +
+
+
+
In [38]:
+
+
+
c40_data['magnitude'].unique()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[38]:
+ + + + +
+
array(['Extremely serious', nan, 'Serious', 'Less Serious'], dtype=object)
+
+ +
+ +
+
+ +
+
+
+
+

Create a report of the values of magnitude

+
+
+
+
+
+
In [39]:
+
+
+
c40_risks = c40_data[id_columns+['magnitude']].copy()
+c40_risks['low'] = c40_risks['magnitude'] == 'Less Serious'
+c40_risks['medium'] = c40_risks['magnitude'] == 'Serious'
+c40_risks['high'] = c40_risks['magnitude'] == 'Extremely serious'
+c40_risks['unknown'] = c40_risks['magnitude'].isna()
+c40_risks = c40_risks.drop(columns=['magnitude']).groupby([x for x in id_columns if x!="risks_to_citys_water_supply"]).agg(sum).reset_index()
+c40_risks
+
+ +
+
+
+ +
+
+ + +
+ +
Out[39]:
+ + + +
+

regioncityaccesscountryc40organisationcity_locationaccount_noreporting_yearlatitudelongitudelowmediumhighunknown
0AfricaAddis AbabapublicEthiopiaTrueAddis Ababa City Administration(9.02887, 38.7544)3114620189.0288738.754400300
1AfricaCape TownpublicSouth AfricaTrueCity of Cape Town(-33.9253, 18.4239)358582018-33.9253018.423900010
2AfricaDar es SalaampublicUnited Republic of TanzaniaTrueCity of Dar es Salaam(-6.79235, 39.2083)358932018-6.7923539.208300101
3AfricaDurbanpublicSouth AfricaTrueCity of Durban(-29.8587, 31.0218)358632018-29.8587031.021800001
4AfricaJohannesburgpublicSouth AfricaTrueCity of Johannesburg(-26.2041, 28.0473)311152018-26.2041028.047300001
5AfricaLagospublicNigeriaTrueCity of Lagos(6.52437, 3.3792)3116720186.524373.379200421
6AfricaNairobipublicKenyaTrueCity of Nairobi(-1.29206, 36.8219)359132018-1.2920636.821900200
7East AsiaChangwonpublicRepublic of KoreaTrueChangwon City(35.1542, 126.949)31186201835.15420126.949000100
8East AsiaDalianpublicChinaTrueDalian Municipal People's Government(38.9389, 121.567)54298201838.93890121.567000002
9East AsiaHong KongpublicChina, Hong Kong Special Administrative RegionTrueGovernment of Hong Kong Special Administrative...(22.2881, 114.14)31169201822.28810114.140000101
10East AsiaTokyopublicJapanTrueTokyo Metropolitan Government(35.41, 139.41)31111201835.41000139.410000301
11East AsiaYokohamapublicJapanTrueCity of Yokohama(35.4437, 139.638)31113201835.44370139.638000100
12EuropeBarcelonapublicSpainTrueAjuntament de Barcelona(41.3823, 2.1775)1499201841.382302.177500211
13EuropeLondonpublicUnited Kingdom of Great Britain and Northern I...TrueGreater London Authority(51.5048, -0.07868)3422201851.50480-0.078681200
14EuropeOslopublicNorwayTrueCity of Oslo(59.9139, 10.7522)14088201859.9139010.752201011
15EuropeParispublicFranceTrueCity of Paris(48.8566, 2.35222)31175201848.856602.352220002
16EuropeRomapublicItalyTrueRoma Capitale(41.53, 12.2858)31110201841.5300012.285800011
17EuropeStockholmpublicSwedenTrueCity of Stockholm(59.3293, 18.0686)3429201859.3293018.068601100
18EuropeWarsawpublicPolandTrueCity of Warsaw(52.2297, 21.0122)31185201852.2297021.012200100
19Latin AmericaCuritibapublicBrazilTrueMunicipality of Curitiba(-25.4311, -49.2647)311562018-25.43110-49.264701000
20Latin AmericaGuadalajarapublicMexicoTrueRegion Metropolitana de Guadalajara(20.6597, -103.35)35867201820.65970-103.350000001
21Latin AmericaMedellínpublicColombiaTrueMunicipality of Medellín(6.22729, -75.5735)3587320186.22729-75.573500200
22Latin AmericaMexico CitypublicMexicoTrueMexico City(19.4326, -99.1332)31172201819.43260-99.133200011
23Latin AmericaQuitopublicEcuadorTrueDistrito Metropolitano de Quito(-0.2333, -78.5167)421782018-0.23330-78.516700100
24Latin AmericaRio de JaneiropublicBrazilTruePrefeitura do Rio de Janeiro(-22.9054, -43.5614)311762018-22.90540-43.561400201
25Latin AmericaSantiagopublicChileTrueRegión Metropolitana de Santiago(-33.4489, -70.6693)311802018-33.44890-70.669300300
26Latin AmericaSão PaulopublicBrazilTruePrefeitura de São Paulo(-23.5595, -46.7382)311842018-23.55950-46.738200100
27Middle EastAmmanpublicJordanTrueGreater Amman Municipality(31.9454, 35.9284)54337201831.9454035.928400020
28North AmericaAustinpublicUnited States of AmericaTrueCity of Austin(30.2672, -97.7431)1184201830.26720-97.743100010
29North AmericaPortland, ORpublicUnited States of AmericaTrueCity of Portland, OR(45.52, -122.682)14874201845.52000-122.682000004
30North AmericaSan FranciscopublicUnited States of AmericaTrueCity of San Francisco(37.7749, -122.419)31182201837.77490-122.419000100
31North AmericaSeattlepublicUnited States of AmericaTrueCity of Seattle(47.6062, -122.332)16581201847.60620-122.332000001
32North AmericaVancouverpublicCanadaTrueCity of Vancouver(49.2612, -123.114)20113201849.26120-123.114001000
33North AmericaWashington, DCpublicUnited States of AmericaTrueDistrict of Columbia(38.9072, -77.0369)31090201838.90720-77.036900201
34South and West AsiaDhakapublicBangladeshTrueDhaka City(23.8103, 90.4125)50782201823.8103090.412500004
35South and West AsiaKarachipublicPakistanTrueCity District Government Karachi(24.9108, 67.1281)31168201824.9108067.128100010
36South and West AsiaKolkatapublicIndiaTrueKolkata Metropolitan Area(22.5726, 88.3639)35904201822.5726088.363900001
37Southeast Asia and OceaniaAucklandpublicNew ZealandTrueAuckland Council(-36.8153, 174.742)439322018-36.81530174.742000201
38Southeast Asia and OceaniaBangkokpublicThailandTrueBangkok Metropolitan Administration(13.7712, 100.556)31150201813.77120100.556001000
39Southeast Asia and OceaniaCity of MelbournepublicAustraliaTrueCity of Melbourne(-37.8141, 144.963)311092018-37.81410144.963000200
40Southeast Asia and OceaniaCity of SydneypublicAustraliaTrueCity of Sydney(-33.8675, 151.207)311142018-33.86750151.207000401
41Southeast Asia and OceaniaHo Chi MinhpublicViet NamTrueHo Chi Minh City(10.7626, 106.66)31164201810.76260106.660003100
42Southeast Asia and OceaniaJakartapublicIndonesiaTrueJakarta City Government(-6.1751, 106.865)311662018-6.17510106.865000005
+
+
+ +
+ +
+
+ +
+
+
+
In [40]:
+
+
+
from sklearn.preprocessing import LabelEncoder
+
+ +
+
+
+ +
+
+
+
In [41]:
+
+
+
from utils.nlp import SimilarityAnalysis, create_sim_vector
+
+ +
+
+
+ +
+
+
+
In [42]:
+
+
+
c40_data.risks_to_citys_water_supply = c40_data.risks_to_citys_water_supply.apply(lambda x: np.nan if x=='nan' else x)
+
+ +
+
+
+ +
+
+
+
In [43]:
+
+
+
c40_data[c40_data.risks_to_citys_water_supply.isnull()]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[43]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
regioncityaccesscountryrisks_to_citys_water_supplyc40organisationcity_locationaccount_noreporting_yearadaptation_actionaction_descriptionmagnituderisk_descriptionpopulationlatitudelongitudeadaptation_action_naction_description_nrisk_description_n
21AfricaJohannesburgpublicSouth AfricaNaNTrueCity of Johannesburg(-26.2041, 28.0473)311152018[Water use restrictions][A by-law that effects water restrictions is i...NaN[]NaN-26.204128.0473111
71East AsiaTokyopublicJapanNaNTrueTokyo Metropolitan Government(35.41, 139.41)311112018[][Implementing measures against flood]Serious[]13857664.035.4100139.4100111
108EuropeOslopublicNorwayNaNTrueCity of Oslo(59.9139, 10.7522)140882018[Stormwater management (natural or man-made in...[Small scale project to implement water meeter...NaN[]NaN59.913910.7522341
404North AmericaPortland, ORpublicUnited States of AmericaNaNTrueCity of Portland, OR(45.52, -122.682)148742018[Watershed preservation][Portland seeks to manage the watershed in way...NaN[Portland has a secondary, redundant, groundwa...639863.045.5200-122.6820111
454South and West AsiaDhakapublicBangladeshNaNTrueDhaka City(23.8103, 90.4125)507822018[Stormwater management (natural or man-made in...[Embankment is constructed around Dhaka City t...NaN[Increases water born diseases]4500000.023.810390.4125111
456South and West AsiaKolkatapublicIndiaNaNTrueKolkata Metropolitan Area(22.5726, 88.3639)359042018[Efficiency regulations or standards, Stormwat...[Channelisation of drains Construction of new ...NaN[53% of the boroughs within Kolkata Municipal ...14030000.022.572688.3639222
476Southeast Asia and OceaniaJakartapublicIndonesiaNaNTrueJakarta City Government(-6.1751, 106.865)311662018[Other: Stormwater management (natural or man ...[- Restore damaged and lost water ecosystems (...NaN[Increased frequency of rain affecting the are...10075310.0-6.1751106.8650111
+
+
+ +
+ +
+
+ +
+
+
+
In [44]:
+
+
+
c40_data.risks_to_citys_water_supply.unique()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[44]:
+ + + + +
+
array(['Increased water stress or scarcity',
+       'Inadequate or aging infrastructure', nan,
+       'Declining water quality', 'Energy supply issues',
+       'Higher water prices', 'Increased water demand', 'Regulatory'],
+      dtype=object)
+
+ +
+ +
+
+ +
+
+
+
+

Fit Similarity Analysis

per string list column of the value columns, by concatenating the strings together. Vectorize all the string list columns based on their pairwise similarities

+ +
+
+
+
+
+
In [45]:
+
+
+
analyses = {x:SimilarityAnalysis() for x in value_columns if x in list_columns}
+
+ +
+
+
+ +
+
+
+
In [46]:
+
+
+
vectorized = np.hstack([analyses[x].fit_transform(c40_data[x]) for x in analyses])
+
+ +
+
+
+ +
+
+
+
+

Encode magnitude and risks into numerical labels

+
+
+
+
+
+
In [47]:
+
+
+
cols_to_encode =  [x for x in value_columns if x not in list_columns and x not in len_columns]
+c40_data[cols_to_encode] = c40_data[cols_to_encode].fillna('nan')
+encoders = {x:LabelEncoder() for x in cols_to_encode}
+
+ +
+
+
+ +
+
+
+
In [48]:
+
+
+
encoded = np.array([encoders[x].fit_transform(c40_data[x]) for x in cols_to_encode]).T.astype(float)
+for t in range(len(cols_to_encode)):
+    encoded[encoded[:,t] == np.where(encoders[cols_to_encode[t]].classes_=='nan')[0],t] = np.nan
+
+ +
+
+
+ +
+
+
+
In [49]:
+
+
+
lab_freqs = c40_data[cols_to_encode].groupby(cols_to_encode,as_index=False).size()
+lab_freqs
+
+ +
+
+
+ +
+
+ + +
+ +
Out[49]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
risks_to_citys_water_supplymagnitudesize
0Declining water qualityLess Serious2
1Declining water qualitySerious9
2Declining water qualitynan3
3Energy supply issuesExtremely serious1
4Energy supply issuesSerious1
5Higher water pricesExtremely serious1
6Higher water pricesLess Serious1
7Higher water pricesSerious1
8Higher water pricesnan3
9Inadequate or aging infrastructureLess Serious1
10Inadequate or aging infrastructureSerious1
11Inadequate or aging infrastructurenan15
12Increased water demandExtremely serious1
13Increased water demandLess Serious1
14Increased water demandSerious6
15Increased water stress or scarcityExtremely serious8
16Increased water stress or scarcityLess Serious3
17Increased water stress or scarcitySerious21
18Increased water stress or scarcitynan6
19RegulatoryLess Serious1
20RegulatorySerious3
21Regulatorynan1
22nanSerious1
23nannan6
+
+
+ +
+ +
+
+ +
+
+
+
+

Create a dataset that contains the encoded labels, the vectorized columns and the columns holding the list lengths

+
+
+
+
+
+
In [50]:
+
+
+
whole = np.hstack([encoded, vectorized, c40_data[list(len_columns)]])  # len_columns is a set; pandas >= 2.0 raises TypeError on set indexers, so pass a list
+
+ +
+
+
+ +
+
+
+
In [51]:
+
+
+
import sys
+sys.path.append('..')
+from labeled_preprocessing.imputation import LabeledDatasetImputer
+
+ +
+
+
+ +
+
+
+
+

Fit LabeledDatasetImputer and transform dataset

(LabeledDatasetImputer is a union of two Iterative Imputers, one for labels and one for features)

+ +
+
+
+
+
+
In [52]:
+
+
+
imputer = LabeledDatasetImputer(verbose=2,k_features_per_label=0, seed=seed)
+continuous_imputed, y = imputer.fit_transform(whole[:,2:], whole[:,:2], ret_imputed_x=True)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Applying feature selection..
+Creating imputed X using KNeighborsRegressor..
+[IterativeImputer] Completing matrix with shape (97, 192)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.45
+[IterativeImputer] Change: 16.78842081477814, scaled tolerance: 0.011 
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.88
+[IterativeImputer] Change: 7.823153835169504, scaled tolerance: 0.011 
+[IterativeImputer] Ending imputation round 3/10, elapsed time 1.57
+[IterativeImputer] Change: 3.379485724678483, scaled tolerance: 0.011 
+[IterativeImputer] Ending imputation round 4/10, elapsed time 2.09
+[IterativeImputer] Change: 0.021456167679847482, scaled tolerance: 0.011 
+[IterativeImputer] Ending imputation round 5/10, elapsed time 2.51
+[IterativeImputer] Change: 3.3306690738754696e-16, scaled tolerance: 0.011 
+[IterativeImputer] Early stopping criterion reached.
+Creating imputed Y using Pipeline..
+[IterativeImputer] Completing matrix with shape (97, 194)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.13
+[IterativeImputer] Change: 5.0, scaled tolerance: 0.011 
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.23
+[IterativeImputer] Change: 0.0, scaled tolerance: 0.011 
+[IterativeImputer] Early stopping criterion reached.
+
+
+
+ +
+
+ +
+
+
+
+

Use the predictive power on the imputed data as a measure of how coherent the imputation is.

+
+
+
+
+
+
In [53]:
+
+
+
from sklearn.svm import SVC
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import cross_val_score
+from sklearn.preprocessing import StandardScaler
+from sklearn.pipeline import make_pipeline
+simple_model = make_pipeline(StandardScaler(), SVC(kernel='linear', C=0.01, random_state=seed))
+scoring = 'accuracy' 
+scores = cross_val_score(simple_model, continuous_imputed, y[:,cols_to_encode.index('magnitude')], cv=5,scoring=scoring)
+np.mean(scores)
+
+ +
+
+
+ +
+
+ + +
+ +
Out[53]:
+ + + + +
+
0.773684210526316
+
+ +
+ +
+
+ +
+
+
+
+

Plot features correlation

+
+
+
+
+
+
In [54]:
+
+
+
import matplotlib.pyplot as plt
+flg = np.all(~np.isnan(whole),axis=1), np.hstack([[True, True], imputer.selection_mask[:]])
+corr = np.corrcoef(whole[flg[0],:][:,flg[1]].T)
+print(corr.shape)
+plt.matshow(corr)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
(194, 194)
+
+
+
+ +
+ +
Out[54]:
+ + + + +
+
<matplotlib.image.AxesImage at 0x7f9b93e80190>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

Retrieve the categorical imputed labels

+
+
+
+
+
+
In [55]:
+
+
+
imputed = np.array([
+    encoders[x].inverse_transform(
+    y[:, c].astype(int)
+ ) for c,x in enumerate(cols_to_encode)]).T
+
+ +
+
+
+ +
+
+
+
In [56]:
+
+
+
c40_data_imputed = c40_data.copy()
+
+ +
+
+
+ +
+
+
+
In [57]:
+
+
+
c40_data_imputed[cols_to_encode] = imputed
+
+ +
+
+
+ +
+
+
+
In [58]:
+
+
+
c40_data_imputed.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[58]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
regioncityaccesscountryrisks_to_citys_water_supplyc40organisationcity_locationaccount_noreporting_yearadaptation_actionaction_descriptionmagnituderisk_descriptionpopulationlatitudelongitudeadaptation_action_naction_description_nrisk_description_n
4AfricaCape TownpublicSouth AfricaIncreased water stress or scarcityTrueCity of Cape Town(-33.9253, 18.4239)358582018[Water use restrictions, Use of non-potable wa...[In terms of diversifying water supply (includ...Extremely serious[The City has generally been able to successfu...4174510.0-33.9253018.42399101
7AfricaDar es SalaampublicUnited Republic of TanzaniaInadequate or aging infrastructureTrueCity of Dar es Salaam(-6.79235, 39.2083)358932018[Investment in existing water supply infrastru...[City provide some % of the total money correc...Serious[]NaN-6.7923539.2083111
8AfricaDurbanpublicSouth AfricaInadequate or aging infrastructureTrueCity of Durban(-29.8587, 31.0218)358632018[Other, Investment in existing water supply in...[improvements to infrastructure are being done...Serious[]NaN-29.8587031.0218331
21AfricaJohannesburgpublicSouth AfricaIncreased water stress or scarcityTrueCity of Johannesburg(-26.2041, 28.0473)311152018[Water use restrictions][A by-law that effects water restrictions is i...Serious[]NaN-26.2041028.0473111
24AfricaLagospublicNigeriaDeclining water qualityTrueCity of Lagos(6.52437, 3.3792)311672018[Water use restrictions][Application of water abstraction rules, surch...Serious[Increased rainfall will result to greater amo...24821418.06.524373.3792111
+
+
+ +
+ +
+
+ +
+
+
+
+

Find which labels have been updated after imputation

+
+
+
+
+
+
In [59]:
+
+
+
imp_lab_freqs = c40_data_imputed[cols_to_encode].groupby(cols_to_encode,as_index=False).size()
+imp_ret = pd.merge(lab_freqs, imp_lab_freqs,suffixes=('','_imp'), how='right',on=cols_to_encode)
+imp_ret['increase'] = (imp_ret['size_imp'] - imp_ret['size'])/imp_ret['size']
+imp_ret
+
+ +
+
+
+ +
+
+ + +
+ +
Out[59]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
risks_to_citys_water_supplymagnitudesizesize_impincrease
0Declining water qualityExtremely seriousNaN1NaN
1Declining water qualityLess Serious2.020.000000
2Declining water qualitySerious9.0120.333333
3Energy supply issuesExtremely serious1.010.000000
4Energy supply issuesSerious1.010.000000
5Higher water pricesExtremely serious1.010.000000
6Higher water pricesLess Serious1.010.000000
7Higher water pricesSerious1.043.000000
8Inadequate or aging infrastructureLess Serious1.010.000000
9Inadequate or aging infrastructureSerious1.01716.000000
10Increased water demandExtremely serious1.010.000000
11Increased water demandLess Serious1.010.000000
12Increased water demandSerious6.060.000000
13Increased water stress or scarcityExtremely serious8.090.125000
14Increased water stress or scarcityLess Serious3.030.000000
15Increased water stress or scarcitySerious21.0310.476190
16RegulatoryLess Serious1.010.000000
17RegulatorySerious3.040.333333
+
+
+ +
+ +
+
+ +
+
+
+
+

Make sure that all risks have been encoded and are imputation outcome candidates

+
+
+
+
+
+
In [60]:
+
+
+
set([x for x in cw_data['risks_to_citys_water_supply'] if x not in encoders['risks_to_citys_water_supply'].classes_])
+
+ +
+
+
+ +
+
+ + +
+ +
Out[60]:
+ + + + +
+
set()
+
+ +
+ +
+
+ +
+
+
+
+

Impute whole dataset

+
+
+
+
+
+
In [61]:
+
+
+
cw_data['risks_to_citys_water_supply'].value_counts()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[61]:
+ + + + +
+
Increased water stress or scarcity    205
+Declining water quality               127
+Inadequate or aging infrastructure    101
+nan                                    54
+Increased water demand                 49
+Higher water prices                    38
+Regulatory                             17
+Energy supply issues                    7
+Name: risks_to_citys_water_supply, dtype: int64
+
+ +
+ +
+
+ +
+
+
+
+

Apply encoding and vectorization

based on fitted encoders and SimilarityAnalysis objects

+ +
+
+
+
+
+
In [62]:
+
+
+
cw_data[cols_to_encode] = cw_data[cols_to_encode].fillna('nan')
+encoded = np.array([encoders[x].transform(cw_data[x]) for x in cols_to_encode]).T.astype(float)
+for t in range(len(cols_to_encode)):
+    encoded[encoded[:,t] == np.where(encoders[cols_to_encode[t]].classes_=='nan')[0],t] = np.nan
+
+ +
+
+
+ +
+
+
+
In [63]:
+
+
+
all_vectorized = np.hstack([analyses[x].transform(cw_data[x]) for x in value_columns if x in list_columns])
+
+ +
+
+
+ +
+
+
+
+

Apply imputation

using the trained imputer

+ +
+
+
+
+
+
In [64]:
+
+
+
all_imputed_x, all_imputed_y  = imputer.transform(np.hstack([all_vectorized,cw_data[len_columns]]), encoded, ret_imputed_x=True)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (598, 192)
+[IterativeImputer] Ending imputation round 1/5, elapsed time 0.48
+[IterativeImputer] Ending imputation round 2/5, elapsed time 0.99
+[IterativeImputer] Ending imputation round 3/5, elapsed time 1.49
+[IterativeImputer] Ending imputation round 4/5, elapsed time 2.11
+[IterativeImputer] Ending imputation round 5/5, elapsed time 2.72
+[IterativeImputer] Completing matrix with shape (598, 194)
+[IterativeImputer] Ending imputation round 1/2, elapsed time 0.02
+[IterativeImputer] Ending imputation round 2/2, elapsed time 0.03
+
+
+
+ +
+
+ +
+
+
+
In [65]:
+
+
+
all_imputed_y_dec = np.array([
+    encoders[x].inverse_transform(
+    all_imputed_y[:, c].astype(int)
+ ) for c,x in enumerate(cols_to_encode)]).T
+
+ +
+
+
+ +
+
+
+
In [66]:
+
+
+
len(cw_data[id_columns]), len(cw_data[id_columns].drop_duplicates())
+
+ +
+
+
+ +
+
+ + +
+ +
Out[66]:
+ + + + +
+
(598, 598)
+
+ +
+ +
+
+ +
+
+
+
In [67]:
+
+
+
cw_data_imputed = cw_data.copy()
+cw_data_imputed[cols_to_encode] = all_imputed_y_dec
+cw_data_imputed.drop_duplicates(id_columns,inplace=True)
+
+ +
+
+
+ +
+
+
+
+

Plot features correlation

+
+
+
+
+
+
In [68]:
+
+
+
import matplotlib.pyplot as plt
+corr = np.corrcoef(np.hstack([all_imputed_y, all_imputed_x]).T)
+print(corr.shape)
+plt.matshow(corr)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
(194, 194)
+
+
+
+ +
+ +
Out[68]:
+ + + + +
+
<matplotlib.image.AxesImage at 0x7f9b92426c70>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [69]:
+
+
+
scores = cross_val_score(simple_model, all_imputed_x, all_imputed_y[:,cols_to_encode.index('magnitude')], cv=5,scoring=scoring)
+np.mean(scores)
+
+ +
+
+
+ +
+
+ + +
+ +
Out[69]:
+ + + + +
+
0.7407983193277311
+
+ +
+ +
+
+ +
+
+
+
+

The score did not drop significantly, so we can assume that the imputation was consistent across the data

+ +
+
+
+
+
+
+

Create output dataset

+
+
+
+
+
+
In [205]:
+
+
+
final_labeled_data = cw_data_imputed[['city','latitude','longitude','country','population','c40','magnitude','risks_to_citys_water_supply']].copy()
+final_labeled_data.rename(columns={'magnitude': 'risk','risks_to_citys_water_supply': 'description'},inplace=True)
+
+ +
+
+
+ +
+
+
+
In [206]:
+
+
+
final_labeled_data[['city','latitude','longitude','c40', 'population','description','risk']]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[206]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citylatitudelongitudec40populationdescriptionrisk
0Abuja9.076477.39857False2440000.0Higher water pricesSerious
1Abuja9.076477.39857FalseNaNInadequate or aging infrastructureSerious
2Abuja9.076477.39857False2440000.0Increased water stress or scarcitySerious
3Bouaké7.41370-5.01490False715435.0Declining water qualityExtremely serious
4Cape Town-33.9253018.42390True4174510.0Increased water stress or scarcityExtremely serious
........................
669City of Sydney-33.86750151.20700True233219.0RegulatorySerious
670Ho Chi Minh10.76260106.66000True8611000.0Declining water qualitySerious
671Ho Chi Minh10.76260106.66000True8611000.0Higher water pricesLess Serious
672Ho Chi Minh10.76260106.66000True8611000.0Inadequate or aging infrastructureLess Serious
673Ho Chi Minh10.76260106.66000True8611000.0Increased water demandLess Serious
+

575 rows × 7 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [207]:
+
+
+
cw_data_imputed[id_columns].drop_duplicates().shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[207]:
+ + + + +
+
(575, 12)
+
+ +
+ +
+
+ +
+
+
+
+

Pivot the risk descriptions, so that each row is unique per city

+
+
+
+
+
+
In [208]:
+
+
+
risks = final_labeled_data.description.unique()
+risks_description = {risk: f'risk{c}' for c,risk in enumerate(risks)}
+risks_df = pd.Series(risks_description).to_frame()
+risks_df.reset_index(inplace=True)
+risks_df.columns=['description','code']
+final_labeled_data['description'] = final_labeled_data['description'].apply(lambda x: risks_description[x])
+risks_df
+
+ +
+
+
+ +
+
+ + +
+ +
Out[208]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
descriptioncode
0Higher water pricesrisk0
1Inadequate or aging infrastructurerisk1
2Increased water stress or scarcityrisk2
3Declining water qualityrisk3
4Increased water demandrisk4
5Regulatoryrisk5
6Energy supply issuesrisk6
+
+
+ +
+ +
+
+ +
+
+
+
In [209]:
+
+
+
main_index=['city','latitude','longitude', 'country']
+
+ +
+
+
+ +
+
+
+
+

Check if there are cities that are listed to be both in and not in C40

and drop the ones that are not

+ +
+
+
+
+
+
In [210]:
+
+
+
c40_check = final_labeled_data.groupby(main_index)['c40'].nunique()
+c40_check[c40_check>1]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[210]:
+ + + + +
+
city      latitude  longitude  country
+Santiago  -33.4489  -70.6693   Chile      2
+Name: c40, dtype: int64
+
+ +
+ +
+
+ +
+
+
+
In [211]:
+
+
+
final_labeled_data.city.nunique()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[211]:
+ + + + +
+
282
+
+ +
+ +
+
+ +
+
+
+
In [212]:
+
+
+
final_labeled_data[final_labeled_data.city == 'Santiago']
+
+ +
+
+
+ +
+
+ + +
+ +
Out[212]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citylatitudelongitudecountrypopulationc40riskdescription
563Santiago-33.4489-70.6693Chile404495.0FalseExtremely seriousrisk3
564Santiago-33.4489-70.6693Chile7112808.0TrueSeriousrisk3
565Santiago-33.4489-70.6693Chile404495.0FalseExtremely seriousrisk4
566Santiago-33.4489-70.6693Chile404495.0FalseExtremely seriousrisk2
567Santiago-33.4489-70.6693Chile7112808.0TrueSeriousrisk2
568Santiago-33.4489-70.6693Chile7112808.0TrueSeriousrisk5
+
+
+ +
+ +
+
+ +
+
+
+
In [221]:
+
+
+
final_labeled_data = final_labeled_data.sort_values(by='c40').groupby(main_index + ['description']).last().reset_index()
+final_labeled_data = final_labeled_data.drop(columns=['c40']).merge(final_labeled_data.groupby(main_index)['c40'].max().reset_index(),on=main_index)
+final_labeled_data[final_labeled_data.city=='Santiago']
+
+ +
+
+
+ +
+
+ + +
+ +
Out[221]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citylatitudelongitudecountrydescriptionpopulationriskc40
473Santiago-33.4489-70.6693Chilerisk27112808.0SeriousTrue
474Santiago-33.4489-70.6693Chilerisk37112808.0SeriousTrue
475Santiago-33.4489-70.6693Chilerisk4404495.0Extremely seriousTrue
476Santiago-33.4489-70.6693Chilerisk57112808.0SeriousTrue
+
+
+ +
+ +
+
+ +
+
+
+
In [222]:
+
+
+
final_labeled_data = final_labeled_data.sort_values(by='c40').groupby(main_index + ['description']).last().reset_index()
+
+ +
+
+
+ +
+
+
+
+

Fix population

+
+
+
+
+
+
In [223]:
+
+
+
pop_df = final_labeled_data.groupby(index)[['population']
+                                          ].max().reset_index().drop_duplicates()
+final_labeled_data = final_labeled_data.drop(columns=['population']).merge(pop_df.reset_index(), on=index)
+
+ +
+
+
+ +
+
+
+
In [224]:
+
+
+
final_labeled_data = final_labeled_data.pivot(index= index + ['c40','population'], columns='description', values='risk').reset_index()
+
+ +
+
+
+ +
+
+
+
+

Encode risk severities as integers: 1 (Less Serious), 2 (Serious), 3 (Extremely serious)

+
+
+
+
+
+
In [225]:
+
+
+
severity_mapping = {'Less Serious':1, "Serious":2, 'Extremely serious':3}
+pd.DataFrame([severity_mapping]).to_csv(SEVERITY_MAPPING_PATH,index=False)
+nrisks = list(risks_description.values())
+final_labeled_data[nrisks] = final_labeled_data[nrisks].replace(severity_mapping)
+
+ +
+
+
+ +
+
+
+
+

Set missing risks to 0 for cities belonging to C40, assuming that those cities reported no risk because there was indeed none, not because it was unknown

+
+
+
+
+
+
In [226]:
+
+
+
final_labeled_data.loc[final_labeled_data['c40'], nrisks] = final_labeled_data.loc[final_labeled_data['c40'], nrisks].fillna(0)
+
+ +
+
+
+ +
+
+
+
In [227]:
+
+
+
final_labeled_data.to_csv(LABELED_CITIES_PATH,index=False)
+risks_df.to_csv(RISKS_MAPPING_PATH,index=False)
+
+ +
+
+
+ +
+
+
+
In [228]:
+
+
+
final_labeled_data.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[228]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
descriptioncitylatitudelongitudecountryc40populationrisk0risk1risk2risk3risk4risk5risk6
0Abasan Al-Kabira31.3231034.34400State of PalestineFalse32000.0NaN2.02.02.0NaNNaNNaN
1Abington40.12410-75.11950United States of AmericaFalse55310.0NaN1.01.02.0NaNNaNNaN
2Abuja9.076477.39857NigeriaFalse2440000.02.02.02.0NaNNaNNaNNaN
3Addis Ababa9.0288738.75440EthiopiaTrue3103374.00.02.02.00.02.00.00.0
4Adelaide-34.92850138.60100AustraliaFalse23916.02.02.02.0NaNNaN2.0NaN
+
+
+ +
+ +
+
+ +
+
+
+
+

See a report of the filled labels

+
+
+
+
+
+
In [229]:
+
+
+
cw_lab_freqs = cw_data[cols_to_encode].groupby(cols_to_encode,as_index=False).size()
+imp_cw_lab_freqs = cw_data_imputed[cols_to_encode].groupby(cols_to_encode,as_index=False).size()
+imp_cw_ret = pd.merge(cw_lab_freqs, imp_cw_lab_freqs,suffixes=('','_imp'), how='right',on=cols_to_encode)
+imp_cw_ret['increase'] = (imp_cw_ret['size_imp'] - imp_cw_ret['size'])/imp_cw_ret['size']
+imp_cw_ret
+
+ +
+
+
+ +
+
+ + +
+ +
Out[229]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
risks_to_citys_water_supplymagnitudesizesize_impincrease
0Declining water qualityExtremely serious23240.043478
1Declining water qualityLess Serious18180.000000
2Declining water qualitySerious57890.561404
3Energy supply issuesExtremely serious110.000000
4Energy supply issuesLess Serious110.000000
5Energy supply issuesSerious550.000000
6Higher water pricesExtremely serious220.000000
7Higher water pricesLess Serious10110.100000
8Higher water pricesSerious14250.785714
9Inadequate or aging infrastructureExtremely serious340.333333
10Inadequate or aging infrastructureLess Serious880.000000
11Inadequate or aging infrastructureSerious11987.909091
12Increased water demandExtremely serious770.000000
13Increased water demandLess Serious12120.000000
14Increased water demandSerious30320.066667
15Increased water stress or scarcityExtremely serious39420.076923
16Increased water stress or scarcityLess Serious2120-0.047619
17Increased water stress or scarcitySerious941590.691489
18RegulatoryLess Serious440.000000
19RegulatorySerious8130.625000
+
+
+ +
+ +
+
+ +
+
+
+
In [230]:
+
+
+
imp_report = pd.merge(imp_cw_ret, imp_ret, suffixes=('','_c40'), on=('magnitude','risks_to_citys_water_supply'))
+imp_report.to_csv(IMPUTATION_REPORT_PATH,index=False)
+imp_report
+
+ +
+
+
+ +
+
+ + +
+ +
Out[230]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
risks_to_citys_water_supplymagnitudesizesize_impincreasesize_c40size_imp_c40increase_c40
0Declining water qualityExtremely serious23240.043478NaN1NaN
1Declining water qualityLess Serious18180.0000002.020.000000
2Declining water qualitySerious57890.5614049.0120.333333
3Energy supply issuesExtremely serious110.0000001.010.000000
4Energy supply issuesSerious550.0000001.010.000000
5Higher water pricesExtremely serious220.0000001.010.000000
6Higher water pricesLess Serious10110.1000001.010.000000
7Higher water pricesSerious14250.7857141.043.000000
8Inadequate or aging infrastructureLess Serious880.0000001.010.000000
9Inadequate or aging infrastructureSerious11987.9090911.01716.000000
10Increased water demandExtremely serious770.0000001.010.000000
11Increased water demandLess Serious12120.0000001.010.000000
12Increased water demandSerious30320.0666676.060.000000
13Increased water stress or scarcityExtremely serious39420.0769238.090.125000
14Increased water stress or scarcityLess Serious2120-0.0476193.030.000000
15Increased water stress or scarcitySerious941590.69148921.0310.476190
16RegulatoryLess Serious440.0000001.010.000000
17RegulatorySerious8130.6250003.040.333333
+
+
+ +
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/Merge Unlabeled to Labeled.html b/documentation/WaterSecurity/notebooks/Merge Unlabeled to Labeled.html new file mode 100644 index 0000000..7062014 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/Merge Unlabeled to Labeled.html @@ -0,0 +1,15192 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
import sys
+sys.path.append('..')
+import numpy as np
+import pandas as pd
+import country_converter as coco
+from haversine import haversine
+
+ +
+
+
+ +
+
+
+
+

Use world_cities csv as the unlabeled dataset, as it is more complete

The population will be filled in from there

+ +
+
+
+
+
+
In [2]:
+
+
+
from data.unlabeled import WORLD_CITIES
+from data.labeled.preprocessed import LABELED_CITIES
+WORLD_CITIES.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
citycity_asciilatlngcountryiso2iso3admin_namecapitalpopulationid
0TokyoTokyo35.6897139.6922JapanJPJPNTōkyōprimary37977000.01392685764
1JakartaJakarta-6.2146106.8451IndonesiaIDIDNJakartaprimary34540000.01360771077
2DelhiDelhi28.660077.2300IndiaININDDelhiadmin29617000.01356872604
3MumbaiMumbai18.966772.8333IndiaININDMahārāshtraadmin23355000.01356226629
4ManilaManila14.5958120.9772PhilippinesPHPHLManilaprimary23088000.01608618140
+
+
+ +
+ +
+
+ +
+
+
+
+

Create the mapping to 3-letter country codes for all the countries that appear in either dataset

+
+
+
+
+
+
In [3]:
+
+
+
code_dict = {x: coco.convert(x) for x in set(WORLD_CITIES['country'].unique()).union(LABELED_CITIES['country'].unique())}
+
+ +
+
+
+ +
+
+
+
+

Add the corresponding column to both datasets

+
+
+
+
+
+
In [4]:
+
+
+
WORLD_CITIES['country_code'] = WORLD_CITIES['country'].apply(lambda x:  code_dict[x])
+LABELED_CITIES['country_code']= LABELED_CITIES['country'].apply(lambda x:  code_dict[x])
+
+ +
+
+
+ +
+
+
+
+

Merge those datasets together using the following steps:

    +
  1. Create a dictionary that contains the distances between cities that belong to the same country (with the same code). For every city whose name exists as is in both datasets, set the distance to 0
  2. +
  3. Compute the minimum distance and its location per city, that resides in the labeled cities
  4. +
  5. If this distance is less than a threshold, that has been set to 10 kilometers, assume that the cities are the same. + Use this mapping to fill the unlabeled cities with the labeled risks. +If not, expand the unlabeled dataset with the city
  6. +
  7. Drop any row that has no recorded population.
  8. +
+ +
+
+
+
+
+
In [5]:
+
+
+
mapping_dists = {x['city']:{y['city']: (haversine((x['latitude'],x['longitude']),(y['lat'],y['lng'])) if x['city']!=y['city'] else 0)
+                            for (_, y) in WORLD_CITIES[WORLD_CITIES['country_code']==x['country_code']].iterrows()} for (_, x) in LABELED_CITIES.iterrows()}
+
+ +
+
+
+ +
+
+
+
In [6]:
+
+
+
WORLD_CITIES.set_index('city', inplace=True)
+LABELED_CITIES.set_index('city', inplace=True)
+
+ +
+
+
+ +
+
+
+
+

Fix some cases where the latitude and the longitude are swapped

+
+
+
+
+
+
In [7]:
+
+
+
mapping_dists_fixed = {
+    x:(
+        {y: val for y,val in d.items()} if min(d.values()) < 2000 else 
+        {y: haversine((LABELED_CITIES.loc[[x], 'longitude'].values[0],LABELED_CITIES.loc[[x], 'latitude'].values[0]),
+                      (WORLD_CITIES.loc[[y], 'lat'].values[0],WORLD_CITIES.loc[[y], 'lng'].values[0]))
+        for  y,val in d.items()}) for x,d in mapping_dists.items()}
+
+ +
+
+
+ +
+
+
+
In [8]:
+
+
+
mapping_df = pd.DataFrame(mapping_dists_fixed).T
+mapping_df.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[8]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Al QudsNew YorkLos AngelesChicagoMiamiDallasPhiladelphiaHoustonAtlantaWashington...WumKribiBatouriKalfouBélaboEsékaAbong MbangKontchaAiyomojokMbé
Abasan Al-Kabira97.672711NaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AbingtonNaN119.3720963857.1727361070.9452491752.117379154.87331492.555842167.7390611865.0512821400.845831...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AbujaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
Addis AbabaNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AdelaideNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
+

5 rows × 17304 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [9]:
+
+
+
closest_match_df = pd.DataFrame(mapping_df.fillna(100000000).apply(lambda x: {'match':mapping_df.columns[np.argmin(x)], 'distance':np.min(x)}, axis=1).tolist())
+
+ +
+
+
+ +
+
+
+
In [10]:
+
+
+
closest_match_df.index = mapping_df.index
+closest_match_df
+
+ +
+
+
+ +
+
+ + +
+ +
Out[10]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
matchdistance
Abasan Al-KabiraAl Quds97.672711
AbingtonAbington0.000000
AbujaAbuja0.000000
Addis AbabaAddis Ababa0.000000
AdelaideAdelaide0.000000
.........
XalapaXalapa0.000000
Yaoundé 6Yaoundé0.459487
YokohamaYokohama0.000000
ÍlhavoEstremoz1.115491
ÖrebroÖrebro0.985037
+

282 rows × 2 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [11]:
+
+
+
dist_thres = 10 # the distance threshold for a city to be considered mapped correctly, in km
+
+ +
+
+
+ +
+
+
+
In [12]:
+
+
+
unmapped = closest_match_df[closest_match_df['distance']>dist_thres]
+
+ +
+
+
+ +
+
+
+
In [13]:
+
+
+
to_add_whole = LABELED_CITIES.loc[unmapped.index].copy() # add those rows as a whole to the augmented dataset
+to_add_labels = LABELED_CITIES.loc[closest_match_df[closest_match_df['distance']<=dist_thres].index].copy() # only add the labels from these rows
+to_add_labels.index = closest_match_df.loc[closest_match_df['distance']<=dist_thres,'match']
+
+ +
+
+
+ +
+
+
+
+

Creating an augmented dataset with all the cities, annotated or not

+ +
+
+
+
+
+
In [14]:
+
+
+
from data.labeled.preprocessed import RISKS_MAPPING
+risks_cols = list(RISKS_MAPPING.keys())
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
augmented_cities_dataset = WORLD_CITIES[['lat','lng','country','population','country_code']].rename(columns={'lat':'latitude','lng':'longitude'}).copy()
+augmented_cities_dataset = augmented_cities_dataset.merge(to_add_labels[['c40'] + risks_cols],left_index=True, right_index=True,how='left')
+
+ +
+
+
+ +
+
+
+
In [16]:
+
+
+
augmented_cities_dataset = pd.concat([augmented_cities_dataset, to_add_whole],axis=0)
+augmented_cities_dataset['c40']= augmented_cities_dataset['c40'].fillna(False)
+
+ +
+
+
+ +
+
+
+
In [17]:
+
+
+
augmented_cities_dataset = augmented_cities_dataset[~pd.isna(augmented_cities_dataset['population'])]
+
+ +
+
+
+ +
+
+
+
In [18]:
+
+
+
augmented_cities_dataset
+
+ +
+
+
+ +
+
+ + +
+ +
Out[18]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
latitudelongitudecountrypopulationcountry_codec40risk0risk1risk2risk3risk4risk5risk6
A Coruña43.3713-8.4188Spain370610.000ESPFalseNaNNaNNaNNaNNaNNaNNaN
Aachen50.77626.0838Germany247380.000DEUFalseNaNNaNNaNNaNNaNNaNNaN
Aalborg57.03379.9166Denmark122219.000DNKFalseNaNNaNNaNNaNNaNNaNNaN
Aalen48.837210.0936Germany68456.000DEUFalseNaNNaNNaNNaNNaNNaNNaN
Aalst50.93334.0333Belgium85715.000BELFalseNaNNaNNaNNaNNaNNaNNaN
..........................................
Sekhukhune District Municipality-24.833529.9740South Africa1076840.000ZAFFalseNaNNaNNaN2.0NaNNaNNaN
Sopó4.9075-73.9384Colombia27932.000COLFalseNaNNaNNaNNaN1.0NaNNaN
Tangará da Serra-14.6194-57.4858Brazil83.431BRAFalseNaNNaN2.0NaNNaNNaNNaN
Tuzla44.537518.6735Turkey235000.000TURFalseNaNNaN2.0NaNNaNNaNNaN
Vhembe-22.769529.9740South Africa1441178.000ZAFFalseNaNNaN3.03.0NaNNaNNaN
+

25633 rows × 13 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Fix the population of specific cities where it is recorded as a fractional number

which is probably caused by "." being used as a thousands separator

+ +
+
+
+
+
+
In [19]:
+
+
+
augmented_cities_dataset.population = augmented_cities_dataset.population.apply(lambda x: x * 1000 if int(x)!=x else x)
+
+ +
+
+
+ +
+
+
+
+

Filter out cities that are unlabeled and are not big enough, consulting BIG CITIES ALL COUNTRIES csv

+
+
+
+
+
+
In [20]:
+
+
+
from data.unlabeled import BIG_CITIES_ALL_COUNTRIES
+
+ +
+
+
+ +
+
+
+
In [21]:
+
+
+
unlab_mask = augmented_cities_dataset[risks_cols].isnull().all(axis=1)
+not_in_big_cities_mask = augmented_cities_dataset.index.to_series().apply(lambda x: x not in BIG_CITIES_ALL_COUNTRIES.city.values)
+augmented_cities_dataset = augmented_cities_dataset[~(unlab_mask&not_in_big_cities_mask)]
+
+ +
+
+
+ +
+
+
+
In [22]:
+
+
+
augmented_cities_dataset
+
+ +
+
+
+ +
+
+ + +
+ +
Out[22]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
latitudelongitudecountrypopulationcountry_codec40risk0risk1risk2risk3risk4risk5risk6
Aalborg57.03379.9166Denmark122219.0DNKFalseNaNNaNNaNNaNNaNNaNNaN
Aarhus56.157210.2107Denmark237551.0DNKFalseNaNNaNNaNNaNNaNNaNNaN
Aasiaat68.7097-52.8694Greenland3134.0GRLFalseNaNNaNNaNNaNNaNNaNNaN
Abidjan5.3364-4.0267Côte D’Ivoire4980000.0CIVFalseNaNNaNNaNNaN1.0NaNNaN
Abington40.1108-75.1146United States55573.0USAFalseNaN1.01.02.0NaNNaNNaN
..........................................
Sekhukhune District Municipality-24.833529.9740South Africa1076840.0ZAFFalseNaNNaNNaN2.0NaNNaNNaN
Sopó4.9075-73.9384Colombia27932.0COLFalseNaNNaNNaNNaN1.0NaNNaN
Tangará da Serra-14.6194-57.4858Brazil83431.0BRAFalseNaNNaN2.0NaNNaNNaNNaN
Tuzla44.537518.6735Turkey235000.0TURFalseNaNNaN2.0NaNNaNNaNNaN
Vhembe-22.769529.9740South Africa1441178.0ZAFFalseNaNNaN3.03.0NaNNaNNaN
+

1437 rows × 13 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Load and merge countries features

+
+
+
+
+
+
In [23]:
+
+
+
from data.unlabeled import COUNTRIES_DATASET
+
+ +
+
+
+ +
+
+
+
In [24]:
+
+
+
set(augmented_cities_dataset.columns).intersection(COUNTRIES_DATASET.columns)
+
+ +
+
+
+ +
+
+ + +
+ +
Out[24]:
+ + + + +
+
set()
+
+ +
+ +
+
+ +
+
+
+
In [25]:
+
+
+
augmented_cities_dataset = augmented_cities_dataset.merge(COUNTRIES_DATASET, left_on='country_code', right_index=True)
+
+ +
+
+
+ +
+
+
+
In [26]:
+
+
+
all(augmented_cities_dataset.Country == augmented_cities_dataset.country)
+
+ +
+
+
+ +
+
+ + +
+ +
Out[26]:
+ + + + +
+
False
+
+ +
+ +
+
+ +
+
+
+
In [27]:
+
+
+
augmented_cities_dataset.drop(columns='country',inplace=True)
+augmented_cities_dataset.rename(columns={'Country':'country'},inplace=True)
+
+ +
+
+
+ +
+
+
+
In [28]:
+
+
+
augmented_cities_dataset= augmented_cities_dataset.reset_index().rename(columns={'index':'city'}).drop_duplicates(subset=['city','country'])
+
+ +
+
+
+ +
+
+
+
In [29]:
+
+
+
augmented_cities_dataset
+
+ +
+
+
+ +
+
+ + +
+ +
Out[29]:
+ + + +
+

citylatitudelongitudepopulationcountry_codec40risk0risk1risk2risk3...SDG 6.4.1. Services Water Use EfficiencySDG 6.4.1. Water Use EfficiencySDG 6.4.2. Water StressSeasonal variability (WRI)Total internal renewable water resources per capitaTotal population with access to safe drinking-water (JMP)Total renewable water resources per capitaTotal water withdrawal per capitaUrban population with access to safe drinking-water (JMP)country
0Aalborg57.03379.9166122219.0DNKFalseNaNNaNNaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
1Aarhus56.157210.2107237551.0DNKFalseNaNNaNNaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
2Copenhagen55.678612.56351085000.0DNKFalseNaN2.0NaN2.0...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
3Esbjerg55.46708.450072205.0DNKFalseNaNNaNNaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
4Frederikshavn57.433710.533324103.0DNKFalseNaN2.0NaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
..................................................................
1380Teaoraereke1.3333173.00005105.0KIRFalseNaNNaNNaNNaN...110.28810480.30283168.5059252.532741-1779.60943466.90.000000414.12703887.3Kiribati
1381Trindade0.30006.66676636.0STPFalseNaNNaNNaNNaN...17.1622547.4896411.8761472.50289110526.87491997.110526.874919197.49962698.9Sao Tome and Principe
1382Victoria-4.623655.454426450.0SYCFalseNaNNaNNaNNaN...120.47102389.32303569.2432271.601611-2157.59656695.70.000000142.08965195.7Seychelles
1383Warsaw52.216721.03331790658.0POLTrue0.00.02.00.0...153.69155043.57022234.8909660.6000001412.26637798.31594.069324265.61674199.3Poland
1384Abasan Al-Kabira31.323134.344032000.0PSEFalseNaN2.02.02.0...51.88716334.74897041.0783063.100000171.04722458.4176.31345679.03561450.7Palestine
+

1211 rows × 157 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [30]:
+
+
+
from data.dataset import DATASET_PATH
+augmented_cities_dataset.to_csv(DATASET_PATH,index=False)
+
+ +
+
+
+ +
+
+
+
In [31]:
+
+
+
augmented_cities_dataset
+
+ +
+
+
+ +
+
+ + +
+ +
Out[31]:
+ + + +
+

citylatitudelongitudepopulationcountry_codec40risk0risk1risk2risk3...SDG 6.4.1. Services Water Use EfficiencySDG 6.4.1. Water Use EfficiencySDG 6.4.2. Water StressSeasonal variability (WRI)Total internal renewable water resources per capitaTotal population with access to safe drinking-water (JMP)Total renewable water resources per capitaTotal water withdrawal per capitaUrban population with access to safe drinking-water (JMP)country
0Aalborg57.03379.9166122219.0DNKFalseNaNNaNNaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
1Aarhus56.157210.2107237551.0DNKFalseNaNNaNNaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
2Copenhagen55.678612.56351085000.0DNKFalseNaN2.0NaN2.0...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
3Esbjerg55.46708.450072205.0DNKFalseNaNNaNNaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
4Frederikshavn57.433710.533324103.0DNKFalseNaN2.0NaNNaN...558.335628368.61290220.0405621.3000001046.705025100.01046.705025129.285516100.0Denmark
..................................................................
1380Teaoraereke1.3333173.00005105.0KIRFalseNaNNaNNaNNaN...110.28810480.30283168.5059252.532741-1779.60943466.90.000000414.12703887.3Kiribati
1381Trindade0.30006.66676636.0STPFalseNaNNaNNaNNaN...17.1622547.4896411.8761472.50289110526.87491997.110526.874919197.49962698.9Sao Tome and Principe
1382Victoria-4.623655.454426450.0SYCFalseNaNNaNNaNNaN...120.47102389.32303569.2432271.601611-2157.59656695.70.000000142.08965195.7Seychelles
1383Warsaw52.216721.03331790658.0POLTrue0.00.02.00.0...153.69155043.57022234.8909660.6000001412.26637798.31594.069324265.61674199.3Poland
1384Abasan Al-Kabira31.323134.344032000.0PSEFalseNaN2.02.02.0...51.88716334.74897041.0783063.100000171.04722458.4176.31345679.03561450.7Palestine
+

1211 rows × 157 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [32]:
+
+
+
(~(augmented_cities_dataset[risks_cols].isnull()).all(axis=1)).sum() # the labeled samples
+
+ +
+
+
+ +
+
+ + +
+ +
Out[32]:
+ + + + +
+
324
+
+ +
+ +
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/app.html b/documentation/WaterSecurity/notebooks/app.html new file mode 100644 index 0000000..874fe03 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/app.html @@ -0,0 +1,13269 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
from ipyleaflet import Map, basemaps, CircleMarker
+from ipywidgets import HTML
+import pandas as pd
+from data.labeled.preprocessed import LABELED_CITIES as cities, RISKS_MAPPING as risks
+import numpy as np
+from ipyleaflet import Icon, Marker, MarkerCluster, Popup
+
+# First is latitude and second is longitude; both in degrees
+
+
+m = Map(interpolation='nearest', basemap=basemaps.Stamen.Terrain,world_copy_jump=True)
+
+markers = ()
+
+
+popup = Popup(
+            name='popup',
+            close_button=False,
+            auto_close=False,
+            close_on_escape_key=False,auto_pan=False
+        )
+def mouseover_callback(marker,location, html):
+    def callback(*args, **kwargs):
+        popup.child = html
+        popup.location = location
+        m.add_layer(popup)
+    return callback
+
+def mouseout_callback():
+    def callback(*args, **kwargs):
+        m.remove_layer(popup)
+    return callback
+
+
+
+def populate_per_city(city, country, risks_dict):
+    html =  f"""
+    <p> <h4><b> {city}, {country} </b></h4>
+    
+    """
+    
+    for risk in risks:
+        if risk not in risks_dict:
+            continue
+        risk_value = risks_dict[risk]
+        if not pd.isnull(risk_value) and risk_value !=0:
+            html += f"""
+      <h4><b>{risks[risk]}</b>:{risk_value}</h4>
+   
+    """
+    html += '</p>'
+    return HTML(html)
+    
+
+for _,city in cities.iterrows():
+    location=(city['latitude'],city['longitude'])
+    html = populate_per_city(city.city, city.country, city)
+    marker = CircleMarker(location=location,fill_color = "blue",color='blue',radius=2)
+    marker.on_mouseover(mouseover_callback(marker,location, html))
+    marker.on_mouseout(mouseout_callback())
+    markers = markers + (marker,)
+    
+from utils.geo import  is_close
+
+ +
+
+
+ +
+
+
+
In [2]:
+
+
+
from classification.model_handler import ModelHandler, InvalidCoordinates
+from data.labeled.preprocessed import RISKS_MAPPING
+
+handler = ModelHandler()
+
+if not handler.is_fitted:
+    handler.train()
+    
+risks_ids = sorted(RISKS_MAPPING)
+
+
+manually_added_markers = []
+def handle_click(**kwargs):
+    if kwargs.get('type') == 'click':
+        coords = kwargs.get('coordinates')
+        try:
+            output,mask = handler.test(coords[0], coords[1])
+        except InvalidCoordinates:
+            return
+        html = populate_per_city("Close To:" + output['city'], output['country'],{k:v for k,v in output.items() if k in risks_ids})
+        marker = CircleMarker(location=coords,fill_color = "red",color='red',radius=2)
+        check = [is_close(marker.location,mark.location) for mark in manually_added_markers]
+        if not any(check):
+            
+            marker.on_mouseover(mouseover_callback(marker,coords, html))
+            marker.on_mouseout(mouseout_callback())
+            manually_added_markers.append(marker)
+            m.add_layer(marker)
+        else:
+            to_rem = [cnt for cnt,(m,c) in enumerate(zip(manually_added_markers,check)) if c][0]
+            m.remove_layer(manually_added_markers[to_rem])
+            manually_added_markers.pop(to_rem)
+            m.remove_layer(popup)
+
+m.on_interaction(handle_click)
+m.add_layer(MarkerCluster(markers = markers))
+
+ +
+
+
+ +
+
+
+
In [3]:
+
+
+
m.layout.width = '80%'
+m.layout.height = '1000px'
+m.zoom=2.2
+m.center = (0, 0)
+display(m)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + + + + + +
+
+ + +
+ +
+ +
+ +
+ + +
+
ROU
+
+
+
+ +
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/combine_unlabeled.html b/documentation/WaterSecurity/notebooks/combine_unlabeled.html new file mode 100644 index 0000000..4965a90 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/combine_unlabeled.html @@ -0,0 +1,13712 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [13]:
+
+
+
import pandas as pd
+import sys
+sys.path.append("..")
+from data.unlabeled.preprocessed import econ, aqua, edu, humdev
+
+ +
+
+
+ +
+
+
+
+

Find out the differences in country representation

+
+
+
+
+
+
In [14]:
+
+
+
print("Aqua shape:",aqua.shape,"Econ shape:",econ.shape,"Edu shape:",edu.shape,"Humdev shape:",humdev.shape)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Aqua shape: (200, 23) Econ shape: (149, 33) Edu shape: (241, 85) Humdev shape: (195, 67)
+
+
+
+ +
+
+ +
+
+
+
In [15]:
+
+
+
countries_diff_edu_humdev= edu.loc[set(edu.index) - set(humdev.index)]['Short Name']
+countries_diff_aqua_humd = aqua.loc[set(aqua.index) - set(humdev.index)]['Country']
+countries_not_in_humdev = countries_diff_edu_humdev.append(countries_diff_aqua_humd)
+
+ +
+
+
+ +
+
+
+
In [16]:
+
+
+
print("Extra not needed indicators in edu dataset",countries_not_in_humdev)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Extra not needed indicators in edu dataset NAC                                     North America
+SSA              Sub-Saharan Africa (developing only)
+LIC                                        Low income
+ABW                                             Aruba
+ASM                                    American Samoa
+HIC                                       High income
+EAS           East Asia & Pacific (all income levels)
+VGB                                               NaN
+PRI                                       Puerto Rico
+ECA           Europe & Central Asia (developing only)
+LMY                               Low & middle income
+FRO                                    Faeroe Islands
+CYM                                    Cayman Islands
+UMC                               Upper middle income
+TCA                          Turks and Caicos Islands
+LDC      Least developed countries: UN classification
+OED                                      OECD members
+WLD                                             World
+CHI                                   Channel Islands
+ECS         Europe & Central Asia (all income levels)
+EAP             East Asia & Pacific (developing only)
+LMC                               Lower middle income
+VIR                                    Virgin Islands
+CUW                                           Curaçao
+GUM                                              Guam
+SAS                                        South Asia
+PYF                                  French Polynesia
+HPC            Heavily indebted poor countries (HIPC)
+LAC       Latin America & Caribbean (developing only)
+SXM                         Sint Maarten (Dutch part)
+MIC                                     Middle income
+GIB                                         Gibraltar
+LCN     Latin America & Caribbean (all income levels)
+XKX                                            Kosovo
+BMU                                           Bermuda
+ARB                                        Arab World
+SSF            Sub-Saharan Africa (all income levels)
+EMU                                         Euro area
+EUU                                    European Union
+GRL                                         Greenland
+MNP                          Northern Mariana Islands
+MNA      Middle East & North Africa (developing only)
+IMN                                       Isle of Man
+MEA    Middle East & North Africa (all income levels)
+NCL                                     New Caledonia
+MAC                                  Macao SAR, China
+NIU                                              Niue
+VAT                                          Holy See
+PRI                                       Puerto Rico
+FRO                                     Faroe Islands
+COK                                      Cook Islands
+TKL                                           Tokelau
+dtype: object
+
+
+
+ +
+
+ +
+
+
+
In [17]:
+
+
+
countries_diff_humdev_econ = set(humdev.index) - set(econ.index) 
+print("Not in humdev",countries_diff_humdev_econ)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Not in humdev {'LKA', 'FJI', 'PLW', 'LCA', 'JAM', 'SSD', 'SMR', 'GNQ', 'TLS', 'CUB', 'LIE', 'HKG', 'GRD', 'SWZ', 'LUX', 'VUT', 'ATG', 'WSM', 'FSM', 'KNA', 'MHL', 'MCO', 'BRB', 'HTI', 'DOM', 'MDA', 'PNG', 'COM', 'SLB', 'NRU', 'TTO', 'BHS', 'VCT', 'DJI', 'BWA', 'TUV', 'MUS', 'CPV', 'SYC', 'KIR', 'MDV', 'LSO', 'STP', 'DMA', 'TON', 'NAM', 'PSE'}
+
+
+
+ +
+
+ +
+
+
+
+

Econ contains too little information and is not used in the final dataset

+
+
+
+
+
+
In [18]:
+
+
+
big_table = humdev.join(edu, how="inner").join(aqua, how="inner")
+
+ +
+
+
+ +
+
+
+
In [19]:
+
+
+
big_table.dropna(axis=1, inplace=True)
+
+ +
+
+
+ +
+
+
+
In [20]:
+
+
+
name_columns = set(big_table.columns) - set(big_table.select_dtypes(include="number").columns)
+print("Info columns",name_columns)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Info columns {'Table Name', 'Country', 'Short Name', 'Long Name'}
+
+
+
+ +
+
+ +
+
+
+
+

Label columns that are not needed (Short Name, Long Name, Table Name) are removed

+
+
+
+
+
+
In [21]:
+
+
+
big_table.drop(['Short Name','Long Name','Table Name'],inplace=True, axis=1)
+
+ +
+
+
+ +
+
+
+
In [22]:
+
+
+
big_table
+
+ +
+
+
+ +
+
+ + +
+ +
Out[22]:
+ + + +
+

Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...SDG 6.4.1. Services Water Use EfficiencySDG 6.4.1. Water Use EfficiencySDG 6.4.2. Water StressSeasonal variability (WRI)Total internal renewable water resources per capitaTotal population with access to safe drinking-water (JMP)Total renewable water resources per capitaTotal water withdrawal per capitaUrban population with access to safe drinking-water (JMP)Country
AFG26.08013.22036.9201.948006.00600027.24468.95700079.72600038.04225.8...57.1486220.92377854.7570192.5000001299.03717255.31799.917253561.29701878.2Afghanistan
AGO30.23223.13338.0564.023006.35900030.000150.52600065.99500031.82566.2...167.030879142.4678361.8718833.1000004963.65031749.04977.06558823.67124675.4Angola
ALB93.17493.70092.4979.7020010.61400029.50819.64200052.8520002.88161.2...21.8522396.6569077.1394232.4000009326.77662195.110470.953679492.27351194.9Albania
AND72.32771.48473.32710.4390010.56400046.42918.2663344.4610350.07788.0...146.63270986.30042669.0338091.6000004098.648070100.04098.648070422.680401100.0Andorra
ARG57.15859.16154.82811.1230010.72900039.87762.78200021.80500044.78192.0...65.05495613.61656410.4566641.8000006645.85815199.119943.036802859.86479899.0Argentina
..................................................................
WSM74.94279.12771.58311.1628710.41524910.00023.88600029.9830000.19718.1...157.70594689.40181169.0281702.092752-2094.18671599.00.000000413.38130697.5Samoa
YEM28.02019.92036.9182.880005.1460000.97160.35200045.62700029.16237.3...47.0234115.219357169.7619052.40000075.44507554.975.445075128.07699672.0Yemen
ZAF75.47874.97778.20710.0310010.29100045.33367.90800010.29800058.55866.9...53.51851414.65909762.0557162.100000785.83041193.2900.723027339.94181699.6South Africa
ZMB44.44038.48854.0686.283008.17600017.964120.11200078.13400017.86144.1...43.21736612.7648942.8354984.4000004758.62751965.46218.25640993.27384685.6Zambia
ZWE64.93559.79270.7838.066008.92300034.57186.13500064.73900014.64532.2...27.1944885.21332931.3462263.700000861.16097376.91404.830298234.54344297.0Zimbabwe
+

194 rows × 144 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [23]:
+
+
+
big_table.to_csv("../data/unlabeled/preprocessed_countries_dataset.csv")
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/prep_aquastat-checkpoint.html b/documentation/WaterSecurity/notebooks/prep_aquastat-checkpoint.html new file mode 100644 index 0000000..aea5b84 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/prep_aquastat-checkpoint.html @@ -0,0 +1,15521 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
import pandas as pd
+import sklearn
+from helpers import *
+import matplotlib.pyplot as plt
+import sys
+sys.path.append("..")
+from data.unlabeled import aquastat_eah, aquastat_wr, aquastat_wu, aquastat_cc
+import seaborn as sns
+import os
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
..\data\unlabeled\__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)
+..\data\unlabeled\__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)
+..\data\unlabeled\__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)
+
+
+
+ +
+
+ +
+
+
+
In [2]:
+
+
+
aquastat_wr
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4National Rainfall Index (NRI)44722017185.000000I0
1Afghanistan4Total internal renewable water resources per c...415820171299.037172E0
2Afghanistan4Dependency ratio4192201728.722600E0
3Afghanistan4Total renewable water resources per capita419020171799.917253E0
4Afghanistan4Interannual variability (WRI)454120172.500000I0
...........................
1224Zimbabwe716Dependency ratio4192201738.700000E0
1225Zimbabwe716Total renewable water resources per capita419020171404.830298E0
1226Zimbabwe716Interannual variability (WRI)454120173.100000I0
1227Zimbabwe716Seasonal variability (WRI)454220173.700000I0
1228Zimbabwe716Dam capacity per capita447120177019.230792E0
+

1229 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
aquastat_wu
+
+ +
+
+
+ +
+
+ + +
+ +
Out[3]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4Agricultural water withdrawal as % of total wa...4254201798.169627E0
1Afghanistan4Industrial water withdrawal as % of total wate...425620170.831988E0
2Afghanistan4Municipal water withdrawal as % of total withd...425520170.998385E0
3Afghanistan4Total water withdrawal per capita42572017561.297018E0
4Afghanistan4MDG 7.5. Freshwater withdrawal as % of total r...4275201731.045462I0
...........................
1946Zimbabwe716SDG 6.4.2. Water Stress4550201731.346226E0
1947Zimbabwe716SDG 6.4.1. Water Use Efficiency455120175.213329I0
1948Zimbabwe716SDG 6.4.1. Irrigated Agriculture Water Use Eff...455220170.050244E0
1949Zimbabwe716SDG 6.4.1. Industrial Water Use Efficiency4553201749.212523E0
1950Zimbabwe716SDG 6.4.1. Services Water Use Efficiency4554201727.194488E0
+

1951 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [4]:
+
+
+
aquastat_eah
+
+ +
+
+
+ +
+
+ + +
+ +
Out[4]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4Total population4104201736296.113X0
1Afghanistan4Rural population4105201726558.609X0
2Afghanistan4Urban population410620178971.472X0
3Afghanistan4Flood occurrence (WRI)454320173.700I0
4Afghanistan4Total population with access to safe drinking-...4114201755.300I0
...........................
1440Zimbabwe716Urban population410620175328.766X0
1441Zimbabwe716Flood occurrence (WRI)454320172.800I0
1442Zimbabwe716Total population with access to safe drinking-...4114201776.900I0
1443Zimbabwe716Rural population with access to safe drinking-...4115201767.300I0
1444Zimbabwe716Urban population with access to safe drinking-...4116201797.000I0
+

1445 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [5]:
+
+
+
aquastat_cc
+
+ +
+
+
+ +
+
+ + +
+ +
Out[5]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeCountry
0AFGAfghanistan
1ALBAlbania
2DZAAlgeria
3ANDAndorra
4AGOAngola
.........
195VENVenezuela (Bolivarian Republic of)
196VNMViet Nam
197YEMYemen
198ZMBZambia
199ZWEZimbabwe
+

200 rows × 2 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

All data should be fairly similar - everything can be merged

+
+
+
+
+
+
In [6]:
+
+
+
alldf = pd.concat([aquastat_eah,aquastat_wu,aquastat_wr])
+alldf
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4Total population4104201736296.113000X0
1Afghanistan4Rural population4105201726558.609000X0
2Afghanistan4Urban population410620178971.472000X0
3Afghanistan4Flood occurrence (WRI)454320173.700000I0
4Afghanistan4Total population with access to safe drinking-...4114201755.300000I0
...........................
1224Zimbabwe716Dependency ratio4192201738.700000E0
1225Zimbabwe716Total renewable water resources per capita419020171404.830298E0
1226Zimbabwe716Interannual variability (WRI)454120173.100000I0
1227Zimbabwe716Seasonal variability (WRI)454220173.700000I0
1228Zimbabwe716Dam capacity per capita447120177019.230792E0
+

4625 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [7]:
+
+
+
alldf.info()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
<class 'pandas.core.frame.DataFrame'>
+Int64Index: 4625 entries, 0 to 1228
+Data columns (total 8 columns):
+ #   Column         Non-Null Count  Dtype  
+---  ------         --------------  -----  
+ 0   Area           4625 non-null   object 
+ 1   Area Id        4625 non-null   int64  
+ 2   Variable Name  4625 non-null   object 
+ 3   Variable Id    4625 non-null   int64  
+ 4   Year           4625 non-null   int64  
+ 5   Value          4622 non-null   float64
+ 6   Symbol         4603 non-null   object 
+ 7   Md             4625 non-null   int64  
+dtypes: float64(1), int64(4), object(3)
+memory usage: 325.2+ KB
+
+
+
+ +
+
+ +
+
+
+
+

These are the different symbols found in the dataset

    +
  • No symbol: Official value, data come from one national source (FAO questionnaire on water and agriculture, report, publication, official website, etc.) .
  • +
  • E: Estimate either calculated as sum or identity (yield) from official values or from an AQUASTAT estimation.
  • +
  • I: Imputed (carry forward, vertical imputation, linear interpolation)*.
  • +
  • X: External value (reported by FAOSTAT or another international agency)**.
  • +
+ +
+
+
+
+
+
In [8]:
+
+
+
alldf[alldf['Symbol'] == "I"]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[8]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
3Afghanistan4Flood occurrence (WRI)454320173.7I0
4Afghanistan4Total population with access to safe drinking-...4114201755.3I0
5Afghanistan4Rural population with access to safe drinking-...4115201747.0I0
6Afghanistan4Urban population with access to safe drinking-...4116201778.2I0
11Albania8Flood occurrence (WRI)454320172.7I0
...........................
1219Zambia894Interannual variability (WRI)454120172.1I0
1220Zambia894Seasonal variability (WRI)454220174.4I0
1222Zimbabwe716National Rainfall Index (NRI)44722017722.5I0
1226Zimbabwe716Interannual variability (WRI)454120173.1I0
1227Zimbabwe716Seasonal variability (WRI)454220173.7I0
+

1939 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Explore the three missing values

+
+
+
+
+
+
In [9]:
+
+
+
alldf[alldf['Value'].isna()]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
841Monaco492Rural population with access to safe drinking-...41152000NaNNaN0
865Morocco504Population affected by water related disease44032000NaNNaN5293
22Andorra20Dependency ratio41922007NaNNaN0
+
+
+ +
+ +
+
+ +
+
+
+
+

Create country x indicator dataframe

+
+
+
+
+
+
In [10]:
+
+
+
countryXindicator = alldf.pivot_table('Value',['Area'],'Variable Name')
+countryXindicator
+
+ +
+
+
+ +
+
+ + +
+ +
Out[10]:
+ + + +
+

Variable Name% of area equipped for irrigation salinizedAgricultural water withdrawal as % of total renewable water resourcesAgricultural water withdrawal as % of total water withdrawalDam capacity per capitaDependency ratioFlood occurrence (WRI)Industrial water withdrawal as % of total water withdrawalInterannual variability (WRI)MDG 7.5. Freshwater withdrawal as % of total renewable water resourcesMunicipal water withdrawal as % of total withdrawal...SDG 6.4.1. Water Use EfficiencySDG 6.4.2. Water StressSeasonal variability (WRI)Total internal renewable water resources per capitaTotal populationTotal population with access to safe drinking-water (JMP)Total renewable water resources per capitaTotal water withdrawal per capitaUrban populationUrban population with access to safe drinking-water (JMP)
Area
AfghanistanNaN30.61380798.16962755.35028028.7226003.70.8319882.531.0454620.998385...0.92377854.7570192.51299.03717236296.11355.31799.917253561.2970188971.47278.2
Albania3.0224722.99668963.7413721397.28289210.9271522.716.3262431.23.93377519.932385...6.6569077.1394232.49326.7766212884.16995.110470.953679492.2735111740.03294.9
AlgeriaNaN57.17836663.764099208.2488263.5998972.81.8256552.384.01474234.410247...14.354620137.9203601.9271.73762741389.18983.6281.885204252.77132229770.61084.3
AndorraNaNNaNNaNNaNNaN3.3NaN1.5NaNNaN...NaNNaN1.64098.64807077.001100.04098.648070NaN67.845100.0
AngolaNaN0.09885420.784925316.7751320.2695421.733.9472942.50.47553945.267781...142.4678361.8718833.14963.65031729816.76649.04977.06558823.67124619311.63975.4
..................................................................
Venezuela (Bolivarian Republic of)NaN1.26113273.8521105360.96542039.2452832.73.5060972.71.70725322.641793...9.5571907.5403672.327378.63916529402.48493.145064.219744769.53702328198.25195.0
Viet Nam6.5429338.79405594.782397472.67741859.3471473.83.7474090.79.2591501.470194...2.34944818.1303153.23799.33972594600.64897.69345.813361867.11879633642.49899.1
YemenNaN154.04761990.74333816.6158800.0000002.91.8232824.9169.7619057.433380...5.219357169.7619052.475.44507527834.81954.975.445075128.07699610174.61272.0
ZambiaNaN1.09923773.2824436000.70050323.4732822.58.2697202.11.50000018.447837...12.7648942.8354984.44758.62751916853.59965.46218.25640993.2738467346.42185.6
ZimbabweNaN13.85000082.9564857019.23079238.7000002.82.4377833.116.69500014.605732...5.21332931.3462263.7861.16097314236.59576.91404.830298234.5434425328.76697.0
+

200 rows × 27 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [11]:
+
+
+
countryXindicator.drop(['Urban population','Total population','Rural population'], inplace=True, axis=1)
+
+ +
+
+
+ +
+
+
+
In [12]:
+
+
+
countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[12]:
+ + + + +
+
(200, 24)
+
+ +
+ +
+
+ +
+
+
+
In [13]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 84.0 %
+Min: 1.5 %
+Mean: 16.208333333333332 %
+
+
+
+ +
+ +
Out[13]:
+ + + + +
+
(1.5, 84.0)
+
+ +
+ +
+
+ +
+
+
+
+

Drop columns that have too many missing values

+
+
+
+
+
+
In [14]:
+
+
+
dropColumnHalf(countryXindicator)
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 20.5 %
+Min: 1.5 %
+Mean: 10.545454545454545 %
+
+
+
+ +
+ +
Out[15]:
+ + + + +
+
(1.5, 20.5)
+
+ +
+ +
+
+ +
+
+
+
In [16]:
+
+
+
countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[16]:
+ + + + +
+
(200, 22)
+
+ +
+ +
+
+ +
+
+
+
+

Impute the missing values

+
+
+
+
+
+
In [17]:
+
+
+
imputed_countryXindicator = impute_df(countryXindicator, verbose=2, max_iter=20)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (200, 22)
+[IterativeImputer] Ending imputation round 1/20, elapsed time 0.04
+[IterativeImputer] Change: 17738.817690565473, scaled tolerance: 508.38384774800903 
+[IterativeImputer] Ending imputation round 2/20, elapsed time 0.08
+[IterativeImputer] Change: 240.05853395982552, scaled tolerance: 508.38384774800903 
+[IterativeImputer] Early stopping criterion reached.
+[IterativeImputer] Completing matrix with shape (200, 22)
+[IterativeImputer] Ending imputation round 1/2, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/2, elapsed time 0.01
+
+
+
+ +
+
+ +
+
+
+
+

Check the correlation for the imputed and non-imputed dataset

+
+
+
+
+
+
In [18]:
+
+
+
corr_calc = countryXindicator.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[18]:
+ + + + +
+
<AxesSubplot:xlabel='Variable Name', ylabel='Variable Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [19]:
+
+
+
corr_calc = imputed_countryXindicator.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[19]:
+ + + + +
+
<AxesSubplot:xlabel='Variable Name', ylabel='Variable Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+
In [20]:
+
+
+
merged = imputed_countryXindicator.merge(aquastat_cc, how='inner', left_on="Area", right_on="Country").set_index("Code")
+
+ +
+
+
+ +
+
+
+
In [21]:
+
+
+
merged
+
+ +
+
+
+ +
+
+ + +
+ +
Out[21]:
+ + + +
+

Agricultural water withdrawal as % of total renewable water resourcesAgricultural water withdrawal as % of total water withdrawalDam capacity per capitaDependency ratioFlood occurrence (WRI)Industrial water withdrawal as % of total water withdrawalInterannual variability (WRI)MDG 7.5. Freshwater withdrawal as % of total renewable water resourcesMunicipal water withdrawal as % of total withdrawalNational Rainfall Index (NRI)...SDG 6.4.1. Services Water Use EfficiencySDG 6.4.1. Water Use EfficiencySDG 6.4.2. Water StressSeasonal variability (WRI)Total internal renewable water resources per capitaTotal population with access to safe drinking-water (JMP)Total renewable water resources per capitaTotal water withdrawal per capitaUrban population with access to safe drinking-water (JMP)Country
Code
AFG30.61380798.16962755.35028028.7226003.70.8319882.531.0454620.998385185.000000...57.1486220.92377854.7570192.51299.03717255.31799.917253561.29701878.2Afghanistan
ALB2.99668963.7413721397.28289210.9271522.716.3262431.23.93377519.9323851136.000000...21.8522396.6569077.1394232.49326.77662195.110470.953679492.27351194.9Albania
DZA57.17836663.764099208.2488263.5998972.81.8256552.384.01474234.410247241.500000...23.06878714.354620137.9203601.9271.73762783.6281.885204252.77132284.3Algeria
AND57.77107853.096413996.17713620.8662623.318.1687561.560.12262628.7348301071.311544...146.63270986.30042669.0338091.64098.648070100.04098.648070422.680401100.0Andorra
AGO0.09885420.784925316.7751320.2695421.733.9472942.50.47553945.2677811137.000000...167.030879142.4678361.8718833.14963.65031749.04977.06558823.67124675.4Angola
..................................................................
VEN1.26113273.8521105360.96542039.2452832.73.5060972.71.70725322.6417931813.000000...25.3044729.5571907.5403672.327378.63916593.145064.219744769.53702395.0Venezuela (Bolivarian Republic of)
VNM8.79405594.782397472.67741859.3471473.83.7474090.79.2591501.4701941991.000000...83.8407912.34944818.1303153.23799.33972597.69345.813361867.11879699.1Viet Nam
YEM154.04761990.74333816.6158800.0000002.91.8232824.9169.7619057.433380232.700000...47.0234115.219357169.7619052.475.44507554.975.445075128.07699672.0Yemen
ZMB1.09923773.2824436000.70050323.4732822.58.2697202.11.50000018.4478371065.000000...43.21736612.7648942.8354984.44758.62751965.46218.25640993.27384685.6Zambia
ZWE13.85000082.9564857019.23079238.7000002.82.4377833.116.69500014.605732722.500000...27.1944885.21332931.3462263.7861.16097376.91404.830298234.54344297.0Zimbabwe
+

200 rows × 23 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [22]:
+
+
+
!dir
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
 Volume in drive C is Windows
+ Volume Serial Number is A8E6-B474
+
+ Directory of C:\Users\joach\code-projects\WaterSecurity\unlabeled_preprocessing
+
+12/05/2021  08:21    <DIR>          .
+12/05/2021  08:21    <DIR>          ..
+12/05/2021  08:11    <DIR>          .ipynb_checkpoints
+11/05/2021  15:21             1,709 helpers.py
+12/05/2021  08:21           136,834 prep_aquastat.ipynb
+11/05/2021  15:21            47,960 prep_economic_v2.ipynb
+11/05/2021  15:21           400,714 prep_edstats.ipynb
+11/05/2021  15:21         2,790,867 prep_hdro_v2.ipynb
+11/05/2021  15:21                 0 __init__.py
+11/05/2021  15:28    <DIR>          __pycache__
+               6 File(s)      3,378,084 bytes
+               4 Dir(s)  838,000,263,168 bytes free
+
+
+
+ +
+
+ +
+
+
+
In [23]:
+
+
+
merged.to_csv("../data/unlabeled/aquastat_preprocessed.csv")
+
+ +
+
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/prep_aquastat.html b/documentation/WaterSecurity/notebooks/prep_aquastat.html new file mode 100644 index 0000000..1218dcc --- /dev/null +++ b/documentation/WaterSecurity/notebooks/prep_aquastat.html @@ -0,0 +1,15522 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
import pandas as pd
+import sklearn
+from helpers import *
+import matplotlib.pyplot as plt
+import sys
+sys.path.append("..")
+from data.unlabeled.raw import aquastat_eah, aquastat_wr, aquastat_wu, aquastat_cc
+import seaborn as sns
+import os
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
..\data\unlabeled\raw\__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)
+..\data\unlabeled\raw\__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)
+..\data\unlabeled\raw\__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)
+
+
+
+ +
+
+ +
+
+
+
In [2]:
+
+
+
aquastat_wr
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4National Rainfall Index (NRI)44722017185.000000I0
1Afghanistan4Total internal renewable water resources per c...415820171299.037172E0
2Afghanistan4Dependency ratio4192201728.722600E0
3Afghanistan4Total renewable water resources per capita419020171799.917253E0
4Afghanistan4Interannual variability (WRI)454120172.500000I0
...........................
1224Zimbabwe716Dependency ratio4192201738.700000E0
1225Zimbabwe716Total renewable water resources per capita419020171404.830298E0
1226Zimbabwe716Interannual variability (WRI)454120173.100000I0
1227Zimbabwe716Seasonal variability (WRI)454220173.700000I0
1228Zimbabwe716Dam capacity per capita447120177019.230792E0
+

1229 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
aquastat_wu
+
+ +
+
+
+ +
+
+ + +
+ +
Out[3]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4Agricultural water withdrawal as % of total wa...4254201798.169627E0
1Afghanistan4Industrial water withdrawal as % of total wate...425620170.831988E0
2Afghanistan4Municipal water withdrawal as % of total withd...425520170.998385E0
3Afghanistan4Total water withdrawal per capita42572017561.297018E0
4Afghanistan4MDG 7.5. Freshwater withdrawal as % of total r...4275201731.045462I0
...........................
1946Zimbabwe716SDG 6.4.2. Water Stress4550201731.346226E0
1947Zimbabwe716SDG 6.4.1. Water Use Efficiency455120175.213329I0
1948Zimbabwe716SDG 6.4.1. Irrigated Agriculture Water Use Eff...455220170.050244E0
1949Zimbabwe716SDG 6.4.1. Industrial Water Use Efficiency4553201749.212523E0
1950Zimbabwe716SDG 6.4.1. Services Water Use Efficiency4554201727.194488E0
+

1951 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [4]:
+
+
+
aquastat_eah
+
+ +
+
+
+ +
+
+ + +
+ +
Out[4]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4Total population4104201736296.113X0
1Afghanistan4Rural population4105201726558.609X0
2Afghanistan4Urban population410620178971.472X0
3Afghanistan4Flood occurrence (WRI)454320173.700I0
4Afghanistan4Total population with access to safe drinking-...4114201755.300I0
...........................
1440Zimbabwe716Urban population410620175328.766X0
1441Zimbabwe716Flood occurrence (WRI)454320172.800I0
1442Zimbabwe716Total population with access to safe drinking-...4114201776.900I0
1443Zimbabwe716Rural population with access to safe drinking-...4115201767.300I0
1444Zimbabwe716Urban population with access to safe drinking-...4116201797.000I0
+

1445 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [5]:
+
+
+
aquastat_cc
+
+ +
+
+
+ +
+
+ + +
+ +
Out[5]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
CodeCountry
0AFGAfghanistan
1ALBAlbania
2DZAAlgeria
3ANDAndorra
4AGOAngola
.........
195VENVenezuela (Bolivarian Republic of)
196VNMViet Nam
197YEMYemen
198ZMBZambia
199ZWEZimbabwe
+

200 rows × 2 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

All data should be fairly similar - everything can be merged

+
+
+
+
+
+
In [6]:
+
+
+
alldf = pd.concat([aquastat_eah,aquastat_wu,aquastat_wr])
+alldf
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
0Afghanistan4Total population4104201736296.113000X0
1Afghanistan4Rural population4105201726558.609000X0
2Afghanistan4Urban population410620178971.472000X0
3Afghanistan4Flood occurrence (WRI)454320173.700000I0
4Afghanistan4Total population with access to safe drinking-...4114201755.300000I0
...........................
1224Zimbabwe716Dependency ratio4192201738.700000E0
1225Zimbabwe716Total renewable water resources per capita419020171404.830298E0
1226Zimbabwe716Interannual variability (WRI)454120173.100000I0
1227Zimbabwe716Seasonal variability (WRI)454220173.700000I0
1228Zimbabwe716Dam capacity per capita447120177019.230792E0
+

4625 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [7]:
+
+
+
alldf.info()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
<class 'pandas.core.frame.DataFrame'>
+Int64Index: 4625 entries, 0 to 1228
+Data columns (total 8 columns):
+ #   Column         Non-Null Count  Dtype  
+---  ------         --------------  -----  
+ 0   Area           4625 non-null   object 
+ 1   Area Id        4625 non-null   int64  
+ 2   Variable Name  4625 non-null   object 
+ 3   Variable Id    4625 non-null   int64  
+ 4   Year           4625 non-null   int64  
+ 5   Value          4622 non-null   float64
+ 6   Symbol         4603 non-null   object 
+ 7   Md             4625 non-null   int64  
+dtypes: float64(1), int64(4), object(3)
+memory usage: 325.2+ KB
+
+
+
+ +
+
+ +
+
+
+
+

These are the different symbols found in the dataset

    +
  • No symbol: Official value, data come from one national source (FAO questionnaire on water and agriculture, report, publication, official website, etc.) .
  • +
  • E: Estimate either calculated as sum or identity (yield) from official values or from an AQUASTAT estimation.
  • +
  • I: Imputed (carry forward, vertical imputation, linear interpolation)*.
  • +
  • X: External value (reported by FAOSTAT or another international agency)**.
  • +
+ +
+
+
+
+
+
In [8]:
+
+
+
alldf[alldf['Symbol'] == "I"]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[8]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
3Afghanistan4Flood occurrence (WRI)454320173.7I0
4Afghanistan4Total population with access to safe drinking-...4114201755.3I0
5Afghanistan4Rural population with access to safe drinking-...4115201747.0I0
6Afghanistan4Urban population with access to safe drinking-...4116201778.2I0
11Albania8Flood occurrence (WRI)454320172.7I0
...........................
1219Zambia894Interannual variability (WRI)454120172.1I0
1220Zambia894Seasonal variability (WRI)454220174.4I0
1222Zimbabwe716National Rainfall Index (NRI)44722017722.5I0
1226Zimbabwe716Interannual variability (WRI)454120173.1I0
1227Zimbabwe716Seasonal variability (WRI)454220173.7I0
+

1939 rows × 8 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Explore the three missing values

+
+
+
+
+
+
In [9]:
+
+
+
alldf[alldf['Value'].isna()]
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
AreaArea IdVariable NameVariable IdYearValueSymbolMd
841Monaco492Rural population with access to safe drinking-...41152000NaNNaN0
865Morocco504Population affected by water related disease44032000NaNNaN5293
22Andorra20Dependency ratio41922007NaNNaN0
+
+
+ +
+ +
+
+ +
+
+
+
+

Create country x indicator dataframe

+
+
+
+
+
+
In [10]:
+
+
+
countryXindicator = alldf.pivot_table('Value',['Area'],'Variable Name')
+countryXindicator
+
+ +
+
+
+ +
+
+ + +
+ +
Out[10]:
+ + + +
+

Variable Name% of area equipped for irrigation salinizedAgricultural water withdrawal as % of total renewable water resourcesAgricultural water withdrawal as % of total water withdrawalDam capacity per capitaDependency ratioFlood occurrence (WRI)Industrial water withdrawal as % of total water withdrawalInterannual variability (WRI)MDG 7.5. Freshwater withdrawal as % of total renewable water resourcesMunicipal water withdrawal as % of total withdrawal...SDG 6.4.1. Water Use EfficiencySDG 6.4.2. Water StressSeasonal variability (WRI)Total internal renewable water resources per capitaTotal populationTotal population with access to safe drinking-water (JMP)Total renewable water resources per capitaTotal water withdrawal per capitaUrban populationUrban population with access to safe drinking-water (JMP)
Area
AfghanistanNaN30.61380798.16962755.35028028.7226003.70.8319882.531.0454620.998385...0.92377854.7570192.51299.03717236296.11355.31799.917253561.2970188971.47278.2
Albania3.0224722.99668963.7413721397.28289210.9271522.716.3262431.23.93377519.932385...6.6569077.1394232.49326.7766212884.16995.110470.953679492.2735111740.03294.9
AlgeriaNaN57.17836663.764099208.2488263.5998972.81.8256552.384.01474234.410247...14.354620137.9203601.9271.73762741389.18983.6281.885204252.77132229770.61084.3
AndorraNaNNaNNaNNaNNaN3.3NaN1.5NaNNaN...NaNNaN1.64098.64807077.001100.04098.648070NaN67.845100.0
AngolaNaN0.09885420.784925316.7751320.2695421.733.9472942.50.47553945.267781...142.4678361.8718833.14963.65031729816.76649.04977.06558823.67124619311.63975.4
..................................................................
Venezuela (Bolivarian Republic of)NaN1.26113273.8521105360.96542039.2452832.73.5060972.71.70725322.641793...9.5571907.5403672.327378.63916529402.48493.145064.219744769.53702328198.25195.0
Viet Nam6.5429338.79405594.782397472.67741859.3471473.83.7474090.79.2591501.470194...2.34944818.1303153.23799.33972594600.64897.69345.813361867.11879633642.49899.1
YemenNaN154.04761990.74333816.6158800.0000002.91.8232824.9169.7619057.433380...5.219357169.7619052.475.44507527834.81954.975.445075128.07699610174.61272.0
ZambiaNaN1.09923773.2824436000.70050323.4732822.58.2697202.11.50000018.447837...12.7648942.8354984.44758.62751916853.59965.46218.25640993.2738467346.42185.6
ZimbabweNaN13.85000082.9564857019.23079238.7000002.82.4377833.116.69500014.605732...5.21332931.3462263.7861.16097314236.59576.91404.830298234.5434425328.76697.0
+

200 rows × 27 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [11]:
+
+
+
countryXindicator.drop(['Urban population','Total population','Rural population'], inplace=True, axis=1)
+
+ +
+
+
+ +
+
+
+
In [12]:
+
+
+
countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[12]:
+ + + + +
+
(200, 24)
+
+ +
+ +
+
+ +
+
+
+
In [13]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 84.0 %
+Min: 1.5 %
+Mean: 16.208333333333332 %
+
+
+
+ +
+ +
Out[13]:
+ + + + +
+
(1.5, 84.0)
+
+ +
+ +
+
+ +
+
+
+
+

Drop columns that have too many missing values

+
+
+
+
+
+
In [14]:
+
+
+
dropColumnHalf(countryXindicator)
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 20.5 %
+Min: 1.5 %
+Mean: 10.545454545454545 %
+
+
+
+ +
+ +
Out[15]:
+ + + + +
+
(1.5, 20.5)
+
+ +
+ +
+
+ +
+
+
+
In [16]:
+
+
+
countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[16]:
+ + + + +
+
(200, 22)
+
+ +
+ +
+
+ +
+
+
+
+

Impute the missing values

+
+
+
+
+
+
In [17]:
+
+
+
imputed_countryXindicator = impute_df(countryXindicator, verbose=2, max_iter=20)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (200, 22)
+[IterativeImputer] Ending imputation round 1/20, elapsed time 0.04
+[IterativeImputer] Change: 17738.817690565473, scaled tolerance: 508.38384774800903 
+[IterativeImputer] Ending imputation round 2/20, elapsed time 0.07
+[IterativeImputer] Change: 240.05853395982552, scaled tolerance: 508.38384774800903 
+[IterativeImputer] Early stopping criterion reached.
+[IterativeImputer] Completing matrix with shape (200, 22)
+[IterativeImputer] Ending imputation round 1/2, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/2, elapsed time 0.01
+
+
+
+ +
+
+ +
+
+
+
+

Check the correlation for the imputed and non-imputed dataset

+
+
+
+
+
+
In [18]:
+
+
+
corr_calc = countryXindicator.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[18]:
+ + + + +
+
<AxesSubplot:xlabel='Variable Name', ylabel='Variable Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [19]:
+
+
+
corr_calc = imputed_countryXindicator.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[19]:
+ + + + +
+
<AxesSubplot:xlabel='Variable Name', ylabel='Variable Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+
In [20]:
+
+
+
merged = imputed_countryXindicator.merge(aquastat_cc, how='inner', left_on="Area", right_on="Country").set_index("Code")
+
+ +
+
+
+ +
+
+
+
In [21]:
+
+
+
merged
+
+ +
+
+
+ +
+
+ + +
+ +
Out[21]:
+ + + +
+

Agricultural water withdrawal as % of total renewable water resourcesAgricultural water withdrawal as % of total water withdrawalDam capacity per capitaDependency ratioFlood occurrence (WRI)Industrial water withdrawal as % of total water withdrawalInterannual variability (WRI)MDG 7.5. Freshwater withdrawal as % of total renewable water resourcesMunicipal water withdrawal as % of total withdrawalNational Rainfall Index (NRI)...SDG 6.4.1. Services Water Use EfficiencySDG 6.4.1. Water Use EfficiencySDG 6.4.2. Water StressSeasonal variability (WRI)Total internal renewable water resources per capitaTotal population with access to safe drinking-water (JMP)Total renewable water resources per capitaTotal water withdrawal per capitaUrban population with access to safe drinking-water (JMP)Country
Code
AFG30.61380798.16962755.35028028.7226003.70.8319882.531.0454620.998385185.000000...57.1486220.92377854.7570192.51299.03717255.31799.917253561.29701878.2Afghanistan
ALB2.99668963.7413721397.28289210.9271522.716.3262431.23.93377519.9323851136.000000...21.8522396.6569077.1394232.49326.77662195.110470.953679492.27351194.9Albania
DZA57.17836663.764099208.2488263.5998972.81.8256552.384.01474234.410247241.500000...23.06878714.354620137.9203601.9271.73762783.6281.885204252.77132284.3Algeria
AND57.77107853.096413996.17713620.8662623.318.1687561.560.12262628.7348301071.311544...146.63270986.30042669.0338091.64098.648070100.04098.648070422.680401100.0Andorra
AGO0.09885420.784925316.7751320.2695421.733.9472942.50.47553945.2677811137.000000...167.030879142.4678361.8718833.14963.65031749.04977.06558823.67124675.4Angola
..................................................................
VEN1.26113273.8521105360.96542039.2452832.73.5060972.71.70725322.6417931813.000000...25.3044729.5571907.5403672.327378.63916593.145064.219744769.53702395.0Venezuela (Bolivarian Republic of)
VNM8.79405594.782397472.67741859.3471473.83.7474090.79.2591501.4701941991.000000...83.8407912.34944818.1303153.23799.33972597.69345.813361867.11879699.1Viet Nam
YEM154.04761990.74333816.6158800.0000002.91.8232824.9169.7619057.433380232.700000...47.0234115.219357169.7619052.475.44507554.975.445075128.07699672.0Yemen
ZMB1.09923773.2824436000.70050323.4732822.58.2697202.11.50000018.4478371065.000000...43.21736612.7648942.8354984.44758.62751965.46218.25640993.27384685.6Zambia
ZWE13.85000082.9564857019.23079238.7000002.82.4377833.116.69500014.605732722.500000...27.1944885.21332931.3462263.7861.16097376.91404.830298234.54344297.0Zimbabwe
+

200 rows × 23 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [22]:
+
+
+
!dir
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
 Volume in drive C is Windows
+ Volume Serial Number is A8E6-B474
+
+ Directory of C:\Users\joach\code-projects\WaterSecurity\unlabeled_preprocessing
+
+19/05/2021  19:45    <DIR>          .
+19/05/2021  19:45    <DIR>          ..
+12/05/2021  08:11    <DIR>          .ipynb_checkpoints
+18/05/2021  12:37            27,518 combine_unlabeled.ipynb
+11/05/2021  15:21             1,709 helpers.py
+18/05/2021  12:37           365,025 prep_aquastat.ipynb
+18/05/2021  12:37            50,892 prep_economic_v2.ipynb
+18/05/2021  12:37         1,533,606 prep_edstats.ipynb
+19/05/2021  19:45         2,857,923 prep_hdro_v2.ipynb
+21/05/2021  08:00                43 __init__.py
+21/05/2021  08:00    <DIR>          __pycache__
+               7 File(s)      4,836,716 bytes
+               4 Dir(s)  832,763,138,048 bytes free
+
+
+
+ +
+
+ +
+
+
+
In [23]:
+
+
+
merged.to_csv("../data/unlabeled/preprocessed/aquastat_preprocessed.csv")
+
+ +
+
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/prep_economic_v2.html b/documentation/WaterSecurity/notebooks/prep_economic_v2.html new file mode 100644 index 0000000..82dfba7 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/prep_economic_v2.html @@ -0,0 +1,14474 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
import pandas as pd
+from helpers import *
+import sys
+sys.path.append("..")
+from data.unlabeled.raw import econ_co as df1, econ_da as df2, econ_se as df3
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
..\data\unlabeled\raw\__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)
+..\data\unlabeled\raw\__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)
+..\data\unlabeled\raw\__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)
+
+
+
+ +
+
+ +
+
+
+
In [2]:
+
+
+
print(df2.columns)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
+       '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003',
+       '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012',
+       '2013', '2014', '2015', 'Unnamed: 25'],
+      dtype='object')
+
+
+
+ +
+
+ +
+
+
+
+

Only look at the years listed below

+
+
+
+
+
+
In [3]:
+
+
+
years = ['2010', '2011', '2012','2013', '2014', '2015']
+df2 = df2[['Country Code','Indicator Code',*years]]
+df2.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[3]:
+ + + + +
+
(298, 8)
+
+ +
+ +
+
+ +
+
+
+
In [4]:
+
+
+
df2.isna().sum()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[4]:
+ + + + +
+
Country Code      0
+Indicator Code    0
+2010              0
+2011              0
+2012              2
+2013              2
+2014              2
+2015              2
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
+

Same missing values in 2012-2015 -> Merge 2011 with 2015 and remove the rest

+
+
+
+
+
+
In [5]:
+
+
+
fill_missing_with_column(df2,'2015','2011')
+df2.isna().sum()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
c:\Users\joach\code-projects\WaterSecurity\unlabeled_preprocessing\helpers.py:15: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame.
+Try using .loc[row_indexer,col_indexer] = value instead
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  df[into] = df[into].combine_first(df[fro])
+C:\Users\joach\.conda\envs\wsenv\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  return super().drop(
+
+
+
+ +
+ +
Out[5]:
+ + + + +
+
Country Code      0
+Indicator Code    0
+2010              0
+2012              2
+2013              2
+2014              2
+2015              0
+dtype: int64
+
+ +
+ +
+
+ +
+
+
+
In [6]:
+
+
+
df2 = df2[['Country Code','Indicator Code','2015']]
+
+ +
+
+
+ +
+
+
+
In [7]:
+
+
+
df2
+
+ +
+
+
+ +
+
+ + +
+ +
Out[7]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Country CodeIndicator Code2015
0AFGEF.EFM.OVRL.XD5.369534e-02
1AFGEF.EFM.RANK.XD1.120000e+02
2AGOEF.EFM.OVRL.XD7.890000e-13
3AGOEF.EFM.RANK.XD1.460000e+02
4ALBEF.EFM.OVRL.XD3.746223e-01
............
293ZAFEF.EFM.RANK.XD4.500000e+01
294ZMBEF.EFM.OVRL.XD6.530739e-02
295ZMBEF.EFM.RANK.XD1.090000e+02
296ZWEEF.EFM.OVRL.XD1.078169e-01
297ZWEEF.EFM.RANK.XD1.000000e+02
+

298 rows × 3 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Join indicator names

+
+
+
+
+
+
In [8]:
+
+
+
comb1 = df2.merge(df3, how='inner', left_on='Indicator Code', right_on='Series Code')
+print(comb1.shape)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
(298, 24)
+
+
+
+ +
+
+ +
+
+
+
In [9]:
+
+
+
comb1
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + +
+

Country CodeIndicator Code2015Series CodeTopicIndicator NameShort definitionLong definitionUnit of measurePeriodicity...Notes from original sourceGeneral commentsSourceStatistical concept and methodologyDevelopment relevanceRelated source linksOther web linksRelated indicatorsLicense TypeUnnamed: 20
0AFGEF.EFM.OVRL.XD5.369534e-02EF.EFM.OVRL.XDEconomic Policy & DebtEconomic Fitness MetricNaNEconomic Fitness (EF) is both a measure of a c...NaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
1AGOEF.EFM.OVRL.XD7.890000e-13EF.EFM.OVRL.XDEconomic Policy & DebtEconomic Fitness MetricNaNEconomic Fitness (EF) is both a measure of a c...NaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
2ALBEF.EFM.OVRL.XD3.746223e-01EF.EFM.OVRL.XDEconomic Policy & DebtEconomic Fitness MetricNaNEconomic Fitness (EF) is both a measure of a c...NaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
3ANDEF.EFM.OVRL.XD3.694202e-01EF.EFM.OVRL.XDEconomic Policy & DebtEconomic Fitness MetricNaNEconomic Fitness (EF) is both a measure of a c...NaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
4AREEF.EFM.OVRL.XD3.438621e-01EF.EFM.OVRL.XDEconomic Policy & DebtEconomic Fitness MetricNaNEconomic Fitness (EF) is both a measure of a c...NaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
..................................................................
293VNMEF.EFM.RANK.XD3.600000e+01EF.EFM.RANK.XDEconomic Policy & DebtEconomic Fitness Ranking (1 = high, 149 = low)NaNNaNNaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
294YEMEF.EFM.RANK.XD1.070000e+02EF.EFM.RANK.XDEconomic Policy & DebtEconomic Fitness Ranking (1 = high, 149 = low)NaNNaNNaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
295ZAFEF.EFM.RANK.XD4.500000e+01EF.EFM.RANK.XDEconomic Policy & DebtEconomic Fitness Ranking (1 = high, 149 = low)NaNNaNNaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
296ZMBEF.EFM.RANK.XD1.090000e+02EF.EFM.RANK.XDEconomic Policy & DebtEconomic Fitness Ranking (1 = high, 149 = low)NaNNaNNaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
297ZWEEF.EFM.RANK.XD1.000000e+02EF.EFM.RANK.XDEconomic Policy & DebtEconomic Fitness Ranking (1 = high, 149 = low)NaNNaNNaNAnnual...NaNNaNWorld Bank, Economic Fitness project. For more...The new literature of Economic Fitness uses te...NaNNaNNaNNaNNaNNaN
+

298 rows × 24 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Pivot so that table is countrycodeXindicator

+
+
+
+
+
+
In [10]:
+
+
+
countryXindicator = comb1.pivot_table('2015',['Country Code'], 'Indicator Name')
+countryXindicator
+
+ +
+
+
+ +
+
+ + +
+ +
Out[10]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Indicator NameEconomic Fitness MetricEconomic Fitness Ranking (1 = high, 149 = low)
Country Code
AFG5.369534e-02112.0
AGO7.890000e-13146.0
ALB3.746223e-0173.0
AND3.694202e-0174.0
ARE3.438621e-0176.0
.........
VNM1.391330e+0036.0
YEM7.417100e-02107.0
ZAF9.784253e-0145.0
ZMB6.530739e-02109.0
ZWE1.078169e-01100.0
+

149 rows × 2 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

No missing values -> no imputation needed

+
+
+
+
+
+
In [11]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 0.0 %
+Min: 0.0 %
+Mean: 0.0 %
+
+
+
+ +
+ +
Out[11]:
+ + + + +
+
(0.0, 0.0)
+
+ +
+ +
+
+ +
+
+
+
+

Merge with country information

+
+
+
+
+
+
In [12]:
+
+
+
comb2 = df1.merge(countryXindicator, how='inner', left_on='Country Code', right_on='Country Code')
+
+ +
+
+
+ +
+
+
+
In [13]:
+
+
+
print(comb2.columns)
+print(comb2.shape)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Index(['Country Code', 'Short Name', 'Table Name', 'Long Name', '2-alpha code',
+       'Currency Unit', 'Special Notes', 'Region', 'Income Group', 'WB-2 code',
+       'National accounts base year', 'National accounts reference year',
+       'SNA price valuation', 'Lending category', 'Other groups',
+       'System of National Accounts', 'Alternative conversion factor',
+       'PPP survey year', 'Balance of Payments Manual in use',
+       'External debt Reporting status', 'System of trade',
+       'Government Accounting concept', 'IMF data dissemination standard',
+       'Latest population census', 'Latest household survey',
+       'Source of most recent Income and expenditure data',
+       'Vital registration complete', 'Latest agricultural census',
+       'Latest industrial data', 'Latest trade data',
+       'Latest water withdrawal data', 'Unnamed: 31',
+       'Economic Fitness Metric',
+       'Economic Fitness Ranking (1 = high, 149 = low)'],
+      dtype='object')
+(149, 34)
+
+
+
+ +
+
+ +
+
+
+
In [17]:
+
+
+
comb2.set_index('Country Code', inplace=True)
+display(comb2)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + +
+

Short NameTable NameLong Name2-alpha codeCurrency UnitSpecial NotesRegionIncome GroupWB-2 codeNational accounts base year...Latest household surveySource of most recent Income and expenditure dataVital registration completeLatest agricultural censusLatest industrial dataLatest trade dataLatest water withdrawal dataUnnamed: 31Economic Fitness MetricEconomic Fitness Ranking (1 = high, 149 = low)
Country Code
AFGAfghanistanAfghanistanIslamic State of AfghanistanAFAfghan afghaniFiscal year end: March 20; reporting period fo...South AsiaLow incomeAF2002/03...Demographic and Health Survey, 2015Integrated household survey (IHS), 2011NaNNaNNaN2016.02000.0NaN5.369534e-02112.0
AGOAngolaAngolaPeople's Republic of AngolaAOAngolan kwanzaNaNSub-Saharan AfricaLower middle incomeAO2002...Demographic and Health Survey, 2015/16Integrated household survey (IHS), 2008/09NaNNaNNaN2016.02005.0NaN7.890000e-13146.0
ALBAlbaniaAlbaniaRepublic of AlbaniaALAlbanian lekNaNEurope & Central AsiaUpper middle incomeALOriginal chained constant price data are resca......Demographic and Health Survey, 2008/09Living Standards Measurement Study Survey (LSM...Yes20122013.02016.02006.0NaN3.746223e-0173.0
ANDAndorraAndorraPrincipality of AndorraADEuroWB-3 code changed from ADO to AND to align wit...Europe & Central AsiaHigh incomeAD2000...NaNNaNYesNaNNaNNaNNaNNaN3.694202e-0174.0
AREUnited Arab EmiratesUnited Arab EmiratesUnited Arab EmiratesAEU.A.E. dirhamNaNMiddle East & North AfricaHigh incomeAE2010...World Health Survey, 2003NaNNaN20121985.02016.02005.0NaN3.438621e-0176.0
..................................................................
VNMVietnamVietnamSocialist Republic of VietnamVNVietnamese dongNaNEast Asia & PacificLower middle incomeVN2010...Multiple Indicator Cluster Survey, 2013/14Integrated household survey (IHS), 2014Yes20112013.02016.02005.0NaN1.391330e+0036.0
YEMYemenYemen, Rep.Republic of YemenYEYemeni rialBased on official government statistics and In...Middle East & North AfricaLower middle incomeRY1990...Demographic and Health Survey, 2013Expenditure survey/budget survey (ES/BS), 2005NaNNaN2012.02016.02005.0NaN7.417100e-02107.0
ZAFSouth AfricaSouth AfricaRepublic of South AfricaZASouth African randFiscal year end: March 31; reporting period fo...Sub-Saharan AfricaUpper middle incomeZA2010...Demographic and Health Survey, 2003; World Hea...Expenditure survey/budget survey (ES/BS), 2010/11NaN20072010.02016.02013.0NaN9.784253e-0145.0
ZMBZambiaZambiaRepublic of ZambiaZMNew Zambian kwachaThe base year is 2010. National accounts data ...Sub-Saharan AfricaLower middle incomeZM2010...Demographic and Health Survey, 2013/14Integrated household survey (IHS), 2010NaNNaN1994.02016.02002.0NaN6.530739e-02109.0
ZWEZimbabweZimbabweRepublic of ZimbabweZWU.S. dollarFiscal year end: June 30; reporting period for...Sub-Saharan AfricaLow incomeZW2009...Demographic and Health Survey, 2015Integrated household survey (IHS), 2011/12NaNNaNNaN2016.02007.0NaN1.078169e-01100.0
+

149 rows × 33 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [19]:
+
+
+
comb2.to_csv("../data/unlabeled/preprocessed/economy_preprocessed.csv")
+
+ +
+
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/prep_edstats-checkpoint.html b/documentation/WaterSecurity/notebooks/prep_edstats-checkpoint.html new file mode 100644 index 0000000..36ff7f7 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/prep_edstats-checkpoint.html @@ -0,0 +1,16922 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [65]:
+
+
+
import pandas as pd
+import sklearn
+from helpers import *
+import matplotlib.pyplot as plt
+import sys
+sys.path.append("..")
+from data.unlabeled import edstats_co as df_c, edstats_da as df_d, edstats_se as df_s
+import seaborn as sns
+
+ +
+
+
+ +
+
+
+
+

Remember to extract Edstats_csv.zip to Edstats_csv

+
+
+
+
+
+
In [2]:
+
+
+
df_d.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + + +
+
Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
+       '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978',
+       '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987',
+       '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996',
+       '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005',
+       '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014',
+       '2015', '2016', '2017', '2020', '2025', '2030', '2035', '2040', '2045',
+       '2050', '2055', '2060', '2065', '2070', '2075', '2080', '2085', '2090',
+       '2095', '2100', 'Unnamed: 69'],
+      dtype='object')
+
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
years = ['2015', '2016', '2017', '2020']
+stripped_df_d = df_d[['Country Code','Indicator Code',*years]]
+print("Non na values in col 2020:",stripped_df_d['2020'].count())
+stripped_df_d
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Non na values in col 2020: 51436
+
+
+
+ +
+ +
Out[3]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Country CodeIndicator Code2015201620172020
0ARBUIS.NERA.2NaNNaNNaNNaN
1ARBUIS.NERA.2.FNaNNaNNaNNaN
2ARBUIS.NERA.2.GPINaNNaNNaNNaN
3ARBUIS.NERA.2.MNaNNaNNaNNaN
4ARBSE.PRM.TENRNaNNaNNaNNaN
.....................
886925ZWEUIS.LP.AG15T24.MNaNNaNNaNNaN
886926ZWESE.ADT.1524.LT.ZSNaNNaNNaNNaN
886927ZWESE.ADT.1524.LT.FE.ZSNaNNaNNaNNaN
886928ZWESE.ADT.1524.LT.FM.ZSNaNNaNNaNNaN
886929ZWESE.ADT.1524.LT.MA.ZSNaNNaNNaNNaN
+

886930 rows × 6 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Merge columns to remove null values

+
+
+
+
+
+
In [4]:
+
+
+
for year in years[:-1]:
+    fill_missing_with_column(stripped_df_d, '2020',year)
+print("Non na values in col 2020:", stripped_df_d['2020'].count())
+stripped_df_d
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Non na values in col 2020: 132991
+
+
+
+ +
+ +
+ + +
+
C:\Users\joach\code-projects\WaterSecurity\unlabeled_preprocessing\helpers.py:15: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame.
+Try using .loc[row_indexer,col_indexer] = value instead
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  df[into] = df[into].combine_first(df[fro])
+C:\Users\joach\.conda\envs\wsenv\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  return super().drop(
+
+
+
+ +
+ +
Out[4]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Country CodeIndicator Code2020
0ARBUIS.NERA.2NaN
1ARBUIS.NERA.2.FNaN
2ARBUIS.NERA.2.GPINaN
3ARBUIS.NERA.2.MNaN
4ARBSE.PRM.TENRNaN
............
886925ZWEUIS.LP.AG15T24.MNaN
886926ZWESE.ADT.1524.LT.ZSNaN
886927ZWESE.ADT.1524.LT.FE.ZSNaN
886928ZWESE.ADT.1524.LT.FM.ZSNaN
886929ZWESE.ADT.1524.LT.MA.ZSNaN
+

886930 rows × 3 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [5]:
+
+
+
df_d_withseries = stripped_df_d.merge(df_s, how='left', left_on='Indicator Code', right_on='Series Code')
+df_d_withseries.drop(['Indicator Code'], inplace=True, axis=1)
+
+ +
+
+
+ +
+
+
+
In [6]:
+
+
+
df_d_withseries.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + + +
+
Index(['Country Code', '2020', 'Series Code', 'Topic', 'Indicator Name',
+       'Short definition', 'Long definition', 'Unit of measure', 'Periodicity',
+       'Base Period', 'Other notes', 'Aggregation method',
+       'Limitations and exceptions', 'Notes from original source',
+       'General comments', 'Source', 'Statistical concept and methodology',
+       'Development relevance', 'Related source links', 'Other web links',
+       'Related indicators', 'License Type', 'Unnamed: 20'],
+      dtype='object')
+
+ +
+ +
+
+ +
+
+
+
+

Rearrange the table so that each indicator is a column

+
+
+
+
+
+
In [7]:
+
+
+
countryXindicator = df_d_withseries.pivot_table('2020',['Country Code'],'Indicator Name')
+countryXindicator
+
+ +
+
+
+ +
+
+ + +
+ +
Out[7]:
+ + + +
+

Indicator NameAdjusted net enrolment rate, lower secondary, both sexes (%)Adjusted net enrolment rate, lower secondary, female (%)Adjusted net enrolment rate, lower secondary, gender parity index (GPI)Adjusted net enrolment rate, lower secondary, male (%)Adjusted net enrolment rate, primary, both sexes (%)Adjusted net enrolment rate, primary, female (%)Adjusted net enrolment rate, primary, gender parity index (GPI)Adjusted net enrolment rate, primary, male (%)Adjusted net enrolment rate, upper secondary, both sexes (%)Adjusted net enrolment rate, upper secondary, female (%)...Under-age enrolment ratio in secondary education, female (%)Under-age enrolment ratio in secondary education, male (%)Unemployment, female (% of female labor force) (modeled ILO estimate)Unemployment, male (% of male labor force) (modeled ILO estimate)Unemployment, total (% of total labor force) (modeled ILO estimate)Youth illiterate population, 15-24 years, % femaleYouth literacy rate, population 15-24 years, both sexes (%)Youth literacy rate, population 15-24 years, female (%)Youth literacy rate, population 15-24 years, gender parity index (GPI)Youth literacy rate, population 15-24 years, male (%)
Country Code
ABWNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AFGNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN12.7000007.78.600000NaNNaNNaNNaNNaN
AGONaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN6.7000005.76.200000NaNNaNNaNNaNNaN
ALBNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN17.29999917.017.100000NaNNaNNaNNaNNaN
ANDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN47.80890100.00000100.000001.00000100.00000
..................................................................
XKXNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEMNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN32.70000111.116.700001NaNNaNNaNNaNNaN
ZAFNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN27.70000123.125.20000134.3480898.9557899.229041.0055298.68459
ZMBNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN8.0000007.37.700000NaNNaNNaNNaNNaN
ZWENaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN5.1000005.15.100000NaNNaNNaNNaNNaN
+

241 rows × 2047 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Find columns that contain "number", "Projection" or "$"

+
+
+
+
+
+
In [8]:
+
+
+
projection_col = [colname for colname in countryXindicator.columns if "Projection" in colname]
+number = [colname for colname in countryXindicator.columns if "number" in colname]
+dollar = [colname for colname in countryXindicator.columns if "$" in colname]
+
+ +
+
+
+ +
+
+
+
+

And remove them

+
+
+
+
+
+
In [9]:
+
+
+
countryXindicator.drop(projection_col + number+ dollar,inplace=True,axis=1)
+countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + + +
+
(241, 1502)
+
+ +
+ +
+
+ +
+
+
+
+

Remove population data

+
+
+
+
+
+
In [10]:
+
+
+
countryXindicator.drop(['Population, total'], axis=1, inplace=True)
+
+ +
+
+
+ +
+
+
+
+

See how much of the data is missing and drop columns where more than 50% of the values are null

+
+
+
+
+
+
In [11]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 99.5850622406639 %
+Min: 3.7344398340248963 %
+Mean: 81.31204369977415 %
+
+
+
+ +
+ +
Out[11]:
+ + + + +
+
(3.7344398340248963, 99.5850622406639)
+
+ +
+ +
+
+ +
+
+
+
In [12]:
+
+
+
dropColumnHalf(countryXindicator)
+
+ +
+
+
+ +
+
+
+
In [13]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 49.79253112033195 %
+Min: 3.7344398340248963 %
+Mean: 26.60212079299217 %
+
+
+
+ +
+ +
Out[13]:
+ + + + +
+
(3.7344398340248963, 49.79253112033195)
+
+ +
+ +
+
+ +
+
+
+
In [14]:
+
+
+
countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[14]:
+ + + + +
+
(241, 243)
+
+ +
+ +
+
+ +
+
+
+
In [15]:
+
+
+
 countryXindicator
+
+ +
+
+
+ +
+
+ + +
+ +
Out[15]:
+ + + +
+

Indicator NameAdjusted net enrolment rate, primary, both sexes (%)Age population, age 0, female, UNESCOAge population, age 0, total, UNESCOAge population, age 01, female, UNESCOAge population, age 01, total, UNESCOAge population, age 02, female, UNESCOAge population, age 02, total, UNESCOAge population, age 03, female, UNESCOAge population, age 03, total, UNESCOAge population, age 04, female, UNESCO...Prevalence of HIV, total (% of population ages 15-49)Primary completion rate, both sexes (%)Primary completion rate, female (%)Primary completion rate, male (%)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)Unemployment, female (% of female labor force) (modeled ILO estimate)Unemployment, male (% of male labor force) (modeled ILO estimate)Unemployment, total (% of total labor force) (modeled ILO estimate)
Country Code
ABWNaN542.01112.0572.01170.0600.01224.0623.01269.0643.0...NaNNaNNaNNaN6.05.03.0NaNNaNNaN
AFGNaN682677.01403010.0650389.01338500.0620880.01279403.0593957.01225345.0569425.0...0.1NaNNaNNaN6.06.03.012.7000007.78.600000
AGONaN414919.0832093.0396537.0793950.0379705.0759164.0364295.0727455.0350180.0...1.9NaNNaNNaN6.06.03.06.7000005.76.200000
ALBNaN23697.049105.024026.049709.024243.050104.024360.050315.024397.0...0.1106.367561104.699371107.9001245.07.03.017.29999917.017.100000
ANDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaN6.06.02.0NaNNaNNaN
..................................................................
XKXNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEMNaN458859.0936053.0452601.0923017.0444988.0907252.0436223.0889171.0426514.0...0.1NaNNaNNaN6.06.03.032.70000111.116.700001
ZAFNaN484535.0980663.0486901.0985215.0489516.0990194.0492299.0995445.0495165.0...18.9NaNNaNNaN7.05.03.027.70000123.125.200001
ZMBNaN226616.0457329.0220950.0445451.0215890.0434876.0211367.0425454.0207311.0...12.6NaNNaNNaN7.05.03.08.0000007.37.700000
ZWENaN181422.0364761.0179761.0361101.0177982.0357261.0176130.0353321.0174240.0...13.9NaNNaNNaN7.06.04.05.1000005.15.100000
+

241 rows × 243 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Split the dataset into a dataframe containing the year data and a dataframe containing the float percentage data

This is done for better imputation results

+ +
+
+
+
+
+
In [17]:
+
+
+
int_cols = find_all_integer_columns(countryXindicator)
+
+ +
+
+
+ +
+
+
+
In [29]:
+
+
+
countryXindicator_float = countryXindicator.drop(int_cols, axis=1)
+countryXindicator_float
+
+ +
+
+
+ +
+
+ + +
+ +
Out[29]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Indicator NameAdjusted net enrolment rate, primary, both sexes (%)Gross enrolment ratio, lower secondary, both sexes (%)Gross enrolment ratio, lower secondary, female (%)Gross enrolment ratio, lower secondary, male (%)Gross enrolment ratio, pre-primary, both sexes (%)Gross enrolment ratio, pre-primary, female (%)Gross enrolment ratio, pre-primary, male (%)Gross enrolment ratio, primary, both sexes (%)Gross enrolment ratio, primary, female (%)Gross enrolment ratio, primary, gender parity index (GPI)...Population growth (annual %)Population, female (% of total)Population, male (% of total)Prevalence of HIV, total (% of population ages 15-49)Primary completion rate, both sexes (%)Primary completion rate, female (%)Primary completion rate, male (%)Unemployment, female (% of female labor force) (modeled ILO estimate)Unemployment, male (% of male labor force) (modeled ILO estimate)Unemployment, total (% of total labor force) (modeled ILO estimate)
Country Code
ABWNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...0.52465852.46552147.534479NaNNaNNaNNaNNaNNaNNaN
AFGNaN67.44761749.58044184.329559NaNNaNNaN111.87708391.0832600.69200...2.94323448.45455851.5454420.1NaNNaNNaN12.7000007.78.600000
AGONaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...3.42802150.99101049.0089901.9NaNNaNNaN6.7000005.76.200000
ALBNaN101.48837399.889503102.98043888.60224287.39891189.722321113.699799111.7190170.96706...-0.29120649.51459950.4854010.1106.367561104.699371107.90012417.29999917.017.100000
ANDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...-1.537836NaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
XKXNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...-1.103886NaNNaNNaNNaNNaNNaNNaNNaNNaN
YEMNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...2.52025449.47085650.5291440.1NaNNaNNaN32.70000111.116.700001
ZAFNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...1.58532550.89760449.10239618.9NaNNaNNaN27.70000123.125.200001
ZMBNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...3.02412350.38476649.61523412.6NaNNaNNaN8.0000007.37.700000
ZWENaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...2.34564351.33499748.66500313.9NaNNaNNaN5.1000005.15.100000
+

241 rows × 48 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [30]:
+
+
+
countryXindicator_year = countryXindicator[int_cols]
+countryXindicator_year.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[30]:
+ + + + +
+
(241, 195)
+
+ +
+ +
+
+ +
+
+
+
In [31]:
+
+
+
countryXindicator_year = countryXindicator_year[[colname for colname in countryXindicator_year.columns if "years" in colname]]
+countryXindicator_year
+
+ +
+
+
+ +
+
+ + +
+ +
Out[31]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Indicator NameDuration of compulsory education (years)Official entrance age to lower secondary education (years)Official entrance age to primary education (years)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)
Country Code
ABW13.012.06.06.05.03.0
AFG9.013.07.06.06.03.0
AGO6.012.06.06.06.03.0
ALB9.011.06.05.07.03.0
AND10.012.06.06.06.02.0
.....................
XKXNaNNaNNaNNaNNaNNaN
YEM9.012.06.06.06.03.0
ZAF9.014.07.07.05.03.0
ZMB7.014.07.07.05.03.0
ZWE7.013.06.07.06.04.0
+

241 rows × 6 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Impute both dataframes

+
+
+
+
+
+
In [62]:
+
+
+
imputed_countryXindicator_float = impute_df(countryXindicator_float, max_iter=100, verbose=2)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (241, 48)
+[IterativeImputer] Ending imputation round 1/100, elapsed time 0.15
+[IterativeImputer] Change: 185.571939565928, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 2/100, elapsed time 0.30
+[IterativeImputer] Change: 84.11443482287976, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 3/100, elapsed time 0.44
+[IterativeImputer] Change: 40.004149972043805, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 4/100, elapsed time 0.59
+[IterativeImputer] Change: 20.57199335577451, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 5/100, elapsed time 0.73
+[IterativeImputer] Change: 12.447852809025877, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 6/100, elapsed time 0.88
+[IterativeImputer] Change: 10.982023559420153, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 7/100, elapsed time 1.03
+[IterativeImputer] Change: 9.335158590713679, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 8/100, elapsed time 1.17
+[IterativeImputer] Change: 8.566696528569699, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 9/100, elapsed time 1.32
+[IterativeImputer] Change: 7.68743911493866, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 10/100, elapsed time 1.46
+[IterativeImputer] Change: 6.844171756725839, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 11/100, elapsed time 1.60
+[IterativeImputer] Change: 6.065545725434555, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 12/100, elapsed time 1.75
+[IterativeImputer] Change: 5.36616411150322, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 13/100, elapsed time 1.90
+[IterativeImputer] Change: 4.744028755257943, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 14/100, elapsed time 2.04
+[IterativeImputer] Change: 4.1901923804245715, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 15/100, elapsed time 2.18
+[IterativeImputer] Change: 3.70294072516266, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 16/100, elapsed time 2.32
+[IterativeImputer] Change: 3.2688619527108522, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 17/100, elapsed time 2.47
+[IterativeImputer] Change: 2.88345738531942, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 18/100, elapsed time 2.61
+[IterativeImputer] Change: 2.532294563247444, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 19/100, elapsed time 2.75
+[IterativeImputer] Change: 2.2119869217502828, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 20/100, elapsed time 2.90
+[IterativeImputer] Change: 1.9202443611257713, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 21/100, elapsed time 3.04
+[IterativeImputer] Change: 1.6606990496546086, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 22/100, elapsed time 3.18
+[IterativeImputer] Change: 1.6559282826221846, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 23/100, elapsed time 3.33
+[IterativeImputer] Change: 1.6520065848505072, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 24/100, elapsed time 3.47
+[IterativeImputer] Change: 1.648782063252183, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 25/100, elapsed time 3.61
+[IterativeImputer] Change: 1.6461831356141814, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 26/100, elapsed time 3.77
+[IterativeImputer] Change: 1.644058054001174, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 27/100, elapsed time 3.91
+[IterativeImputer] Change: 1.6423013079998188, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 28/100, elapsed time 4.06
+[IterativeImputer] Change: 1.6408741216707456, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 29/100, elapsed time 4.20
+[IterativeImputer] Change: 1.6396788922926566, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 30/100, elapsed time 4.35
+[IterativeImputer] Change: 1.638655100233116, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 31/100, elapsed time 4.49
+[IterativeImputer] Change: 1.6377695134752297, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 32/100, elapsed time 4.64
+[IterativeImputer] Change: 1.6369872536064, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 33/100, elapsed time 4.79
+[IterativeImputer] Change: 1.636221623251295, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 34/100, elapsed time 4.95
+[IterativeImputer] Change: 1.6355083766661642, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 35/100, elapsed time 5.11
+[IterativeImputer] Change: 1.634821858203819, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 36/100, elapsed time 5.27
+[IterativeImputer] Change: 1.6341370631609828, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 37/100, elapsed time 5.42
+[IterativeImputer] Change: 1.6334747186241456, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 38/100, elapsed time 5.56
+[IterativeImputer] Change: 1.645652531918897, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 39/100, elapsed time 5.71
+[IterativeImputer] Change: 1.652182711035818, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 40/100, elapsed time 5.88
+[IterativeImputer] Change: 1.6532662904419548, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 41/100, elapsed time 6.02
+[IterativeImputer] Change: 1.649451831600862, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 42/100, elapsed time 6.18
+[IterativeImputer] Change: 1.6408222980663139, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 43/100, elapsed time 6.33
+[IterativeImputer] Change: 1.6294400496414387, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 44/100, elapsed time 6.47
+[IterativeImputer] Change: 1.6287678524729357, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 45/100, elapsed time 6.61
+[IterativeImputer] Change: 1.6281047469396985, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 46/100, elapsed time 6.76
+[IterativeImputer] Change: 1.6274834308429378, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 47/100, elapsed time 6.90
+[IterativeImputer] Change: 1.6270130873718072, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 48/100, elapsed time 7.04
+[IterativeImputer] Change: 1.6262940758389055, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 49/100, elapsed time 7.19
+[IterativeImputer] Change: 1.6257297763508762, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 50/100, elapsed time 7.33
+[IterativeImputer] Change: 1.6252298538321177, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 51/100, elapsed time 7.47
+[IterativeImputer] Change: 1.624722930666042, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 52/100, elapsed time 7.62
+[IterativeImputer] Change: 1.6242577689697895, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 53/100, elapsed time 7.76
+[IterativeImputer] Change: 1.6238212014114874, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 54/100, elapsed time 7.91
+[IterativeImputer] Change: 1.6234519318135652, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 55/100, elapsed time 8.06
+[IterativeImputer] Change: 1.6231115358163457, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 56/100, elapsed time 8.20
+[IterativeImputer] Change: 1.6228249713969374, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 57/100, elapsed time 8.34
+[IterativeImputer] Change: 1.6225670613424206, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 58/100, elapsed time 8.50
+[IterativeImputer] Change: 1.6222797867936543, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 59/100, elapsed time 8.65
+[IterativeImputer] Change: 1.6221342960698018, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 60/100, elapsed time 8.79
+[IterativeImputer] Change: 1.6219870713467206, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 61/100, elapsed time 8.93
+[IterativeImputer] Change: 1.6218646076142422, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 62/100, elapsed time 9.09
+[IterativeImputer] Change: 1.6217730363655758, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 63/100, elapsed time 9.24
+[IterativeImputer] Change: 1.621725788928061, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 64/100, elapsed time 9.38
+[IterativeImputer] Change: 1.6216920357302036, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 65/100, elapsed time 9.54
+[IterativeImputer] Change: 1.621698990708198, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 66/100, elapsed time 9.70
+[IterativeImputer] Change: 1.6217532944875095, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 67/100, elapsed time 9.84
+[IterativeImputer] Change: 1.6218227224444455, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 68/100, elapsed time 9.98
+[IterativeImputer] Change: 1.6218972339192113, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 69/100, elapsed time 10.12
+[IterativeImputer] Change: 1.6232902366910664, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 70/100, elapsed time 10.27
+[IterativeImputer] Change: 1.6249616723710512, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 71/100, elapsed time 10.41
+[IterativeImputer] Change: 1.6261259356463242, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 72/100, elapsed time 10.56
+[IterativeImputer] Change: 1.6281859862659571, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 73/100, elapsed time 10.71
+[IterativeImputer] Change: 1.63006029323919, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 74/100, elapsed time 10.87
+[IterativeImputer] Change: 1.6318311593012644, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 75/100, elapsed time 11.02
+[IterativeImputer] Change: 1.6336233225534909, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 76/100, elapsed time 11.16
+[IterativeImputer] Change: 1.6353960973093213, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 77/100, elapsed time 11.31
+[IterativeImputer] Change: 1.637182751172711, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 78/100, elapsed time 11.45
+[IterativeImputer] Change: 1.6389758111697363, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 79/100, elapsed time 11.60
+[IterativeImputer] Change: 1.6407763042253765, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 80/100, elapsed time 11.74
+[IterativeImputer] Change: 1.6425834310407716, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 81/100, elapsed time 11.89
+[IterativeImputer] Change: 1.6443968858747002, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 82/100, elapsed time 12.04
+[IterativeImputer] Change: 1.64625348689472, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 83/100, elapsed time 12.19
+[IterativeImputer] Change: 1.6481429500945106, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 84/100, elapsed time 12.33
+[IterativeImputer] Change: 1.6500518120410161, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 85/100, elapsed time 12.47
+[IterativeImputer] Change: 1.6519548194561064, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 86/100, elapsed time 12.61
+[IterativeImputer] Change: 1.6538680162736423, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 87/100, elapsed time 12.76
+[IterativeImputer] Change: 1.6552679864808504, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 88/100, elapsed time 12.92
+[IterativeImputer] Change: 1.6574626185722092, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 89/100, elapsed time 13.08
+[IterativeImputer] Change: 1.6594748268725177, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 90/100, elapsed time 13.25
+[IterativeImputer] Change: 1.6613749089305434, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 91/100, elapsed time 13.44
+[IterativeImputer] Change: 1.6632615294758013, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 92/100, elapsed time 13.63
+[IterativeImputer] Change: 1.6651267664591594, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 93/100, elapsed time 13.81
+[IterativeImputer] Change: 1.6669733028122995, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 94/100, elapsed time 14.00
+[IterativeImputer] Change: 1.6688041142649566, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 95/100, elapsed time 14.19
+[IterativeImputer] Change: 1.6706225132059456, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 96/100, elapsed time 14.37
+[IterativeImputer] Change: 1.672451264542782, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 97/100, elapsed time 14.55
+[IterativeImputer] Change: 1.6742766711767634, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 98/100, elapsed time 14.73
+[IterativeImputer] Change: 1.6761039010597747, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 99/100, elapsed time 14.91
+[IterativeImputer] Change: 1.6778935337913161, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 100/100, elapsed time 15.10
+[IterativeImputer] Change: 1.679678914280904, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Completing matrix with shape (241, 48)
+[IterativeImputer] Ending imputation round 1/100, elapsed time 0.01
+[IterativeImputer] Ending imputation round 2/100, elapsed time 0.02
+[IterativeImputer] Ending imputation round 3/100, elapsed time 0.02
+[IterativeImputer] Ending imputation round 4/100, elapsed time 0.03
+[IterativeImputer] Ending imputation round 5/100, elapsed time 0.04
+[IterativeImputer] Ending imputation round 6/100, elapsed time 0.05
+[IterativeImputer] Ending imputation round 7/100, elapsed time 0.05
+[IterativeImputer] Ending imputation round 8/100, elapsed time 0.06
+[IterativeImputer] Ending imputation round 9/100, elapsed time 0.07
+[IterativeImputer] Ending imputation round 10/100, elapsed time 0.08
+[IterativeImputer] Ending imputation round 11/100, elapsed time 0.08
+[IterativeImputer] Ending imputation round 12/100, elapsed time 0.09
+[IterativeImputer] Ending imputation round 13/100, elapsed time 0.10
+[IterativeImputer] Ending imputation round 14/100, elapsed time 0.11
+[IterativeImputer] Ending imputation round 15/100, elapsed time 0.12
+[IterativeImputer] Ending imputation round 16/100, elapsed time 0.13
+[IterativeImputer] Ending imputation round 17/100, elapsed time 0.13
+[IterativeImputer] Ending imputation round 18/100, elapsed time 0.14
+[IterativeImputer] Ending imputation round 19/100, elapsed time 0.15
+[IterativeImputer] Ending imputation round 20/100, elapsed time 0.16
+[IterativeImputer] Ending imputation round 21/100, elapsed time 0.16
+[IterativeImputer] Ending imputation round 22/100, elapsed time 0.17
+[IterativeImputer] Ending imputation round 23/100, elapsed time 0.18
+
+
+
+ +
+ +
+ + +
+
C:\Users\joach\.conda\envs\wsenv\lib\site-packages\sklearn\impute\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.
+  warnings.warn("[IterativeImputer] Early stopping criterion not"
+
+
+
+ +
+ +
+ + +
+
[IterativeImputer] Ending imputation round 24/100, elapsed time 0.19
+[IterativeImputer] Ending imputation round 25/100, elapsed time 0.20
+[IterativeImputer] Ending imputation round 26/100, elapsed time 0.20
+[IterativeImputer] Ending imputation round 27/100, elapsed time 0.21
+[IterativeImputer] Ending imputation round 28/100, elapsed time 0.22
+[IterativeImputer] Ending imputation round 29/100, elapsed time 0.23
+[IterativeImputer] Ending imputation round 30/100, elapsed time 0.23
+[IterativeImputer] Ending imputation round 31/100, elapsed time 0.24
+[IterativeImputer] Ending imputation round 32/100, elapsed time 0.25
+[IterativeImputer] Ending imputation round 33/100, elapsed time 0.26
+[IterativeImputer] Ending imputation round 34/100, elapsed time 0.26
+[IterativeImputer] Ending imputation round 35/100, elapsed time 0.27
+[IterativeImputer] Ending imputation round 36/100, elapsed time 0.28
+[IterativeImputer] Ending imputation round 37/100, elapsed time 0.28
+[IterativeImputer] Ending imputation round 38/100, elapsed time 0.29
+[IterativeImputer] Ending imputation round 39/100, elapsed time 0.30
+[IterativeImputer] Ending imputation round 40/100, elapsed time 0.31
+[IterativeImputer] Ending imputation round 41/100, elapsed time 0.31
+[IterativeImputer] Ending imputation round 42/100, elapsed time 0.32
+[IterativeImputer] Ending imputation round 43/100, elapsed time 0.33
+[IterativeImputer] Ending imputation round 44/100, elapsed time 0.33
+[IterativeImputer] Ending imputation round 45/100, elapsed time 0.34
+[IterativeImputer] Ending imputation round 46/100, elapsed time 0.35
+[IterativeImputer] Ending imputation round 47/100, elapsed time 0.35
+[IterativeImputer] Ending imputation round 48/100, elapsed time 0.36
+[IterativeImputer] Ending imputation round 49/100, elapsed time 0.37
+[IterativeImputer] Ending imputation round 50/100, elapsed time 0.37
+[IterativeImputer] Ending imputation round 51/100, elapsed time 0.38
+[IterativeImputer] Ending imputation round 52/100, elapsed time 0.39
+[IterativeImputer] Ending imputation round 53/100, elapsed time 0.39
+[IterativeImputer] Ending imputation round 54/100, elapsed time 0.40
+[IterativeImputer] Ending imputation round 55/100, elapsed time 0.41
+[IterativeImputer] Ending imputation round 56/100, elapsed time 0.41
+[IterativeImputer] Ending imputation round 57/100, elapsed time 0.42
+[IterativeImputer] Ending imputation round 58/100, elapsed time 0.43
+[IterativeImputer] Ending imputation round 59/100, elapsed time 0.43
+[IterativeImputer] Ending imputation round 60/100, elapsed time 0.44
+[IterativeImputer] Ending imputation round 61/100, elapsed time 0.45
+[IterativeImputer] Ending imputation round 62/100, elapsed time 0.45
+[IterativeImputer] Ending imputation round 63/100, elapsed time 0.46
+[IterativeImputer] Ending imputation round 64/100, elapsed time 0.47
+[IterativeImputer] Ending imputation round 65/100, elapsed time 0.47
+[IterativeImputer] Ending imputation round 66/100, elapsed time 0.48
+[IterativeImputer] Ending imputation round 67/100, elapsed time 0.49
+[IterativeImputer] Ending imputation round 68/100, elapsed time 0.49
+[IterativeImputer] Ending imputation round 69/100, elapsed time 0.50
+[IterativeImputer] Ending imputation round 70/100, elapsed time 0.50
+[IterativeImputer] Ending imputation round 71/100, elapsed time 0.51
+[IterativeImputer] Ending imputation round 72/100, elapsed time 0.52
+[IterativeImputer] Ending imputation round 73/100, elapsed time 0.52
+[IterativeImputer] Ending imputation round 74/100, elapsed time 0.53
+[IterativeImputer] Ending imputation round 75/100, elapsed time 0.54
+[IterativeImputer] Ending imputation round 76/100, elapsed time 0.55
+[IterativeImputer] Ending imputation round 77/100, elapsed time 0.55
+[IterativeImputer] Ending imputation round 78/100, elapsed time 0.56
+[IterativeImputer] Ending imputation round 79/100, elapsed time 0.57
+[IterativeImputer] Ending imputation round 80/100, elapsed time 0.57
+[IterativeImputer] Ending imputation round 81/100, elapsed time 0.58
+[IterativeImputer] Ending imputation round 82/100, elapsed time 0.59
+[IterativeImputer] Ending imputation round 83/100, elapsed time 0.59
+[IterativeImputer] Ending imputation round 84/100, elapsed time 0.60
+[IterativeImputer] Ending imputation round 85/100, elapsed time 0.60
+[IterativeImputer] Ending imputation round 86/100, elapsed time 0.61
+[IterativeImputer] Ending imputation round 87/100, elapsed time 0.62
+[IterativeImputer] Ending imputation round 88/100, elapsed time 0.62
+[IterativeImputer] Ending imputation round 89/100, elapsed time 0.63
+[IterativeImputer] Ending imputation round 90/100, elapsed time 0.64
+[IterativeImputer] Ending imputation round 91/100, elapsed time 0.64
+[IterativeImputer] Ending imputation round 92/100, elapsed time 0.65
+[IterativeImputer] Ending imputation round 93/100, elapsed time 0.66
+[IterativeImputer] Ending imputation round 94/100, elapsed time 0.66
+[IterativeImputer] Ending imputation round 95/100, elapsed time 0.67
+[IterativeImputer] Ending imputation round 96/100, elapsed time 0.68
+[IterativeImputer] Ending imputation round 97/100, elapsed time 0.68
+[IterativeImputer] Ending imputation round 98/100, elapsed time 0.69
+[IterativeImputer] Ending imputation round 99/100, elapsed time 0.69
+[IterativeImputer] Ending imputation round 100/100, elapsed time 0.70
+
+
+
+ +
+
+ +
+
+
+
In [33]:
+
+
+
imputed_countryXindicator_year = impute_df(countryXindicator_year, max_iter=100, verbose=2)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (241, 6)
+[IterativeImputer] Ending imputation round 1/100, elapsed time 0.01
+[IterativeImputer] Change: 1.6443802657682207, scaled tolerance: 0.015 
+[IterativeImputer] Ending imputation round 2/100, elapsed time 0.02
+[IterativeImputer] Change: 0.0830316620808409, scaled tolerance: 0.015 
+[IterativeImputer] Ending imputation round 3/100, elapsed time 0.03
+[IterativeImputer] Change: 0.0289166641679115, scaled tolerance: 0.015 
+[IterativeImputer] Ending imputation round 4/100, elapsed time 0.03
+[IterativeImputer] Change: 0.009636184792338298, scaled tolerance: 0.015 
+[IterativeImputer] Early stopping criterion reached.
+[IterativeImputer] Completing matrix with shape (241, 6)
+[IterativeImputer] Ending imputation round 1/4, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/4, elapsed time 0.00
+[IterativeImputer] Ending imputation round 3/4, elapsed time 0.00
+[IterativeImputer] Ending imputation round 4/4, elapsed time 0.00
+
+
+
+ +
+
+ +
+
+
+
+

Convert years float to int

+
+
+
+
+
+
In [68]:
+
+
+
imputed_countryXindicator_year = imputed_countryXindicator_year.round(0).astype(int)
+
+ +
+
+
+ +
+
+
+
+

Comparison correlation plot float-percentages-data

Before Imputation

+ +
+
+
+
+
+
In [60]:
+
+
+
corr_calc = countryXindicator_float.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[60]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

After imputation

+ +
+
+
+
+
+
In [61]:
+
+
+
corr_calc = imputed_countryXindicator_float.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[61]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

Comparison correlation plot yeardata

Before imputation

+ +
+
+
+
+
+
In [53]:
+
+
+
corr_calc = countryXindicator_year.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[53]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

After imputation

+ +
+
+
+
+
+
In [54]:
+
+
+
corr_calc = imputed_countryXindicator_year.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[54]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

Merge imputed dataframes

+
+
+
+
+
+
In [55]:
+
+
+
imputed_data = imputed_countryXindicator_year.merge(imputed_countryXindicator_float, how='inner', on="Country Code")
+imputed_data
+
+ +
+
+
+ +
+
+ + +
+ +
Out[55]:
+ + + +
+

Indicator NameDuration of compulsory education (years)Official entrance age to lower secondary education (years)Official entrance age to primary education (years)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)Adjusted net enrolment rate, primary, both sexes (%)Gross enrolment ratio, lower secondary, both sexes (%)Gross enrolment ratio, lower secondary, female (%)Gross enrolment ratio, lower secondary, male (%)...Population growth (annual %)Population, female (% of total)Population, male (% of total)Prevalence of HIV, total (% of population ages 15-49)Primary completion rate, both sexes (%)Primary completion rate, female (%)Primary completion rate, male (%)Unemployment, female (% of female labor force) (modeled ILO estimate)Unemployment, male (% of male labor force) (modeled ILO estimate)Unemployment, total (% of total labor force) (modeled ILO estimate)
Country Code
ABW1312665396.63654191.07285790.68907691.406452...0.52465852.46552147.5344791.41642391.12574390.75253591.42594210.3495648.7465259.324957
AFG913766387.41687467.44761749.58044184.329559...2.94323448.45455851.5454420.10000093.31278188.49887897.92035912.7000007.7000008.600000
AGO612666387.44670390.90351390.52700391.325392...3.42802150.99101049.0089901.90000092.48617292.93003092.1437306.7000005.7000006.200000
ALB911657399.516937101.48837399.889503102.980438...-0.29120649.51459950.4854010.100000106.367561104.699371107.90012417.29999917.00000017.100000
AND1012666294.59243891.28108191.06735991.467822...-1.53783649.93739850.062602-0.30109990.51069489.17508391.67197414.79872611.02389411.875731
..................................................................
XKX1012666395.41333991.08456590.72179991.429833...-1.10388649.93739750.062603-0.42303391.46488491.25030491.6197229.6624679.2055259.289344
YEM912666386.15074490.90288190.43990491.274501...2.52025449.47085650.5291440.10000092.60948192.83722492.32681532.70000111.10000016.700001
ZAF914775396.50537691.06260390.91636491.240835...1.58532550.89760449.10239618.90000091.11144491.82484090.44171927.70000123.10000025.200001
ZMB714775393.18174491.01295490.82913391.267250...3.02412350.38476649.61523412.60000091.40617091.90597090.9765308.0000007.3000007.700000
ZWE713676494.62561091.04796790.92732191.251754...2.34564351.33499748.66500313.90000091.32510491.91346790.8062845.1000005.1000005.100000
+

241 rows × 54 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [56]:
+
+
+
bigtable = imputed_data.merge(df_c, how='left', on="Country Code")
+bigtable.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[56]:
+ + + + +
+
Index(['Country Code', 'Duration of compulsory education (years)',
+       'Official entrance age to lower secondary education (years)',
+       'Official entrance age to primary education (years)',
+       'Theoretical duration of primary education (years)',
+       'Theoretical duration of secondary education (years)',
+       'Theoretical duration of upper secondary education (years)',
+       'Adjusted net enrolment rate, primary, both sexes (%)',
+       'Gross enrolment ratio, lower secondary, both sexes (%)',
+       'Gross enrolment ratio, lower secondary, female (%)',
+       'Gross enrolment ratio, lower secondary, male (%)',
+       'Gross enrolment ratio, pre-primary, both sexes (%)',
+       'Gross enrolment ratio, pre-primary, female (%)',
+       'Gross enrolment ratio, pre-primary, male (%)',
+       'Gross enrolment ratio, primary, both sexes (%)',
+       'Gross enrolment ratio, primary, female (%)',
+       'Gross enrolment ratio, primary, gender parity index (GPI)',
+       'Gross enrolment ratio, primary, male (%)',
+       'Gross enrolment ratio, secondary, both sexes (%)',
+       'Gross enrolment ratio, secondary, female (%)',
+       'Gross enrolment ratio, secondary, gender parity index (GPI)',
+       'Gross enrolment ratio, secondary, male (%)',
+       'Gross enrolment ratio, upper secondary, both sexes (%)',
+       'Gross intake ratio to Grade 1 of primary education, both sexes (%)',
+       'Gross intake ratio to Grade 1 of primary education, female (%)',
+       'Gross intake ratio to Grade 1 of primary education, male (%)',
+       'Internet users (per 100 people)',
+       'Labor force, female (% of total labor force)',
+       'Mortality rate, under-5 (per 1,000 live births)',
+       'Net enrolment rate, primary, both sexes (%)',
+       'Percentage of enrolment in pre-primary education in private institutions (%)',
+       'Percentage of enrolment in primary education in private institutions (%)',
+       'Percentage of enrolment in secondary education in private institutions (%)',
+       'Percentage of female students enrolled in primary education who are over-age, female (%)',
+       'Percentage of male students enrolled in primary education who are over-age, male (%)',
+       'Percentage of repeaters in primary education, all grades, both sexes (%)',
+       'Percentage of repeaters in primary education, all grades, female (%)',
+       'Percentage of repeaters in primary education, all grades, male (%)',
+       'Percentage of students enrolled in primary education who are over-age, both sexes (%)',
+       'Percentage of students in pre-primary education who are female (%)',
+       'Percentage of students in primary education who are female (%)',
+       'Percentage of students in secondary education who are female (%)',
+       'Percentage of students in secondary general education who are female (%)',
+       'Population ages 0-14 (% of total)',
+       'Population ages 15-64 (% of total)', 'Population growth (annual %)',
+       'Population, female (% of total)', 'Population, male (% of total)',
+       'Prevalence of HIV, total (% of population ages 15-49)',
+       'Primary completion rate, both sexes (%)',
+       'Primary completion rate, female (%)',
+       'Primary completion rate, male (%)',
+       'Unemployment, female (% of female labor force) (modeled ILO estimate)',
+       'Unemployment, male (% of male labor force) (modeled ILO estimate)',
+       'Unemployment, total (% of total labor force) (modeled ILO estimate)',
+       'Short Name', 'Table Name', 'Long Name', '2-alpha code',
+       'Currency Unit', 'Special Notes', 'Region', 'Income Group', 'WB-2 code',
+       'National accounts base year', 'National accounts reference year',
+       'SNA price valuation', 'Lending category', 'Other groups',
+       'System of National Accounts', 'Alternative conversion factor',
+       'PPP survey year', 'Balance of Payments Manual in use',
+       'External debt Reporting status', 'System of trade',
+       'Government Accounting concept', 'IMF data dissemination standard',
+       'Latest population census', 'Latest household survey',
+       'Source of most recent Income and expenditure data',
+       'Vital registration complete', 'Latest agricultural census',
+       'Latest industrial data', 'Latest trade data',
+       'Latest water withdrawal data', 'Unnamed: 31'],
+      dtype='object')
+
+ +
+ +
+
+ +
+
+
+
In [57]:
+
+
+
bigtable
+
+ +
+
+
+ +
+
+ + +
+ +
Out[57]:
+ + + +
+

Country CodeDuration of compulsory education (years)Official entrance age to lower secondary education (years)Official entrance age to primary education (years)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)Adjusted net enrolment rate, primary, both sexes (%)Gross enrolment ratio, lower secondary, both sexes (%)Gross enrolment ratio, lower secondary, female (%)...IMF data dissemination standardLatest population censusLatest household surveySource of most recent Income and expenditure dataVital registration completeLatest agricultural censusLatest industrial dataLatest trade dataLatest water withdrawal dataUnnamed: 31
0ABW1312665396.63654191.07285790.689076...NaN2010NaNNaNYesNaNNaN2012.0NaNNaN
1AFG913766387.41687467.44761749.580441...General Data Dissemination System (GDDS)1979Multiple Indicator Cluster Survey (MICS), 2010/11Integrated household survey (IHS), 2008NaN2013/14NaN2012.02000NaN
2AGO612666387.44670390.90351390.527003...General Data Dissemination System (GDDS)1970Malaria Indicator Survey (MIS), 2011Integrated household survey (IHS), 2008NaN2015NaNNaN2005NaN
3ALB911657399.516937101.48837399.889503...General Data Dissemination System (GDDS)2011Demographic and Health Survey (DHS), 2008/09Living Standards Measurement Study Survey (LSM...Yes20122010.02012.02006NaN
4AND1012666294.59243891.28108191.067359...NaN2011. Population figures compiled from adminis...NaNNaNYesNaNNaN2006.0NaNNaN
..................................................................
236XKX1012666395.41333991.08456590.721799...General Data Dissemination System (GDDS)2011NaNIntegrated household survey (IHS), 2011NaNNaNNaNNaNNaNNaN
237YEM912666386.15074490.90288190.439904...General Data Dissemination System (GDDS)2004Demographic and Health Survey (DHS), 2013Expenditure survey/budget survey (ES/BS), 2005NaNNaN2006.02012.02005NaN
238ZAF914775396.50537691.06260390.916364...Special Data Dissemination Standard (SDDS)2011Demographic and Health Survey (DHS), 2003; Wor...Expenditure survey/budget survey (ES/BS), 2010NaN20072010.02012.02000NaN
239ZMB714775393.18174491.01295490.829133...General Data Dissemination System (GDDS)2010Demographic and Health Survey (DHS), 2013Integrated household survey (IHS), 2010NaN2010. Population and Housing Census.NaN2011.02002NaN
240ZWE713676494.62561091.04796790.927321...General Data Dissemination System (GDDS)2012Demographic and Health Survey (DHS), 2010/11Integrated household survey (IHS), 2011/12NaNNaNNaN2012.02002NaN
+

241 rows × 86 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [32]:
+
+
+
bigtable.to_csv("data/unlabeled/edstats_preprocessed.csv")
+
+ +
+
+
+ +
+
+ + +
+ +
Out[32]:
+ + + +
+

Country CodeAdjusted net enrolment rate, primary, both sexes (%)Age population, age 0, female, UNESCOAge population, age 0, total, UNESCOAge population, age 01, female, UNESCOAge population, age 01, total, UNESCOAge population, age 02, female, UNESCOAge population, age 02, total, UNESCOAge population, age 03, female, UNESCOAge population, age 03, total, UNESCO...IMF data dissemination standardLatest population censusLatest household surveySource of most recent Income and expenditure dataVital registration completeLatest agricultural censusLatest industrial dataLatest trade dataLatest water withdrawal dataUnnamed: 31
0ABW99.725971542.0000001.112000e+03572.0000001.170000e+03600.0000001.224000e+03623.0000001.269000e+03...NaN2010NaNNaNYesNaNNaN2012.0NaNNaN
1AFG5789.706514682677.0000001.403010e+06650389.0000001.338500e+06620880.0000001.279403e+06593957.0000001.225345e+06...General Data Dissemination System (GDDS)1979Multiple Indicator Cluster Survey (MICS), 2010/11Integrated household survey (IHS), 2008NaN2013/14NaN2012.02000NaN
2AGO5521.646445414919.0000008.320930e+05396537.0000007.939500e+05379705.0000007.591640e+05364295.0000007.274550e+05...General Data Dissemination System (GDDS)1970Malaria Indicator Survey (MIS), 2011Integrated household survey (IHS), 2008NaN2015NaNNaN2005NaN
3ALB5797.34007523697.0000004.910500e+0424026.0000004.970900e+0424243.0000005.010400e+0424360.0000005.031500e+04...General Data Dissemination System (GDDS)2011Demographic and Health Survey (DHS), 2008/09Living Standards Measurement Study Survey (LSM...Yes20122010.02012.02006NaN
4AND1449.292555-278072.572576-6.716407e+05-178040.068286-7.673874e+05-251546.996823-4.877597e+05-173803.626626-3.196188e+05...NaN2011. Population figures compiled from adminis...NaNNaNYesNaNNaN2006.0NaNNaN
..................................................................
236XKX5764.457814-518585.303776-1.045572e+06-375532.499402-9.679352e+05-384853.639003-6.279717e+05-275863.980435-4.325407e+05...General Data Dissemination System (GDDS)2011NaNIntegrated household survey (IHS), 2011NaNNaNNaNNaNNaNNaN
237YEM5710.914403458859.0000009.360530e+05452601.0000009.230170e+05444988.0000009.072520e+05436223.0000008.891710e+05...General Data Dissemination System (GDDS)2004Demographic and Health Survey (DHS), 2013Expenditure survey/budget survey (ES/BS), 2005NaNNaN2006.02012.02005NaN
238ZAF4555.118457484535.0000009.806630e+05486901.0000009.852150e+05489516.0000009.901940e+05492299.0000009.954450e+05...Special Data Dissemination Standard (SDDS)2011Demographic and Health Survey (DHS), 2003; Wor...Expenditure survey/budget survey (ES/BS), 2010NaN20072010.02012.02000NaN
239ZMB5747.949748226616.0000004.573290e+05220950.0000004.454510e+05215890.0000004.348760e+05211367.0000004.254540e+05...General Data Dissemination System (GDDS)2010Demographic and Health Survey (DHS), 2013Integrated household survey (IHS), 2010NaN2010. Population and Housing Census.NaN2011.02002NaN
240ZWE5806.205918181422.0000003.647610e+05179761.0000003.611010e+05177982.0000003.572610e+05176130.0000003.533210e+05...General Data Dissemination System (GDDS)2012Demographic and Health Survey (DHS), 2010/11Integrated household survey (IHS), 2011/12NaNNaNNaN2012.02002NaN
+

241 rows × 635 columns

+
+
+ +
+ +
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/prep_edstats.html b/documentation/WaterSecurity/notebooks/prep_edstats.html new file mode 100644 index 0000000..b4d8c6c --- /dev/null +++ b/documentation/WaterSecurity/notebooks/prep_edstats.html @@ -0,0 +1,16665 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
import pandas as pd
+import sklearn
+from helpers import *
+import matplotlib.pyplot as plt
+import sys
+sys.path.append("..")
+from data.unlabeled.raw import edstats_co as df_c, edstats_da as df_d, edstats_se as df_s
+import seaborn as sns
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
..\data\unlabeled\raw\__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)
+..\data\unlabeled\raw\__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)
+..\data\unlabeled\raw\__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)
+
+
+
+ +
+
+ +
+
+
+
+

Remember to extract Edstats_csv.zip to Edstats_csv

+
+
+
+
+
+
In [2]:
+
+
+
df_d.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[2]:
+ + + + +
+
Index(['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code',
+       '1970', '1971', '1972', '1973', '1974', '1975', '1976', '1977', '1978',
+       '1979', '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987',
+       '1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996',
+       '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005',
+       '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013', '2014',
+       '2015', '2016', '2017', '2020', '2025', '2030', '2035', '2040', '2045',
+       '2050', '2055', '2060', '2065', '2070', '2075', '2080', '2085', '2090',
+       '2095', '2100', 'Unnamed: 69'],
+      dtype='object')
+
+ +
+ +
+
+ +
+
+
+
In [3]:
+
+
+
years = ['2015', '2016', '2017', '2020']
+stripped_df_d = df_d[['Country Code','Indicator Code',*years]]
+print("Non na values in col 2020:",stripped_df_d['2020'].count())
+stripped_df_d
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Non na values in col 2020: 51436
+
+
+
+ +
+ +
Out[3]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Country CodeIndicator Code2015201620172020
0ARBUIS.NERA.2NaNNaNNaNNaN
1ARBUIS.NERA.2.FNaNNaNNaNNaN
2ARBUIS.NERA.2.GPINaNNaNNaNNaN
3ARBUIS.NERA.2.MNaNNaNNaNNaN
4ARBSE.PRM.TENRNaNNaNNaNNaN
.....................
886925ZWEUIS.LP.AG15T24.MNaNNaNNaNNaN
886926ZWESE.ADT.1524.LT.ZSNaNNaNNaNNaN
886927ZWESE.ADT.1524.LT.FE.ZSNaNNaNNaNNaN
886928ZWESE.ADT.1524.LT.FM.ZSNaNNaNNaNNaN
886929ZWESE.ADT.1524.LT.MA.ZSNaNNaNNaNNaN
+

886930 rows × 6 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Merge columns to remove null values

+
+
+
+
+
+
In [4]:
+
+
+
for year in years[:-1]:
+    fill_missing_with_column(stripped_df_d, '2020',year)
+print("Non na values in col 2020:", stripped_df_d['2020'].count())
+stripped_df_d
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Non na values in col 2020: 132991
+
+
+
+ +
+ +
+ + +
+
C:\Users\joach\code-projects\WaterSecurity\unlabeled_preprocessing\helpers.py:15: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame.
+Try using .loc[row_indexer,col_indexer] = value instead
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  df[into] = df[into].combine_first(df[fro])
+C:\Users\joach\.conda\envs\wsenv\lib\site-packages\pandas\core\frame.py:4308: SettingWithCopyWarning: 
+A value is trying to be set on a copy of a slice from a DataFrame
+
+See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
+  return super().drop(
+
+
+
+ +
+ +
Out[4]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Country CodeIndicator Code2020
0ARBUIS.NERA.2NaN
1ARBUIS.NERA.2.FNaN
2ARBUIS.NERA.2.GPINaN
3ARBUIS.NERA.2.MNaN
4ARBSE.PRM.TENRNaN
............
886925ZWEUIS.LP.AG15T24.MNaN
886926ZWESE.ADT.1524.LT.ZSNaN
886927ZWESE.ADT.1524.LT.FE.ZSNaN
886928ZWESE.ADT.1524.LT.FM.ZSNaN
886929ZWESE.ADT.1524.LT.MA.ZSNaN
+

886930 rows × 3 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [5]:
+
+
+
df_d_withseries = stripped_df_d.merge(df_s, how='left', left_on='Indicator Code', right_on='Series Code')
+df_d_withseries.drop(['Indicator Code'], inplace=True, axis=1)
+
+ +
+
+
+ +
+
+
+
In [6]:
+
+
+
df_d_withseries.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[6]:
+ + + + +
+
Index(['Country Code', '2020', 'Series Code', 'Topic', 'Indicator Name',
+       'Short definition', 'Long definition', 'Unit of measure', 'Periodicity',
+       'Base Period', 'Other notes', 'Aggregation method',
+       'Limitations and exceptions', 'Notes from original source',
+       'General comments', 'Source', 'Statistical concept and methodology',
+       'Development relevance', 'Related source links', 'Other web links',
+       'Related indicators', 'License Type', 'Unnamed: 20'],
+      dtype='object')
+
+ +
+ +
+
+ +
+
+
+
+

Rearrange table so that each indicator is a column

+
+
+
+
+
+
In [7]:
+
+
+
countryXindicator = df_d_withseries.pivot_table('2020',['Country Code'],'Indicator Name')
+countryXindicator
+
+ +
+
+
+ +
+
+ + +
+ +
Out[7]:
+ + + +
+

Indicator NameAdjusted net enrolment rate, lower secondary, both sexes (%)Adjusted net enrolment rate, lower secondary, female (%)Adjusted net enrolment rate, lower secondary, gender parity index (GPI)Adjusted net enrolment rate, lower secondary, male (%)Adjusted net enrolment rate, primary, both sexes (%)Adjusted net enrolment rate, primary, female (%)Adjusted net enrolment rate, primary, gender parity index (GPI)Adjusted net enrolment rate, primary, male (%)Adjusted net enrolment rate, upper secondary, both sexes (%)Adjusted net enrolment rate, upper secondary, female (%)...Under-age enrolment ratio in secondary education, female (%)Under-age enrolment ratio in secondary education, male (%)Unemployment, female (% of female labor force) (modeled ILO estimate)Unemployment, male (% of male labor force) (modeled ILO estimate)Unemployment, total (% of total labor force) (modeled ILO estimate)Youth illiterate population, 15-24 years, % femaleYouth literacy rate, population 15-24 years, both sexes (%)Youth literacy rate, population 15-24 years, female (%)Youth literacy rate, population 15-24 years, gender parity index (GPI)Youth literacy rate, population 15-24 years, male (%)
Country Code
ABWNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AFGNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN12.7000007.78.600000NaNNaNNaNNaNNaN
AGONaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN6.7000005.76.200000NaNNaNNaNNaNNaN
ALBNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN17.29999917.017.100000NaNNaNNaNNaNNaN
ANDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaN47.80890100.00000100.000001.00000100.00000
..................................................................
XKXNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEMNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN32.70000111.116.700001NaNNaNNaNNaNNaN
ZAFNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN27.70000123.125.20000134.3480898.9557899.229041.0055298.68459
ZMBNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN8.0000007.37.700000NaNNaNNaNNaNNaN
ZWENaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaN5.1000005.15.100000NaNNaNNaNNaNNaN
+

241 rows × 2047 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Find columns that contain "number", "Projection" or "$"

+
+
+
+
+
+
In [8]:
+
+
+
projection_col = [colname for colname in countryXindicator.columns if "Projection" in colname]
+number = [colname for colname in countryXindicator.columns if "number" in colname]
+dollar = [colname for colname in countryXindicator.columns if "$" in colname]
+
+ +
+
+
+ +
+
+
+
+

And remove them

+
+
+
+
+
+
In [9]:
+
+
+
countryXindicator.drop(projection_col + number+ dollar,inplace=True,axis=1)
+countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[9]:
+ + + + +
+
(241, 1502)
+
+ +
+ +
+
+ +
+
+
+
+

Remove population data

+
+
+
+
+
+
In [10]:
+
+
+
countryXindicator.drop(['Population, total'], axis=1, inplace=True)
+
+ +
+
+
+ +
+
+
+
+

See how much of the data is missing and drop columns where more than 50% of the values are null

+
+
+
+
+
+
In [11]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 99.5850622406639 %
+Min: 3.7344398340248963 %
+Mean: 81.31204369977415 %
+
+
+
+ +
+ +
Out[11]:
+ + + + +
+
(3.7344398340248963, 99.5850622406639)
+
+ +
+ +
+
+ +
+
+
+
In [12]:
+
+
+
dropColumnHalf(countryXindicator)
+
+ +
+
+
+ +
+
+
+
In [13]:
+
+
+
print_missing_percentages(countryXindicator)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 49.79253112033195 %
+Min: 3.7344398340248963 %
+Mean: 26.60212079299217 %
+
+
+
+ +
+ +
Out[13]:
+ + + + +
+
(3.7344398340248963, 49.79253112033195)
+
+ +
+ +
+
+ +
+
+
+
In [14]:
+
+
+
countryXindicator.shape
+
+ +
+
+
+ +
+
+ + +
+ +
Out[14]:
+ + + + +
+
(241, 243)
+
+ +
+ +
+
+ +
+
+
+
In [15]:
+
+
+
 countryXindicator
+
+ +
+
+
+ +
+
+ + +
+ +
Out[15]:
+ + + +
+

Indicator NameAdjusted net enrolment rate, primary, both sexes (%)Age population, age 0, female, UNESCOAge population, age 0, total, UNESCOAge population, age 01, female, UNESCOAge population, age 01, total, UNESCOAge population, age 02, female, UNESCOAge population, age 02, total, UNESCOAge population, age 03, female, UNESCOAge population, age 03, total, UNESCOAge population, age 04, female, UNESCO...Prevalence of HIV, total (% of population ages 15-49)Primary completion rate, both sexes (%)Primary completion rate, female (%)Primary completion rate, male (%)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)Unemployment, female (% of female labor force) (modeled ILO estimate)Unemployment, male (% of male labor force) (modeled ILO estimate)Unemployment, total (% of total labor force) (modeled ILO estimate)
Country Code
ABWNaN542.01112.0572.01170.0600.01224.0623.01269.0643.0...NaNNaNNaNNaN6.05.03.0NaNNaNNaN
AFGNaN682677.01403010.0650389.01338500.0620880.01279403.0593957.01225345.0569425.0...0.1NaNNaNNaN6.06.03.012.7000007.78.600000
AGONaN414919.0832093.0396537.0793950.0379705.0759164.0364295.0727455.0350180.0...1.9NaNNaNNaN6.06.03.06.7000005.76.200000
ALBNaN23697.049105.024026.049709.024243.050104.024360.050315.024397.0...0.1106.367561104.699371107.9001245.07.03.017.29999917.017.100000
ANDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaN6.06.02.0NaNNaNNaN
..................................................................
XKXNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEMNaN458859.0936053.0452601.0923017.0444988.0907252.0436223.0889171.0426514.0...0.1NaNNaNNaN6.06.03.032.70000111.116.700001
ZAFNaN484535.0980663.0486901.0985215.0489516.0990194.0492299.0995445.0495165.0...18.9NaNNaNNaN7.05.03.027.70000123.125.200001
ZMBNaN226616.0457329.0220950.0445451.0215890.0434876.0211367.0425454.0207311.0...12.6NaNNaNNaN7.05.03.08.0000007.37.700000
ZWENaN181422.0364761.0179761.0361101.0177982.0357261.0176130.0353321.0174240.0...13.9NaNNaNNaN7.06.04.05.1000005.15.100000
+

241 rows × 243 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Split dataset into a dataframe containing year-data and a dataframe for the float-percentages-data

This is done for better imputation results

+ +
+
+
+
+
+
In [16]:
+
+
+
int_cols = find_all_integer_columns(countryXindicator)
+
+ +
+
+
+ +
+
+
+
In [17]:
+
+
+
countryXindicator_float = countryXindicator.drop(int_cols, axis=1)
+print(countryXindicator_float.index.to_list())
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['ABW', 'AFG', 'AGO', 'ALB', 'AND', 'ARB', 'ARE', 'ARG', 'ARM', 'ASM', 'ATG', 'AUS', 'AUT', 'AZE', 'BDI', 'BEL', 'BEN', 'BFA', 'BGD', 'BGR', 'BHR', 'BHS', 'BIH', 'BLR', 'BLZ', 'BMU', 'BOL', 'BRA', 'BRB', 'BRN', 'BTN', 'BWA', 'CAF', 'CAN', 'CHE', 'CHI', 'CHL', 'CHN', 'CIV', 'CMR', 'COD', 'COG', 'COL', 'COM', 'CPV', 'CRI', 'CUB', 'CUW', 'CYM', 'CYP', 'CZE', 'DEU', 'DJI', 'DMA', 'DNK', 'DOM', 'DZA', 'EAP', 'EAS', 'ECA', 'ECS', 'ECU', 'EGY', 'EMU', 'ERI', 'ESP', 'EST', 'ETH', 'EUU', 'FIN', 'FJI', 'FRA', 'FRO', 'FSM', 'GAB', 'GBR', 'GEO', 'GHA', 'GIB', 'GIN', 'GMB', 'GNB', 'GNQ', 'GRC', 'GRD', 'GRL', 'GTM', 'GUM', 'GUY', 'HIC', 'HKG', 'HND', 'HPC', 'HRV', 'HTI', 'HUN', 'IDN', 'IMN', 'IND', 'IRL', 'IRN', 'IRQ', 'ISL', 'ISR', 'ITA', 'JAM', 'JOR', 'JPN', 'KAZ', 'KEN', 'KGZ', 'KHM', 'KIR', 'KNA', 'KOR', 'KWT', 'LAC', 'LAO', 'LBN', 'LBR', 'LBY', 'LCA', 'LCN', 'LDC', 'LIC', 'LIE', 'LKA', 'LMC', 'LMY', 'LSO', 'LTU', 'LUX', 'LVA', 'MAC', 'MAR', 'MCO', 'MDA', 'MDG', 'MDV', 'MEA', 'MEX', 'MHL', 'MIC', 'MKD', 'MLI', 'MLT', 'MMR', 'MNA', 'MNE', 'MNG', 'MNP', 'MOZ', 'MRT', 'MUS', 'MWI', 'MYS', 'NAC', 'NAM', 'NCL', 'NER', 'NGA', 'NIC', 'NLD', 'NOR', 'NPL', 'NRU', 'NZL', 'OED', 'OMN', 'PAK', 'PAN', 'PER', 'PHL', 'PLW', 'PNG', 'POL', 'PRI', 'PRK', 'PRT', 'PRY', 'PSE', 'PYF', 'QAT', 'ROU', 'RUS', 'RWA', 'SAS', 'SAU', 'SDN', 'SEN', 'SGP', 'SLB', 'SLE', 'SLV', 'SMR', 'SOM', 'SRB', 'SSA', 'SSD', 'SSF', 'STP', 'SUR', 'SVK', 'SVN', 'SWE', 'SWZ', 'SXM', 'SYC', 'SYR', 'TCA', 'TCD', 'TGO', 'THA', 'TJK', 'TKM', 'TLS', 'TON', 'TTO', 'TUN', 'TUR', 'TUV', 'TZA', 'UGA', 'UKR', 'UMC', 'URY', 'USA', 'UZB', 'VCT', 'VEN', 'VGB', 'VIR', 'VNM', 'VUT', 'WLD', 'WSM', 'XKX', 'YEM', 'ZAF', 'ZMB', 'ZWE']
+
+
+
+ +
+
+ +
+
+
+
In [18]:
+
+
+
countryXindicator_year = countryXindicator[int_cols]
+countryXindicator_year
+
+ +
+
+
+ +
+
+ + +
+ +
Out[18]:
+ + + +
+

Indicator NameAge population, age 0, female, UNESCOAge population, age 0, total, UNESCOAge population, age 01, female, UNESCOAge population, age 01, total, UNESCOAge population, age 02, female, UNESCOAge population, age 02, total, UNESCOAge population, age 03, female, UNESCOAge population, age 03, total, UNESCOAge population, age 04, female, UNESCOAge population, age 04, total, UNESCO...Population, ages 7-13, malePopulation, ages 7-13, totalPopulation, ages 7-9, femalePopulation, ages 7-9, malePopulation, ages 7-9, totalPopulation, femalePopulation, maleTheoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)
Country Code
ABW542.01112.0572.01170.0600.01224.0623.01269.0643.01310.0...5152.010124.02095.02169.04264.054743.049598.06.05.03.0
AFG682677.01403010.0650389.01338500.0620880.01279403.0593957.01225345.0569425.01175957.0...3481981.06728457.01475542.01580498.03056040.016346869.017389625.06.06.03.0
AGO414919.0832093.0396537.0793950.0379705.0759164.0364295.0727455.0350180.0698548.0...1988558.03995534.0913109.0904577.01817686.014205741.013653564.06.06.03.0
ALB23697.049105.024026.049709.024243.050104.024360.050315.024397.050378.0...179160.0346732.072246.077107.0149353.01426369.01454334.05.07.03.0
ANDNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaN6.06.02.0
..................................................................
XKXNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEM458859.0936053.0452601.0923017.0444988.0907252.0436223.0889171.0426514.0869195.0...2614331.05137180.01147011.01189006.02336017.013315678.013600529.06.06.03.0
ZAF484535.0980663.0486901.0985215.0489516.0990194.0492299.0995445.0495165.01000810.0...3615273.07175459.01516934.01543083.03060017.027999778.027012199.07.05.03.0
ZMB226616.0457329.0220950.0445451.0215890.0434876.0211367.0425454.0207311.0417041.0...1334582.02657396.0583300.0588617.01171917.08112243.07988344.07.05.03.0
ZWE181422.0364761.0179761.0361101.0177982.0357261.0176130.0353321.0174240.0349351.0...1156372.02310111.0501606.0502943.01004549.08099354.07678097.07.06.04.0
+

241 rows × 195 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [19]:
+
+
+
countryXindicator_year = countryXindicator_year[[colname for colname in countryXindicator_year.columns if "years" in colname]]
+countryXindicator_year
+
+ +
+
+
+ +
+
+ + +
+ +
Out[19]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Indicator NameDuration of compulsory education (years)Official entrance age to lower secondary education (years)Official entrance age to primary education (years)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)
Country Code
ABW13.012.06.06.05.03.0
AFG9.013.07.06.06.03.0
AGO6.012.06.06.06.03.0
ALB9.011.06.05.07.03.0
AND10.012.06.06.06.02.0
.....................
XKXNaNNaNNaNNaNNaNNaN
YEM9.012.06.06.06.03.0
ZAF9.014.07.07.05.03.0
ZMB7.014.07.07.05.03.0
ZWE7.013.06.07.06.04.0
+

241 rows × 6 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Impute both dataframes

+
+
+
+
+
+
In [20]:
+
+
+
imputed_countryXindicator_float = impute_df(countryXindicator_float, max_iter=100, verbose=2)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (241, 48)
+[IterativeImputer] Ending imputation round 1/100, elapsed time 0.23
+[IterativeImputer] Change: 185.571939565928, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 2/100, elapsed time 0.42
+[IterativeImputer] Change: 84.11443482287976, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 3/100, elapsed time 0.62
+[IterativeImputer] Change: 40.004149972043805, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 4/100, elapsed time 0.82
+[IterativeImputer] Change: 20.57199335577451, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 5/100, elapsed time 1.01
+[IterativeImputer] Change: 12.447852809025877, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 6/100, elapsed time 1.21
+[IterativeImputer] Change: 10.982023559420153, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 7/100, elapsed time 1.41
+[IterativeImputer] Change: 9.335158590713679, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 8/100, elapsed time 1.62
+[IterativeImputer] Change: 8.566696528569699, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 9/100, elapsed time 1.83
+[IterativeImputer] Change: 7.68743911493866, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 10/100, elapsed time 2.03
+[IterativeImputer] Change: 6.844171756725839, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 11/100, elapsed time 2.24
+[IterativeImputer] Change: 6.065545725434555, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 12/100, elapsed time 2.44
+[IterativeImputer] Change: 5.36616411150322, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 13/100, elapsed time 2.64
+[IterativeImputer] Change: 4.744028755257943, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 14/100, elapsed time 2.84
+[IterativeImputer] Change: 4.1901923804245715, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 15/100, elapsed time 3.04
+[IterativeImputer] Change: 3.70294072516266, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 16/100, elapsed time 3.24
+[IterativeImputer] Change: 3.2688619527108522, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 17/100, elapsed time 3.44
+[IterativeImputer] Change: 2.88345738531942, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 18/100, elapsed time 3.64
+[IterativeImputer] Change: 2.532294563247444, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 19/100, elapsed time 3.84
+[IterativeImputer] Change: 2.2119869217502828, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 20/100, elapsed time 4.05
+[IterativeImputer] Change: 1.9202443611257713, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 21/100, elapsed time 4.26
+[IterativeImputer] Change: 1.6606990496546086, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 22/100, elapsed time 4.46
+[IterativeImputer] Change: 1.6559282826221846, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 23/100, elapsed time 4.66
+[IterativeImputer] Change: 1.6520065848505072, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 24/100, elapsed time 4.87
+[IterativeImputer] Change: 1.648782063252183, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 25/100, elapsed time 5.07
+[IterativeImputer] Change: 1.6461831356141814, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 26/100, elapsed time 5.27
+[IterativeImputer] Change: 1.644058054001174, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 27/100, elapsed time 5.48
+[IterativeImputer] Change: 1.6423013079998188, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 28/100, elapsed time 5.68
+[IterativeImputer] Change: 1.6408741216707456, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 29/100, elapsed time 5.87
+[IterativeImputer] Change: 1.6396788922926566, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 30/100, elapsed time 6.08
+[IterativeImputer] Change: 1.638655100233116, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 31/100, elapsed time 6.28
+[IterativeImputer] Change: 1.6377695134752297, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 32/100, elapsed time 6.48
+[IterativeImputer] Change: 1.6369872536064, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 33/100, elapsed time 6.68
+[IterativeImputer] Change: 1.636221623251295, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 34/100, elapsed time 6.89
+[IterativeImputer] Change: 1.6355083766661642, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 35/100, elapsed time 7.09
+[IterativeImputer] Change: 1.634821858203819, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 36/100, elapsed time 7.30
+[IterativeImputer] Change: 1.6341370631609828, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 37/100, elapsed time 7.50
+[IterativeImputer] Change: 1.6334747186241456, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 38/100, elapsed time 7.72
+[IterativeImputer] Change: 1.645652531918897, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 39/100, elapsed time 7.92
+[IterativeImputer] Change: 1.652182711035818, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 40/100, elapsed time 8.13
+[IterativeImputer] Change: 1.6532662904419548, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 41/100, elapsed time 8.33
+[IterativeImputer] Change: 1.649451831600862, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 42/100, elapsed time 8.51
+[IterativeImputer] Change: 1.6408222980663139, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 43/100, elapsed time 8.72
+[IterativeImputer] Change: 1.6294400496414387, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 44/100, elapsed time 8.92
+[IterativeImputer] Change: 1.6287678524729357, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 45/100, elapsed time 9.12
+[IterativeImputer] Change: 1.6281047469396985, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 46/100, elapsed time 9.32
+[IterativeImputer] Change: 1.6274834308429378, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 47/100, elapsed time 9.52
+[IterativeImputer] Change: 1.6270130873718072, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 48/100, elapsed time 9.72
+[IterativeImputer] Change: 1.6262940758389055, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 49/100, elapsed time 9.95
+[IterativeImputer] Change: 1.6257297763508762, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 50/100, elapsed time 10.16
+[IterativeImputer] Change: 1.6252298538321177, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 51/100, elapsed time 10.36
+[IterativeImputer] Change: 1.624722930666042, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 52/100, elapsed time 10.57
+[IterativeImputer] Change: 1.6242577689697895, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 53/100, elapsed time 10.78
+[IterativeImputer] Change: 1.6238212014114874, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 54/100, elapsed time 10.99
+[IterativeImputer] Change: 1.6234519318135652, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 55/100, elapsed time 11.19
+[IterativeImputer] Change: 1.6231115358163457, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 56/100, elapsed time 11.40
+[IterativeImputer] Change: 1.6228249713969374, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 57/100, elapsed time 11.59
+[IterativeImputer] Change: 1.6225670613424206, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 58/100, elapsed time 11.80
+[IterativeImputer] Change: 1.6222797867936543, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 59/100, elapsed time 12.00
+[IterativeImputer] Change: 1.6221342960698018, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 60/100, elapsed time 12.20
+[IterativeImputer] Change: 1.6219870713467206, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 61/100, elapsed time 12.39
+[IterativeImputer] Change: 1.6218646076142422, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 62/100, elapsed time 12.60
+[IterativeImputer] Change: 1.6217730363655758, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 63/100, elapsed time 12.80
+[IterativeImputer] Change: 1.621725788928061, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 64/100, elapsed time 13.00
+[IterativeImputer] Change: 1.6216920357302036, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 65/100, elapsed time 13.21
+[IterativeImputer] Change: 1.621698990708198, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 66/100, elapsed time 13.41
+[IterativeImputer] Change: 1.6217532944875095, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 67/100, elapsed time 13.61
+[IterativeImputer] Change: 1.6218227224444455, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 68/100, elapsed time 13.82
+[IterativeImputer] Change: 1.6218972339192113, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 69/100, elapsed time 14.01
+[IterativeImputer] Change: 1.6232902366910664, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 70/100, elapsed time 14.21
+[IterativeImputer] Change: 1.6249616723710512, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 71/100, elapsed time 14.41
+[IterativeImputer] Change: 1.6261259356463242, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 72/100, elapsed time 14.62
+[IterativeImputer] Change: 1.6281859862659571, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 73/100, elapsed time 14.82
+[IterativeImputer] Change: 1.63006029323919, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 74/100, elapsed time 15.03
+[IterativeImputer] Change: 1.6318311593012644, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 75/100, elapsed time 15.24
+[IterativeImputer] Change: 1.6336233225534909, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 76/100, elapsed time 15.44
+[IterativeImputer] Change: 1.6353960973093213, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 77/100, elapsed time 15.65
+[IterativeImputer] Change: 1.637182751172711, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 78/100, elapsed time 15.86
+[IterativeImputer] Change: 1.6389758111697363, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 79/100, elapsed time 16.06
+[IterativeImputer] Change: 1.6407763042253765, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 80/100, elapsed time 16.25
+[IterativeImputer] Change: 1.6425834310407716, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 81/100, elapsed time 16.45
+[IterativeImputer] Change: 1.6443968858747002, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 82/100, elapsed time 16.67
+[IterativeImputer] Change: 1.64625348689472, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 83/100, elapsed time 16.87
+[IterativeImputer] Change: 1.6481429500945106, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 84/100, elapsed time 17.07
+[IterativeImputer] Change: 1.6500518120410161, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 85/100, elapsed time 17.28
+[IterativeImputer] Change: 1.6519548194561064, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 86/100, elapsed time 17.48
+[IterativeImputer] Change: 1.6538680162736423, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 87/100, elapsed time 17.68
+[IterativeImputer] Change: 1.6552679864808504, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 88/100, elapsed time 17.88
+[IterativeImputer] Change: 1.6574626185722092, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 89/100, elapsed time 18.09
+[IterativeImputer] Change: 1.6594748268725177, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 90/100, elapsed time 18.30
+[IterativeImputer] Change: 1.6613749089305434, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 91/100, elapsed time 18.50
+[IterativeImputer] Change: 1.6632615294758013, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 92/100, elapsed time 18.70
+[IterativeImputer] Change: 1.6651267664591594, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 93/100, elapsed time 18.90
+[IterativeImputer] Change: 1.6669733028122995, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 94/100, elapsed time 19.11
+[IterativeImputer] Change: 1.6688041142649566, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 95/100, elapsed time 19.31
+[IterativeImputer] Change: 1.6706225132059456, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 96/100, elapsed time 19.52
+[IterativeImputer] Change: 1.672451264542782, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 97/100, elapsed time 19.72
+[IterativeImputer] Change: 1.6742766711767634, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 98/100, elapsed time 19.93
+[IterativeImputer] Change: 1.6761039010597747, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 99/100, elapsed time 20.13
+[IterativeImputer] Change: 1.6778935337913161, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Ending imputation round 100/100, elapsed time 20.33
+[IterativeImputer] Change: 1.679678914280904, scaled tolerance: 0.19410198974609402 
+[IterativeImputer] Completing matrix with shape (241, 48)
+[IterativeImputer] Ending imputation round 1/100, elapsed time 0.01
+[IterativeImputer] Ending imputation round 2/100, elapsed time 0.02
+[IterativeImputer] Ending imputation round 3/100, elapsed time 0.02
+[IterativeImputer] Ending imputation round 4/100, elapsed time 0.03
+[IterativeImputer] Ending imputation round 5/100, elapsed time 0.04
+[IterativeImputer] Ending imputation round 6/100, elapsed time 0.05
+[IterativeImputer] Ending imputation round 7/100, elapsed time 0.05
+[IterativeImputer] Ending imputation round 8/100, elapsed time 0.06
+[IterativeImputer] Ending imputation round 9/100, elapsed time 0.07
+[IterativeImputer] Ending imputation round 10/100, elapsed time 0.07
+[IterativeImputer] Ending imputation round 11/100, elapsed time 0.08
+[IterativeImputer] Ending imputation round 12/100, elapsed time 0.09
+[IterativeImputer] Ending imputation round 13/100, elapsed time 0.09
+[IterativeImputer] Ending imputation round 14/100, elapsed time 0.10
+[IterativeImputer] Ending imputation round 15/100, elapsed time 0.11
+[IterativeImputer] Ending imputation round 16/100, elapsed time 0.12
+[IterativeImputer] Ending imputation round 17/100, elapsed time 0.12
+[IterativeImputer] Ending imputation round 18/100, elapsed time 0.13
+[IterativeImputer] Ending imputation round 19/100, elapsed time 0.14
+[IterativeImputer] Ending imputation round 20/100, elapsed time 0.14
+[IterativeImputer] Ending imputation round 21/100, elapsed time 0.15
+[IterativeImputer] Ending imputation round 22/100, elapsed time 0.16
+[IterativeImputer] Ending imputation round 23/100, elapsed time 0.16
+[IterativeImputer] Ending imputation round 24/100, elapsed time 0.17
+[IterativeImputer] Ending imputation round 25/100, elapsed time 0.18
+[IterativeImputer] Ending imputation round 26/100, elapsed time 0.19
+
+
+
+ +
+ +
+ + +
+
C:\Users\joach\.conda\envs\wsenv\lib\site-packages\sklearn\impute\_iterative.py:685: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.
+  warnings.warn("[IterativeImputer] Early stopping criterion not"
+
+
+
+ +
+ +
+ + +
+
[IterativeImputer] Ending imputation round 27/100, elapsed time 0.20
+[IterativeImputer] Ending imputation round 28/100, elapsed time 0.20
+[IterativeImputer] Ending imputation round 29/100, elapsed time 0.21
+[IterativeImputer] Ending imputation round 30/100, elapsed time 0.22
+[IterativeImputer] Ending imputation round 31/100, elapsed time 0.22
+[IterativeImputer] Ending imputation round 32/100, elapsed time 0.23
+[IterativeImputer] Ending imputation round 33/100, elapsed time 0.24
+[IterativeImputer] Ending imputation round 34/100, elapsed time 0.24
+[IterativeImputer] Ending imputation round 35/100, elapsed time 0.25
+[IterativeImputer] Ending imputation round 36/100, elapsed time 0.25
+[IterativeImputer] Ending imputation round 37/100, elapsed time 0.26
+[IterativeImputer] Ending imputation round 38/100, elapsed time 0.27
+[IterativeImputer] Ending imputation round 39/100, elapsed time 0.27
+[IterativeImputer] Ending imputation round 40/100, elapsed time 0.28
+[IterativeImputer] Ending imputation round 41/100, elapsed time 0.28
+[IterativeImputer] Ending imputation round 42/100, elapsed time 0.29
+[IterativeImputer] Ending imputation round 43/100, elapsed time 0.30
+[IterativeImputer] Ending imputation round 44/100, elapsed time 0.30
+[IterativeImputer] Ending imputation round 45/100, elapsed time 0.31
+[IterativeImputer] Ending imputation round 46/100, elapsed time 0.31
+[IterativeImputer] Ending imputation round 47/100, elapsed time 0.32
+[IterativeImputer] Ending imputation round 48/100, elapsed time 0.33
+[IterativeImputer] Ending imputation round 49/100, elapsed time 0.33
+[IterativeImputer] Ending imputation round 50/100, elapsed time 0.34
+[IterativeImputer] Ending imputation round 51/100, elapsed time 0.34
+[IterativeImputer] Ending imputation round 52/100, elapsed time 0.35
+[IterativeImputer] Ending imputation round 53/100, elapsed time 0.36
+[IterativeImputer] Ending imputation round 54/100, elapsed time 0.36
+[IterativeImputer] Ending imputation round 55/100, elapsed time 0.37
+[IterativeImputer] Ending imputation round 56/100, elapsed time 0.37
+[IterativeImputer] Ending imputation round 57/100, elapsed time 0.38
+[IterativeImputer] Ending imputation round 58/100, elapsed time 0.39
+[IterativeImputer] Ending imputation round 59/100, elapsed time 0.39
+[IterativeImputer] Ending imputation round 60/100, elapsed time 0.40
+[IterativeImputer] Ending imputation round 61/100, elapsed time 0.41
+[IterativeImputer] Ending imputation round 62/100, elapsed time 0.41
+[IterativeImputer] Ending imputation round 63/100, elapsed time 0.42
+[IterativeImputer] Ending imputation round 64/100, elapsed time 0.42
+[IterativeImputer] Ending imputation round 65/100, elapsed time 0.43
+[IterativeImputer] Ending imputation round 66/100, elapsed time 0.44
+[IterativeImputer] Ending imputation round 67/100, elapsed time 0.44
+[IterativeImputer] Ending imputation round 68/100, elapsed time 0.45
+[IterativeImputer] Ending imputation round 69/100, elapsed time 0.46
+[IterativeImputer] Ending imputation round 70/100, elapsed time 0.46
+[IterativeImputer] Ending imputation round 71/100, elapsed time 0.47
+[IterativeImputer] Ending imputation round 72/100, elapsed time 0.48
+[IterativeImputer] Ending imputation round 73/100, elapsed time 0.48
+[IterativeImputer] Ending imputation round 74/100, elapsed time 0.49
+[IterativeImputer] Ending imputation round 75/100, elapsed time 0.49
+[IterativeImputer] Ending imputation round 76/100, elapsed time 0.50
+[IterativeImputer] Ending imputation round 77/100, elapsed time 0.51
+[IterativeImputer] Ending imputation round 78/100, elapsed time 0.51
+[IterativeImputer] Ending imputation round 79/100, elapsed time 0.52
+[IterativeImputer] Ending imputation round 80/100, elapsed time 0.52
+[IterativeImputer] Ending imputation round 81/100, elapsed time 0.53
+[IterativeImputer] Ending imputation round 82/100, elapsed time 0.54
+[IterativeImputer] Ending imputation round 83/100, elapsed time 0.54
+[IterativeImputer] Ending imputation round 84/100, elapsed time 0.55
+[IterativeImputer] Ending imputation round 85/100, elapsed time 0.55
+[IterativeImputer] Ending imputation round 86/100, elapsed time 0.56
+[IterativeImputer] Ending imputation round 87/100, elapsed time 0.57
+[IterativeImputer] Ending imputation round 88/100, elapsed time 0.58
+[IterativeImputer] Ending imputation round 89/100, elapsed time 0.58
+[IterativeImputer] Ending imputation round 90/100, elapsed time 0.59
+[IterativeImputer] Ending imputation round 91/100, elapsed time 0.59
+[IterativeImputer] Ending imputation round 92/100, elapsed time 0.60
+[IterativeImputer] Ending imputation round 93/100, elapsed time 0.61
+[IterativeImputer] Ending imputation round 94/100, elapsed time 0.62
+[IterativeImputer] Ending imputation round 95/100, elapsed time 0.62
+[IterativeImputer] Ending imputation round 96/100, elapsed time 0.63
+[IterativeImputer] Ending imputation round 97/100, elapsed time 0.64
+[IterativeImputer] Ending imputation round 98/100, elapsed time 0.64
+[IterativeImputer] Ending imputation round 99/100, elapsed time 0.65
+[IterativeImputer] Ending imputation round 100/100, elapsed time 0.65
+
+
+
+ +
+
+ +
+
+
+
In [21]:
+
+
+
imputed_countryXindicator_year = impute_df(countryXindicator_year, max_iter=100, verbose=2)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (241, 6)
+[IterativeImputer] Ending imputation round 1/100, elapsed time 0.01
+[IterativeImputer] Change: 1.6443802657682207, scaled tolerance: 0.015 
+[IterativeImputer] Ending imputation round 2/100, elapsed time 0.02
+[IterativeImputer] Change: 0.0830316620808409, scaled tolerance: 0.015 
+[IterativeImputer] Ending imputation round 3/100, elapsed time 0.03
+[IterativeImputer] Change: 0.0289166641679115, scaled tolerance: 0.015 
+[IterativeImputer] Ending imputation round 4/100, elapsed time 0.04
+[IterativeImputer] Change: 0.009636184792338298, scaled tolerance: 0.015 
+[IterativeImputer] Early stopping criterion reached.
+[IterativeImputer] Completing matrix with shape (241, 6)
+[IterativeImputer] Ending imputation round 1/4, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/4, elapsed time 0.00
+[IterativeImputer] Ending imputation round 3/4, elapsed time 0.00
+[IterativeImputer] Ending imputation round 4/4, elapsed time 0.00
+
+
+
+ +
+
+ +
+
+
+
+

Convert the year values from float to int

+
+
+
+
+
+
In [22]:
+
+
+
imputed_countryXindicator_year = imputed_countryXindicator_year.round(0).astype(int)
+
+ +
+
+
+ +
+
+
+
+

Comparison correlation plot float-percentages-data

Before Imputation

+ +
+
+
+
+
+
In [23]:
+
+
+
corr_calc = countryXindicator_float.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[23]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

After imputation

+ +
+
+
+
+
+
In [24]:
+
+
+
corr_calc = imputed_countryXindicator_float.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, xticklabels=False, yticklabels=False, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[24]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

Comparison correlation plot yeardata

Before imputation

+ +
+
+
+
+
+
In [25]:
+
+
+
corr_calc = countryXindicator_year.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[25]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

After imputation

+ +
+
+
+
+
+
In [26]:
+
+
+
corr_calc = imputed_countryXindicator_year.corr()
+sns.heatmap(corr_calc, vmin=-1, vmax=1, center=0, cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[26]:
+ + + + +
+
<AxesSubplot:xlabel='Indicator Name', ylabel='Indicator Name'>
+
+ +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+
+ +
+
+
+
+

Merge imputed dataframes

+
+
+
+
+
+
In [27]:
+
+
+
imputed_data = imputed_countryXindicator_year.merge(imputed_countryXindicator_float, how='inner', on="Country Code")
+imputed_data
+
+ +
+
+
+ +
+
+ + +
+ +
Out[27]:
+ + + +
+

Indicator NameDuration of compulsory education (years)Official entrance age to lower secondary education (years)Official entrance age to primary education (years)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)Adjusted net enrolment rate, primary, both sexes (%)Gross enrolment ratio, lower secondary, both sexes (%)Gross enrolment ratio, lower secondary, female (%)Gross enrolment ratio, lower secondary, male (%)...Population growth (annual %)Population, female (% of total)Population, male (% of total)Prevalence of HIV, total (% of population ages 15-49)Primary completion rate, both sexes (%)Primary completion rate, female (%)Primary completion rate, male (%)Unemployment, female (% of female labor force) (modeled ILO estimate)Unemployment, male (% of male labor force) (modeled ILO estimate)Unemployment, total (% of total labor force) (modeled ILO estimate)
Country Code
ABW1312665396.63654191.07285790.68907691.406452...0.52465852.46552147.5344791.41642391.12574390.75253591.42594210.3495648.7465259.324957
AFG913766387.41687467.44761749.58044184.329559...2.94323448.45455851.5454420.10000093.31278188.49887897.92035912.7000007.7000008.600000
AGO612666387.44670390.90351390.52700391.325392...3.42802150.99101049.0089901.90000092.48617292.93003092.1437306.7000005.7000006.200000
ALB911657399.516937101.48837399.889503102.980438...-0.29120649.51459950.4854010.100000106.367561104.699371107.90012417.29999917.00000017.100000
AND1012666294.59243891.28108191.06735991.467822...-1.53783649.93739850.062602-0.30109990.51069489.17508391.67197414.79872611.02389411.875731
..................................................................
XKX1012666395.41333991.08456590.72179991.429833...-1.10388649.93739750.062603-0.42303391.46488491.25030491.6197229.6624679.2055259.289344
YEM912666386.15074490.90288190.43990491.274501...2.52025449.47085650.5291440.10000092.60948192.83722492.32681532.70000111.10000016.700001
ZAF914775396.50537691.06260390.91636491.240835...1.58532550.89760449.10239618.90000091.11144491.82484090.44171927.70000123.10000025.200001
ZMB714775393.18174491.01295490.82913391.267250...3.02412350.38476649.61523412.60000091.40617091.90597090.9765308.0000007.3000007.700000
ZWE713676494.62561091.04796790.92732191.251754...2.34564351.33499748.66500313.90000091.32510491.91346790.8062845.1000005.1000005.100000
+

241 rows × 54 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [28]:
+
+
+
bigtable = imputed_data.merge(df_c, how='left', on="Country Code")
+bigtable.columns
+
+ +
+
+
+ +
+
+ + +
+ +
Out[28]:
+ + + + +
+
Index(['Country Code', 'Duration of compulsory education (years)',
+       'Official entrance age to lower secondary education (years)',
+       'Official entrance age to primary education (years)',
+       'Theoretical duration of primary education (years)',
+       'Theoretical duration of secondary education (years)',
+       'Theoretical duration of upper secondary education (years)',
+       'Adjusted net enrolment rate, primary, both sexes (%)',
+       'Gross enrolment ratio, lower secondary, both sexes (%)',
+       'Gross enrolment ratio, lower secondary, female (%)',
+       'Gross enrolment ratio, lower secondary, male (%)',
+       'Gross enrolment ratio, pre-primary, both sexes (%)',
+       'Gross enrolment ratio, pre-primary, female (%)',
+       'Gross enrolment ratio, pre-primary, male (%)',
+       'Gross enrolment ratio, primary, both sexes (%)',
+       'Gross enrolment ratio, primary, female (%)',
+       'Gross enrolment ratio, primary, gender parity index (GPI)',
+       'Gross enrolment ratio, primary, male (%)',
+       'Gross enrolment ratio, secondary, both sexes (%)',
+       'Gross enrolment ratio, secondary, female (%)',
+       'Gross enrolment ratio, secondary, gender parity index (GPI)',
+       'Gross enrolment ratio, secondary, male (%)',
+       'Gross enrolment ratio, upper secondary, both sexes (%)',
+       'Gross intake ratio to Grade 1 of primary education, both sexes (%)',
+       'Gross intake ratio to Grade 1 of primary education, female (%)',
+       'Gross intake ratio to Grade 1 of primary education, male (%)',
+       'Internet users (per 100 people)',
+       'Labor force, female (% of total labor force)',
+       'Mortality rate, under-5 (per 1,000 live births)',
+       'Net enrolment rate, primary, both sexes (%)',
+       'Percentage of enrolment in pre-primary education in private institutions (%)',
+       'Percentage of enrolment in primary education in private institutions (%)',
+       'Percentage of enrolment in secondary education in private institutions (%)',
+       'Percentage of female students enrolled in primary education who are over-age, female (%)',
+       'Percentage of male students enrolled in primary education who are over-age, male (%)',
+       'Percentage of repeaters in primary education, all grades, both sexes (%)',
+       'Percentage of repeaters in primary education, all grades, female (%)',
+       'Percentage of repeaters in primary education, all grades, male (%)',
+       'Percentage of students enrolled in primary education who are over-age, both sexes (%)',
+       'Percentage of students in pre-primary education who are female (%)',
+       'Percentage of students in primary education who are female (%)',
+       'Percentage of students in secondary education who are female (%)',
+       'Percentage of students in secondary general education who are female (%)',
+       'Population ages 0-14 (% of total)',
+       'Population ages 15-64 (% of total)', 'Population growth (annual %)',
+       'Population, female (% of total)', 'Population, male (% of total)',
+       'Prevalence of HIV, total (% of population ages 15-49)',
+       'Primary completion rate, both sexes (%)',
+       'Primary completion rate, female (%)',
+       'Primary completion rate, male (%)',
+       'Unemployment, female (% of female labor force) (modeled ILO estimate)',
+       'Unemployment, male (% of male labor force) (modeled ILO estimate)',
+       'Unemployment, total (% of total labor force) (modeled ILO estimate)',
+       'Short Name', 'Table Name', 'Long Name', '2-alpha code',
+       'Currency Unit', 'Special Notes', 'Region', 'Income Group', 'WB-2 code',
+       'National accounts base year', 'National accounts reference year',
+       'SNA price valuation', 'Lending category', 'Other groups',
+       'System of National Accounts', 'Alternative conversion factor',
+       'PPP survey year', 'Balance of Payments Manual in use',
+       'External debt Reporting status', 'System of trade',
+       'Government Accounting concept', 'IMF data dissemination standard',
+       'Latest population census', 'Latest household survey',
+       'Source of most recent Income and expenditure data',
+       'Vital registration complete', 'Latest agricultural census',
+       'Latest industrial data', 'Latest trade data',
+       'Latest water withdrawal data', 'Unnamed: 31'],
+      dtype='object')
+
+ +
+ +
+
+ +
+
+
+
In [29]:
+
+
+
bigtable.set_index('Country Code', inplace=True)
+
+ +
+
+
+ +
+
+
+
In [30]:
+
+
+
bigtable
+
+ +
+
+
+ +
+
+ + +
+ +
Out[30]:
+ + + +
+

Duration of compulsory education (years)Official entrance age to lower secondary education (years)Official entrance age to primary education (years)Theoretical duration of primary education (years)Theoretical duration of secondary education (years)Theoretical duration of upper secondary education (years)Adjusted net enrolment rate, primary, both sexes (%)Gross enrolment ratio, lower secondary, both sexes (%)Gross enrolment ratio, lower secondary, female (%)Gross enrolment ratio, lower secondary, male (%)...IMF data dissemination standardLatest population censusLatest household surveySource of most recent Income and expenditure dataVital registration completeLatest agricultural censusLatest industrial dataLatest trade dataLatest water withdrawal dataUnnamed: 31
Country Code
ABW1312665396.63654191.07285790.68907691.406452...NaN2010NaNNaNYesNaNNaN2012.0NaNNaN
AFG913766387.41687467.44761749.58044184.329559...General Data Dissemination System (GDDS)1979Multiple Indicator Cluster Survey (MICS), 2010/11Integrated household survey (IHS), 2008NaN2013/14NaN2012.02000NaN
AGO612666387.44670390.90351390.52700391.325392...General Data Dissemination System (GDDS)1970Malaria Indicator Survey (MIS), 2011Integrated household survey (IHS), 2008NaN2015NaNNaN2005NaN
ALB911657399.516937101.48837399.889503102.980438...General Data Dissemination System (GDDS)2011Demographic and Health Survey (DHS), 2008/09Living Standards Measurement Study Survey (LSM...Yes20122010.02012.02006NaN
AND1012666294.59243891.28108191.06735991.467822...NaN2011. Population figures compiled from adminis...NaNNaNYesNaNNaN2006.0NaNNaN
..................................................................
XKX1012666395.41333991.08456590.72179991.429833...General Data Dissemination System (GDDS)2011NaNIntegrated household survey (IHS), 2011NaNNaNNaNNaNNaNNaN
YEM912666386.15074490.90288190.43990491.274501...General Data Dissemination System (GDDS)2004Demographic and Health Survey (DHS), 2013Expenditure survey/budget survey (ES/BS), 2005NaNNaN2006.02012.02005NaN
ZAF914775396.50537691.06260390.91636491.240835...Special Data Dissemination Standard (SDDS)2011Demographic and Health Survey (DHS), 2003; Wor...Expenditure survey/budget survey (ES/BS), 2010NaN20072010.02012.02000NaN
ZMB714775393.18174491.01295490.82913391.267250...General Data Dissemination System (GDDS)2010Demographic and Health Survey (DHS), 2013Integrated household survey (IHS), 2010NaN2010. Population and Housing Census.NaN2011.02002NaN
ZWE713676494.62561091.04796790.92732191.251754...General Data Dissemination System (GDDS)2012Demographic and Health Survey (DHS), 2010/11Integrated household survey (IHS), 2011/12NaNNaNNaN2012.02002NaN
+

241 rows × 85 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [31]:
+
+
+
bigtable.to_csv("../data/unlabeled/preprocessed/edstats_preprocessed.csv")
+
+ +
+
+
+ +
+
+
+
In [ ]:
+
+
+
 
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/prep_hdro_v2-checkpoint.html b/documentation/WaterSecurity/notebooks/prep_hdro_v2-checkpoint.html new file mode 100644 index 0000000..8f3ab4f --- /dev/null +++ b/documentation/WaterSecurity/notebooks/prep_hdro_v2-checkpoint.html @@ -0,0 +1,85672 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [159]:
+
+
+
import pandas as pd 
+from pandas.plotting import scatter_matrix
+import numpy as np
+import json
+from helpers import *
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+import sys
+import matplotlib.pyplot as plt
+import seaborn as sns
+!pip install plotly
+import plotly.express as px
+sys.path.append("..")
+from data.unlabeled import hdro_inicator_values as inicator_values, hdro_country_name as country_name, hdro_indicator_name as indicator_name
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Requirement already satisfied: plotly in /opt/anaconda3/lib/python3.7/site-packages (4.14.3)
+Requirement already satisfied: retrying>=1.3.3 in /opt/anaconda3/lib/python3.7/site-packages (from plotly) (1.3.3)
+Requirement already satisfied: six in /opt/anaconda3/lib/python3.7/site-packages (from plotly) (1.12.0)
+
+
+
+ +
+
+ +
+
+
+
In [148]:
+
+
+
#print(inicator_values)
+#print(country_name)
+#print(indicator_name)
+
+ +
+
+
+ +
+
+
+
+

Unwrap values to create good df

+
+
+
+
+
+
In [149]:
+
+
+
df_inicator_values = pd.DataFrame(inicator_values).T
+df_inicator_values.columns = [indicator_name[nm] for nm in df_inicator_values.columns]
+df_inicator_values = df_inicator_values.applymap(lambda x: x['2019'] if pd.notnull(x) else x)
+df_inicator_values
+
+ +
+
+
+ +
+
+ + +
+ +
Out[149]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gross enrolment ratio, pre-primary (% of preschool-age children)Percentage of primary schools with access to the internetPercentage of secondary schools with access to the internetGross enrolment ratio, tertiary (% of tertiary school-age population)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, female (%)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, male (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are female (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are male (%)Primary school teachers trained to teach (%)Pupil-teacher ratio, primary school (pupils per teacher)
AFG26.08013.22036.9201.9486.00627.24468.95779.72638.04225.8...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AGO30.23223.13338.0564.0236.35930.000150.52665.99531.82566.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ALB93.17493.70092.4979.70210.61429.50819.64252.8522.88161.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AND72.32771.48473.32710.43910.56446.429NaNNaN0.07788.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ARG57.15859.16154.82811.12310.72939.87762.78221.80544.78192.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
WSM74.94279.12771.583NaNNaN10.00023.88629.9830.19718.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEM28.02019.92036.9182.8805.1460.97160.35245.62729.16237.3...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZAF75.47874.97778.20710.03110.29145.33367.90810.29858.55866.9...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZMB44.44038.48854.0686.2838.17617.964120.11278.13417.86144.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZWE64.93559.79270.7838.0668.92334.57186.13564.73914.64532.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
+

195 rows × 98 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Display the max, min and mean number of null values per column

+
+
+
+
+
+
In [150]:
+
+
+
print_missing_percentages(df_inicator_values)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 98.97435897435898 %
+Min: 0.0 %
+Mean: 37.566718995290415 %
+
+
+
+ +
+ +
Out[150]:
+ + + + +
+
(0.0, 98.97435897435898)
+
+ +
+ +
+
+ +
+
+
+
+

There is a high number of missing values; we therefore remove columns where more than 50% of the data is missing

+
+
+
+
+
+
In [151]:
+
+
+
dropColumnHalf(df_inicator_values)
+
+ +
+
+
+ +
+
+
+
+

Again see the max, min and mean number of missing values per column

+
+
+
+
+
+
In [152]:
+
+
+
min_missing, max_missing = print_missing_percentages(df_inicator_values)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 49.743589743589745 %
+Min: 0.0 %
+Mean: 10.616150019135096 %
+
+
+
+ +
+
+ +
+
+
+
In [153]:
+
+
+
df_inicator_values.describe()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[153]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
count175.000000167.000000167.000000174.000000174.000000193.000000185.000000180.000000195.000000195.000000...167.000000178.000000178.000000167.000000167.000000156.000000152.000000156.000000152.000000152.000000
mean61.06807461.73601265.8235878.4800179.13340222.98129548.30934638.25801139.39142659.257436...0.93899414440.99851124458.3313540.7026830.7421860.54506419.38914523.40138519.0025990.595250
std29.61079829.28405026.3953583.4213322.80298411.82878440.52883427.774415146.48585523.231038...0.07455915359.93598623943.4826280.1658670.1431940.1727899.9487189.7440049.7773240.190002
min0.0000001.7380009.0000001.0700002.2560000.1000000.2830000.1440000.01100013.200000...0.488000186.041000640.1050000.2700000.4320000.1760004.4440008.5000004.4240000.232000
25%37.29650036.92500045.3565005.8500006.73375014.76500013.17700012.5382502.08100041.200000...0.9085002925.6142506275.9337500.5770000.6215000.40475010.79225016.60300010.5940000.431500
50%64.82800068.06700070.6820009.1110009.25250021.09400040.53600032.5335008.77200060.000000...0.9650008399.44250016951.3570000.7300000.7600000.52750017.93450021.77950017.5260000.604000
75%89.14500087.99050090.91650011.23050011.55100030.00000070.50400063.34550028.56250078.000000...0.98600022583.77950035488.4760000.8315000.8485000.69175027.61525028.62500027.0125000.767250
max100.000000100.000000100.00000013.88200014.43100055.660000186.53800094.5810001433.784000100.000000...1.03600071387.276000107833.0290000.9490000.9650000.85800045.30700056.99600044.1670000.899000
+

8 rows × 67 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [43]:
+
+
+
#Find columns that only contain integers or null values
+#find_all_integer_columns(df_inicator_values)
+
+ +
+
+
+ +
+
+
+
In [154]:
+
+
+
df_inicator_values.columns.values
+
+ +
+
+
+ +
+
+ + +
+ +
Out[154]:
+ + + + +
+
array(['Population with at least some secondary education (% ages 25 and older)',
+       'Population with at least some secondary education, female (% ages 25 and older)',
+       'Population with at least some secondary education, male (% ages 25 and older)',
+       'Mean years of schooling, female (years)',
+       'Mean years of schooling, male (years)',
+       'Share of seats in parliament (% held by women)',
+       'Adolescent birth rate (births per 1,000 women ages 15-19)',
+       'Vulnerable employment (% of total employment)',
+       'Total population (millions)', 'Urban population (%)',
+       'Labour force participation rate (% ages 15 and older), female',
+       'Labour force participation rate (% ages 15 and older), male',
+       'Sex ratio at birth (male to female births)',
+       'Remittances, inflows (% of GDP)',
+       'Foreign direct investment, net inflows (% of GDP)',
+       'Population ages 15?64 (millions)',
+       'Infants lacking immunization, measles (% of one-year-olds)',
+       'Infants lacking immunization, DTP (% of one-year-olds)',
+       'Gross fixed capital formation (% of GDP)',
+       'Gender Inequality Index (GII)',
+       'Life expectancy at birth (years)',
+       'Expected years of schooling (years)',
+       'Inequality-adjusted education index',
+       'Inequality-adjusted life expectancy index',
+       'Inequality in education (%)', 'Inequality in life expectancy (%)',
+       'Mean years of schooling (years)', 'Life expectancy index',
+       'Income index', 'Education index',
+       'Unemployment, youth (% ages 15?24)',
+       'Private capital flows (% of GDP)',
+       'Life expectancy at birth, female (years)',
+       'Life expectancy at birth, male (years)',
+       'Young age (0-14) dependency ratio (per 100 people ages 15-64)',
+       'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)',
+       'Expected years of schooling, female (years)',
+       'Expected years of schooling, male (years)',
+       'Population ages 65 and older (millions)',
+       'Population under age 5 (millions)',
+       'Exports and imports (% of GDP)', 'Human Development Index (HDI)',
+       'Unemployment, total (% of labour force)', 'HDI rank',
+       'Youth not in school or employment (% ages 15-24)',
+       'Labour force participation rate (% ages 15 and older)',
+       'Employment to population ratio (% ages 15 and older)',
+       'Employment in agriculture (% of total employment)',
+       'Employment in services (% of total employment)',
+       'Working poor at PPP$3.20 a day (% of total employment)',
+       'Total unemployment rate (female to male ratio)',
+       'Youth unemployment rate (female to male ratio)',
+       'Share of employment in nonagriculture, female (% of total employment in nonagriculture)',
+       'Gross capital formation (% of GDP)',
+       'Gross domestic product (GDP), total (2017 PPP $ billions)',
+       'GDP per capita (2017 PPP $)',
+       'Gross national income (GNI) per capita (constant 2017 PPP$)',
+       'Gender Development Index (GDI)',
+       'Estimated gross national income per capita, female (2017 PPP $)',
+       'Estimated gross national income per capita, male (2017 PPP $)',
+       'Human Development Index (HDI), female',
+       'Human Development Index (HDI), male',
+       'Inequality-adjusted income index',
+       'Overall loss in HDI due to inequality (%)',
+       'Inequality in income (%)', 'Coefficient of human inequality',
+       'Inequality-adjusted HDI (IHDI)'], dtype=object)
+
+ +
+ +
+
+ +
+
+
+
In [161]:
+
+
+
size = df_inicator_values.shape[1]
+corr = df_inicator_values.corr()
+plt.subplots(figsize=(20,20))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size),cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[161]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f8a6180c0d0>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [167]:
+
+
+
percentage_columns = []
+money_columns = []
+index_columns = []
+year_columns = []
+rest = []
+
+for column in df_inicator_values.columns.values:
+    if '%' in column:
+        percentage_columns.append(column)
+    elif '$' in column:
+        money_columns.append(column)
+    elif 'years' in column:
+        year_columns.append(column)
+    elif 'index' in column.lower():
+        index_columns.append(column)
+    else:
+        rest.append(column)
+
+print(percentage_columns,'\n',len(percentage_columns))
+print(money_columns,'\n',len(money_columns))
+print(index_columns,'\n',len(index_columns))
+print(year_columns,'\n',len(year_columns))
+print(rest,'\n',len(rest))
+
+print(len(percentage_columns)+len(money_columns)+len(index_columns)+len(year_columns)+len(rest))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['Population with at least some secondary education (% ages 25 and older)', 'Population with at least some secondary education, female (% ages 25 and older)', 'Population with at least some secondary education, male (% ages 25 and older)', 'Share of seats in parliament (% held by women)', 'Vulnerable employment (% of total employment)', 'Urban population (%)', 'Labour force participation rate (% ages 15 and older), female', 'Labour force participation rate (% ages 15 and older), male', 'Remittances, inflows (% of GDP)', 'Foreign direct investment, net inflows (% of GDP)', 'Infants lacking immunization, measles (% of one-year-olds)', 'Infants lacking immunization, DTP (% of one-year-olds)', 'Gross fixed capital formation (% of GDP)', 'Inequality in education (%)', 'Inequality in life expectancy (%)', 'Unemployment, youth (% ages 15?24)', 'Private capital flows (% of GDP)', 'Exports and imports (% of GDP)', 'Unemployment, total (% of labour force)', 'Youth not in school or employment (% ages 15-24)', 'Labour force participation rate (% ages 15 and older)', 'Employment to population ratio (% ages 15 and older)', 'Employment in agriculture (% of total employment)', 'Employment in services (% of total employment)', 'Working poor at PPP$3.20 a day (% of total employment)', 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 'Gross capital formation (% of GDP)', 'Overall loss in HDI due to inequality (%)', 'Inequality in income (%)'] 
+ 29
+['Gross domestic product (GDP), total (2017 PPP $ billions)', 'GDP per capita (2017 PPP $)', 'Gross national income (GNI) per capita (constant 2017 PPP$)', 'Estimated gross national income per capita, female (2017 PPP $)', 'Estimated gross national income per capita, male (2017 PPP $)'] 
+ 5
+['Gender Inequality Index (GII)', 'Inequality-adjusted education index', 'Inequality-adjusted life expectancy index', 'Life expectancy index', 'Income index', 'Education index', 'Human Development Index (HDI)', 'Gender Development Index (GDI)', 'Human Development Index (HDI), female', 'Human Development Index (HDI), male', 'Inequality-adjusted income index'] 
+ 11
+['Mean years of schooling, female (years)', 'Mean years of schooling, male (years)', 'Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Life expectancy at birth, female (years)', 'Life expectancy at birth, male (years)', 'Expected years of schooling, female (years)', 'Expected years of schooling, male (years)'] 
+ 9
+['Adolescent birth rate (births per 1,000 women ages 15-19)', 'Total population (millions)', 'Sex ratio at birth (male to female births)', 'Population ages 15?64 (millions)', 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 'Population ages 65 and older (millions)', 'Population under age 5 (millions)', 'HDI rank', 'Total unemployment rate (female to male ratio)', 'Youth unemployment rate (female to male ratio)', 'Coefficient of human inequality', 'Inequality-adjusted HDI (IHDI)'] 
+ 13
+67
+
+
+
+ +
+
+ +
+
+
+
+

The dataframe is split

+
+
+
+
+
+
In [168]:
+
+
+
split_df1 = df_inicator_values[percentage_columns]
+split_df2 = df_inicator_values[money_columns]
+split_df3 = df_inicator_values[index_columns]
+split_df4 = df_inicator_values[year_columns]
+split_df5 = df_inicator_values[rest]
+
+ +
+
+
+ +
+
+
+
In [170]:
+
+
+
print('- Dataframe 1 -')
+min1, max1 = print_missing_percentages(split_df1)
+print('- Dataframe 2 -')
+min2, max2 = print_missing_percentages(split_df2)
+print('- Dataframe 3 -')
+min3, max3 = print_missing_percentages(split_df3)
+print('- Dataframe 4 -')
+min4, max4 = print_missing_percentages(split_df4)
+print('- Dataframe 5 -')
+min5, max5 = print_missing_percentages(split_df5)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
- Dataframe 1 -
+Max, min and mean number of missing values for the columns
+Max: 49.743589743589745 %
+Min: 0.0 %
+Mean: 14.624226348364274 %
+- Dataframe 2 -
+Max, min and mean number of missing values for the columns
+Max: 8.717948717948717 %
+Min: 2.051282051282051 %
+Mean: 6.153846153846153 %
+- Dataframe 3 -
+Max, min and mean number of missing values for the columns
+Max: 20.0 %
+Min: 2.051282051282051 %
+Mean: 9.790209790209792 %
+- Dataframe 4 -
+Max, min and mean number of missing values for the columns
+Max: 10.76923076923077 %
+Min: 1.0256410256410255 %
+Mean: 5.584045584045585 %
+- Dataframe 5 -
+Max, min and mean number of missing values for the columns
+Max: 22.05128205128205 %
+Min: 0.0 %
+Mean: 7.57396449704142 %
+
+
+
+ +
+
+ +
+
+
+
+

Imputation of the individual datasets

idf stands for imputed dataframe

+
+
+
+
+
+
In [176]:
+
+
+
idf1 = impute_df(split_df1, max_iter= int(max1), verbose=2)
+size = idf1.shape[1]
+corr = idf1.corr()
+plt.subplots(figsize=(20,20))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 29)
+[IterativeImputer] Ending imputation round 1/49, elapsed time 0.59
+[IterativeImputer] Ending imputation round 2/49, elapsed time 0.64
+[IterativeImputer] Ending imputation round 3/49, elapsed time 0.70
+[IterativeImputer] Ending imputation round 4/49, elapsed time 0.76
+[IterativeImputer] Ending imputation round 5/49, elapsed time 0.83
+[IterativeImputer] Ending imputation round 6/49, elapsed time 0.89
+[IterativeImputer] Ending imputation round 7/49, elapsed time 0.94
+[IterativeImputer] Ending imputation round 8/49, elapsed time 1.00
+[IterativeImputer] Ending imputation round 9/49, elapsed time 1.06
+[IterativeImputer] Ending imputation round 10/49, elapsed time 1.13
+[IterativeImputer] Ending imputation round 11/49, elapsed time 1.18
+[IterativeImputer] Ending imputation round 12/49, elapsed time 1.23
+[IterativeImputer] Ending imputation round 13/49, elapsed time 1.30
+[IterativeImputer] Ending imputation round 14/49, elapsed time 1.36
+[IterativeImputer] Ending imputation round 15/49, elapsed time 1.42
+[IterativeImputer] Ending imputation round 16/49, elapsed time 1.48
+[IterativeImputer] Ending imputation round 17/49, elapsed time 1.58
+[IterativeImputer] Ending imputation round 18/49, elapsed time 1.65
+[IterativeImputer] Ending imputation round 19/49, elapsed time 1.71
+[IterativeImputer] Ending imputation round 20/49, elapsed time 1.78
+[IterativeImputer] Ending imputation round 21/49, elapsed time 1.88
+[IterativeImputer] Ending imputation round 22/49, elapsed time 1.94
+[IterativeImputer] Ending imputation round 23/49, elapsed time 2.04
+[IterativeImputer] Ending imputation round 24/49, elapsed time 2.13
+[IterativeImputer] Ending imputation round 25/49, elapsed time 2.21
+[IterativeImputer] Ending imputation round 26/49, elapsed time 2.28
+[IterativeImputer] Ending imputation round 27/49, elapsed time 2.46
+[IterativeImputer] Ending imputation round 28/49, elapsed time 2.53
+[IterativeImputer] Ending imputation round 29/49, elapsed time 2.60
+[IterativeImputer] Ending imputation round 30/49, elapsed time 2.66
+[IterativeImputer] Ending imputation round 31/49, elapsed time 2.76
+[IterativeImputer] Ending imputation round 32/49, elapsed time 2.85
+[IterativeImputer] Ending imputation round 33/49, elapsed time 2.95
+[IterativeImputer] Ending imputation round 34/49, elapsed time 3.06
+[IterativeImputer] Ending imputation round 35/49, elapsed time 3.16
+[IterativeImputer] Ending imputation round 36/49, elapsed time 3.23
+[IterativeImputer] Ending imputation round 37/49, elapsed time 3.29
+[IterativeImputer] Ending imputation round 38/49, elapsed time 3.36
+[IterativeImputer] Ending imputation round 39/49, elapsed time 3.40
+[IterativeImputer] Ending imputation round 40/49, elapsed time 3.46
+[IterativeImputer] Ending imputation round 41/49, elapsed time 3.55
+[IterativeImputer] Ending imputation round 42/49, elapsed time 3.60
+[IterativeImputer] Ending imputation round 43/49, elapsed time 3.66
+[IterativeImputer] Ending imputation round 44/49, elapsed time 3.71
+[IterativeImputer] Ending imputation round 45/49, elapsed time 3.77
+[IterativeImputer] Ending imputation round 46/49, elapsed time 3.83
+[IterativeImputer] Ending imputation round 47/49, elapsed time 3.90
+[IterativeImputer] Ending imputation round 48/49, elapsed time 4.00
+[IterativeImputer] Ending imputation round 49/49, elapsed time 4.05
+[IterativeImputer] Completing matrix with shape (195, 29)
+[IterativeImputer] Ending imputation round 1/49, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/49, elapsed time 0.01
+[IterativeImputer] Ending imputation round 3/49, elapsed time 0.01
+[IterativeImputer] Ending imputation round 4/49, elapsed time 0.02
+[IterativeImputer] Ending imputation round 5/49, elapsed time 0.02
+[IterativeImputer] Ending imputation round 6/49, elapsed time 0.02
+[IterativeImputer] Ending imputation round 7/49, elapsed time 0.03
+[IterativeImputer] Ending imputation round 8/49, elapsed time 0.03
+[IterativeImputer] Ending imputation round 9/49, elapsed time 0.04
+[IterativeImputer] Ending imputation round 10/49, elapsed time 0.04
+[IterativeImputer] Ending imputation round 11/49, elapsed time 0.05
+[IterativeImputer] Ending imputation round 12/49, elapsed time 0.05
+[IterativeImputer] Ending imputation round 13/49, elapsed time 0.05
+[IterativeImputer] Ending imputation round 14/49, elapsed time 0.06
+[IterativeImputer] Ending imputation round 15/49, elapsed time 0.06
+[IterativeImputer] Ending imputation round 16/49, elapsed time 0.07
+[IterativeImputer] Ending imputation round 17/49, elapsed time 0.07
+[IterativeImputer] Ending imputation round 18/49, elapsed time 0.07
+[IterativeImputer] Ending imputation round 19/49, elapsed time 0.08
+[IterativeImputer] Ending imputation round 20/49, elapsed time 0.08
+[IterativeImputer] Ending imputation round 21/49, elapsed time 0.09
+[IterativeImputer] Ending imputation round 22/49, elapsed time 0.09
+[IterativeImputer] Ending imputation round 23/49, elapsed time 0.10
+[IterativeImputer] Ending imputation round 24/49, elapsed time 0.10
+[IterativeImputer] Ending imputation round 25/49, elapsed time 0.10
+[IterativeImputer] Ending imputation round 26/49, elapsed time 0.11
+[IterativeImputer] Ending imputation round 27/49, elapsed time 0.12
+[IterativeImputer] Ending imputation round 28/49, elapsed time 0.12
+[IterativeImputer] Ending imputation round 29/49, elapsed time 0.12
+[IterativeImputer] Ending imputation round 30/49, elapsed time 0.13
+[IterativeImputer] Ending imputation round 31/49, elapsed time 0.14
+[IterativeImputer] Ending imputation round 32/49, elapsed time 0.14/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning:
+
+[IterativeImputer] Early stopping criterion not reached.
+
+
+[IterativeImputer] Ending imputation round 33/49, elapsed time 0.16
+[IterativeImputer] Ending imputation round 34/49, elapsed time 0.17
+[IterativeImputer] Ending imputation round 35/49, elapsed time 0.18
+[IterativeImputer] Ending imputation round 36/49, elapsed time 0.19
+[IterativeImputer] Ending imputation round 37/49, elapsed time 0.19
+[IterativeImputer] Ending imputation round 38/49, elapsed time 0.21
+[IterativeImputer] Ending imputation round 39/49, elapsed time 0.22
+[IterativeImputer] Ending imputation round 40/49, elapsed time 0.23
+[IterativeImputer] Ending imputation round 41/49, elapsed time 0.24
+[IterativeImputer] Ending imputation round 42/49, elapsed time 0.24
+[IterativeImputer] Ending imputation round 43/49, elapsed time 0.25
+[IterativeImputer] Ending imputation round 44/49, elapsed time 0.25
+[IterativeImputer] Ending imputation round 45/49, elapsed time 0.25
+[IterativeImputer] Ending imputation round 46/49, elapsed time 0.26
+[IterativeImputer] Ending imputation round 47/49, elapsed time 0.27
+[IterativeImputer] Ending imputation round 48/49, elapsed time 0.28
+[IterativeImputer] Ending imputation round 49/49, elapsed time 0.28
+
+
+
+ +
+ +
Out[176]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f8a43103150>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [181]:
+
+
+
idf2 = impute_df(split_df2, max_iter= int(max2), verbose=2)  # iteration cap is max2 truncated to int (presumably the max missing-value percentage of split_df2 - confirm)
+size = idf2.shape[1]  # number of columns; reused below as numeric tick labels
+corr = idf2.corr()  # pairwise correlation between the imputed columns
+fig = plt.subplots(figsize=(15,15))  # NOTE(review): plt.subplots returns a (figure, axes) tuple, so `fig` holds the tuple, not the figure
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))  # colour scale pinned to [-1, 1]; ticks are column positions, not column names
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 5)
+[IterativeImputer] Ending imputation round 1/8, elapsed time 0.05
+[IterativeImputer] Ending imputation round 2/8, elapsed time 0.08
+[IterativeImputer] Ending imputation round 3/8, elapsed time 0.10
+[IterativeImputer] Ending imputation round 4/8, elapsed time 0.20
+[IterativeImputer] Ending imputation round 5/8, elapsed time 0.25
+[IterativeImputer] Ending imputation round 6/8, elapsed time 0.26
+[IterativeImputer] Ending imputation round 7/8, elapsed time 0.27
+[IterativeImputer] Ending imputation round 8/8, elapsed time 0.29
+[IterativeImputer] Completing matrix with shape (195, 5)
+[IterativeImputer] Ending imputation round 1/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/8, elapsed time 0.01
+[IterativeImputer] Ending imputation round 3/8, elapsed time 0.01
+[IterativeImputer] Ending imputation round 4/8, elapsed time 0.01
+[IterativeImputer] Ending imputation round 5/8, elapsed time 0.02
+[IterativeImputer] Ending imputation round 6/8, elapsed time 0.02
+[IterativeImputer] Ending imputation round 7/8, elapsed time 0.02
+[IterativeImputer] Ending imputation round 8/8, elapsed time 0.03
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning:
+
+[IterativeImputer] Early stopping criterion not reached.
+
+
+
+
+ +
+ +
Out[181]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f8a42f7d1d0>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [183]:
+
+
+
idf3 = impute_df(split_df3, max_iter= int(max3), verbose=2)  # iteration cap is max3 truncated to int (presumably the max missing-value percentage of split_df3 - confirm)
+size = idf3.shape[1]  # number of columns; reused below as numeric tick labels
+corr = idf3.corr()  # pairwise correlation between the imputed columns
+fig = plt.subplots(figsize=(15,15))  # NOTE(review): plt.subplots returns a (figure, axes) tuple, so `fig` holds the tuple, not the figure
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))  # colour scale pinned to [-1, 1]; ticks are column positions, not column names
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 11)
+[IterativeImputer] Ending imputation round 1/20, elapsed time 0.05
+[IterativeImputer] Ending imputation round 2/20, elapsed time 0.09
+[IterativeImputer] Ending imputation round 3/20, elapsed time 0.11
+[IterativeImputer] Ending imputation round 4/20, elapsed time 0.14
+[IterativeImputer] Ending imputation round 5/20, elapsed time 0.17
+[IterativeImputer] Ending imputation round 6/20, elapsed time 0.20
+[IterativeImputer] Ending imputation round 7/20, elapsed time 0.23
+[IterativeImputer] Ending imputation round 8/20, elapsed time 0.29
+[IterativeImputer] Ending imputation round 9/20, elapsed time 0.35
+[IterativeImputer] Ending imputation round 10/20, elapsed time 0.40
+[IterativeImputer] Ending imputation round 11/20, elapsed time 0.43
+[IterativeImputer] Ending imputation round 12/20, elapsed time 0.48
+[IterativeImputer] Ending imputation round 13/20, elapsed time 0.52
+[IterativeImputer] Ending imputation round 14/20, elapsed time 0.55
+[IterativeImputer] Ending imputation round 15/20, elapsed time 0.58
+[IterativeImputer] Ending imputation round 16/20, elapsed time 0.61
+[IterativeImputer] Ending imputation round 17/20, elapsed time 0.63
+[IterativeImputer] Ending imputation round 18/20, elapsed time 0.66
+[IterativeImputer] Ending imputation round 19/20, elapsed time 0.69
+[IterativeImputer] Ending imputation round 20/20, elapsed time 0.74
+[IterativeImputer] Completing matrix with shape (195, 11)
+[IterativeImputer] Ending imputation round 1/20, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 3/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 4/20, elapsed time 0.02
+[IterativeImputer] Ending imputation round 5/20, elapsed time 0.02
+[IterativeImputer] Ending imputation round 6/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 7/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 8/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 9/20, elapsed time 0.04
+[IterativeImputer] Ending imputation round 10/20, elapsed time 0.04
+[IterativeImputer] Ending imputation round 11/20, elapsed time 0.04
+[IterativeImputer] Ending imputation round 12/20, elapsed time 0.05
+[IterativeImputer] Ending imputation round 13/20, elapsed time 0.05
+[IterativeImputer] Ending imputation round 14/20, elapsed time 0.06
+[IterativeImputer] Ending imputation round 15/20, elapsed time 0.07
+[IterativeImputer] Ending imputation round 16/20, elapsed time 0.07
+[IterativeImputer] Ending imputation round 17/20, elapsed time 0.07
+[IterativeImputer] Ending imputation round 18/20, elapsed time 0.08
+[IterativeImputer] Ending imputation round 19/20, elapsed time 0.08
+[IterativeImputer] Ending imputation round 20/20, elapsed time 0.08
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning:
+
+[IterativeImputer] Early stopping criterion not reached.
+
+
+
+
+ +
+ +
Out[183]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f8a43781d50>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [184]:
+
+
+
idf4 = impute_df(split_df4, max_iter= int(max4), verbose=2)  # iteration cap is max4 truncated to int (presumably the max missing-value percentage of split_df4 - confirm)
+size = idf4.shape[1]  # number of columns; reused below as numeric tick labels
+corr = idf4.corr()  # pairwise correlation between the imputed columns
+fig = plt.subplots(figsize=(15,15))  # NOTE(review): plt.subplots returns a (figure, axes) tuple, so `fig` holds the tuple, not the figure
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))  # colour scale pinned to [-1, 1]; ticks are column positions, not column names
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 9)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.03
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.04
+[IterativeImputer] Ending imputation round 3/10, elapsed time 0.05
+[IterativeImputer] Ending imputation round 4/10, elapsed time 0.07
+[IterativeImputer] Ending imputation round 5/10, elapsed time 0.09
+[IterativeImputer] Ending imputation round 6/10, elapsed time 0.10
+[IterativeImputer] Ending imputation round 7/10, elapsed time 0.12
+[IterativeImputer] Ending imputation round 8/10, elapsed time 0.13
+[IterativeImputer] Ending imputation round 9/10, elapsed time 0.15
+[IterativeImputer] Ending imputation round 10/10, elapsed time 0.17
+[IterativeImputer] Completing matrix with shape (195, 9)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.02
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning:
+
+[IterativeImputer] Early stopping criterion not reached.
+
+[IterativeImputer] Ending imputation round 3/10, elapsed time 0.04
+[IterativeImputer] Ending imputation round 4/10, elapsed time 0.05
+[IterativeImputer] Ending imputation round 5/10, elapsed time 0.05
+[IterativeImputer] Ending imputation round 6/10, elapsed time 0.06
+[IterativeImputer] Ending imputation round 7/10, elapsed time 0.06
+[IterativeImputer] Ending imputation round 8/10, elapsed time 0.07
+[IterativeImputer] Ending imputation round 9/10, elapsed time 0.07
+[IterativeImputer] Ending imputation round 10/10, elapsed time 0.07
+
+
+
+ +
+ +
Out[184]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f8a4378a490>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [185]:
+
+
+
idf5 = impute_df(split_df5, max_iter= int(max5), verbose=2)  # iteration cap is max5 truncated to int (presumably the max missing-value percentage of split_df5 - confirm)
+size = idf5.shape[1]  # number of columns; reused below as numeric tick labels
+corr = idf5.corr()  # pairwise correlation between the imputed columns
+fig = plt.subplots(figsize=(15,15))  # NOTE(review): plt.subplots returns a (figure, axes) tuple, so `fig` holds the tuple, not the figure
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))  # colour scale pinned to [-1, 1]; ticks are column positions, not column names
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 13)
+[IterativeImputer] Ending imputation round 1/22, elapsed time 0.07
+[IterativeImputer] Ending imputation round 2/22, elapsed time 0.13
+[IterativeImputer] Ending imputation round 3/22, elapsed time 0.15
+[IterativeImputer] Ending imputation round 4/22, elapsed time 0.18
+[IterativeImputer] Ending imputation round 5/22, elapsed time 0.21
+[IterativeImputer] Ending imputation round 6/22, elapsed time 0.25
+[IterativeImputer] Ending imputation round 7/22, elapsed time 0.31
+[IterativeImputer] Ending imputation round 8/22, elapsed time 0.34
+[IterativeImputer] Ending imputation round 9/22, elapsed time 0.37
+[IterativeImputer] Ending imputation round 10/22, elapsed time 0.39
+[IterativeImputer] Ending imputation round 11/22, elapsed time 0.41
+[IterativeImputer] Ending imputation round 12/22, elapsed time 0.43
+[IterativeImputer] Ending imputation round 13/22, elapsed time 0.46
+[IterativeImputer] Ending imputation round 14/22, elapsed time 0.49
+[IterativeImputer] Ending imputation round 15/22, elapsed time 0.52
+[IterativeImputer] Ending imputation round 16/22, elapsed time 0.54
+[IterativeImputer] Ending imputation round 17/22, elapsed time 0.56
+[IterativeImputer] Ending imputation round 18/22, elapsed time 0.58
+[IterativeImputer] Early stopping criterion reached.
+[IterativeImputer] Completing matrix with shape (195, 13)
+[IterativeImputer] Ending imputation round 1/18, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/18, elapsed time 0.01
+[IterativeImputer] Ending imputation round 3/18, elapsed time 0.01
+[IterativeImputer] Ending imputation round 4/18, elapsed time 0.01
+[IterativeImputer] Ending imputation round 5/18, elapsed time 0.01
+[IterativeImputer] Ending imputation round 6/18, elapsed time 0.02
+[IterativeImputer] Ending imputation round 7/18, elapsed time 0.02
+[IterativeImputer] Ending imputation round 8/18, elapsed time 0.02
+[IterativeImputer] Ending imputation round 9/18, elapsed time 0.02
+[IterativeImputer] Ending imputation round 10/18, elapsed time 0.03
+[IterativeImputer] Ending imputation round 11/18, elapsed time 0.03
+[IterativeImputer] Ending imputation round 12/18, elapsed time 0.03
+[IterativeImputer] Ending imputation round 13/18, elapsed time 0.04
+[IterativeImputer] Ending imputation round 14/18, elapsed time 0.04
+[IterativeImputer] Ending imputation round 15/18, elapsed time 0.04
+[IterativeImputer] Ending imputation round 16/18, elapsed time 0.04
+[IterativeImputer] Ending imputation round 17/18, elapsed time 0.05
+[IterativeImputer] Ending imputation round 18/18, elapsed time 0.05
+
+
+
+ +
+ +
Out[185]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f8a443c31d0>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
+

Imputation of the whole dataset

+
+
+
+
+
+
In [9]:
+
+
+
iter_number = (max_missing + min_missing) // 2  # floored midpoint of the two values (presumably the overall max/min missing percentages - confirm); floor division of floats still yields a float
+print(iter_number)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
24.0
+
+
+
+ +
+
+ +
+
+
+
In [137]:
+
+
+
imputed_df = impute_df(df_inicator_values, max_iter=int(iter_number), verbose=2)  # impute the whole dataset at once instead of per split; int() because iter_number is a float. NOTE(review): "df_inicator_values" looks like a typo for "indicator" - rename together with its definition
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 67)
+[IterativeImputer] Ending imputation round 1/24, elapsed time 0.63
+[IterativeImputer] Ending imputation round 2/24, elapsed time 1.17
+[IterativeImputer] Ending imputation round 3/24, elapsed time 1.49
+[IterativeImputer] Ending imputation round 4/24, elapsed time 1.85
+[IterativeImputer] Ending imputation round 5/24, elapsed time 2.17
+[IterativeImputer] Ending imputation round 6/24, elapsed time 2.54
+[IterativeImputer] Ending imputation round 7/24, elapsed time 2.92
+[IterativeImputer] Ending imputation round 8/24, elapsed time 3.16
+[IterativeImputer] Ending imputation round 9/24, elapsed time 3.45
+[IterativeImputer] Ending imputation round 10/24, elapsed time 3.69
+[IterativeImputer] Ending imputation round 11/24, elapsed time 3.96
+[IterativeImputer] Ending imputation round 12/24, elapsed time 4.20
+[IterativeImputer] Ending imputation round 13/24, elapsed time 4.63
+[IterativeImputer] Ending imputation round 14/24, elapsed time 4.97
+[IterativeImputer] Ending imputation round 15/24, elapsed time 5.29
+[IterativeImputer] Ending imputation round 16/24, elapsed time 5.53
+[IterativeImputer] Ending imputation round 17/24, elapsed time 5.84
+[IterativeImputer] Ending imputation round 18/24, elapsed time 6.17
+[IterativeImputer] Ending imputation round 19/24, elapsed time 6.50
+[IterativeImputer] Ending imputation round 20/24, elapsed time 6.87
+[IterativeImputer] Ending imputation round 21/24, elapsed time 7.12
+[IterativeImputer] Ending imputation round 22/24, elapsed time 7.53
+[IterativeImputer] Ending imputation round 23/24, elapsed time 7.83
+[IterativeImputer] Ending imputation round 24/24, elapsed time 8.42
+[IterativeImputer] Completing matrix with shape (195, 67)
+[IterativeImputer] Ending imputation round 1/24, elapsed time 0.01
+[IterativeImputer] Ending imputation round 2/24, elapsed time 0.03
+[IterativeImputer] Ending imputation round 3/24, elapsed time 0.05
+[IterativeImputer] Ending imputation round 4/24, elapsed time 0.07
+[IterativeImputer] Ending imputation round 5/24, elapsed time 0.08
+[IterativeImputer] Ending imputation round 6/24, elapsed time 0.10
+[IterativeImputer] Ending imputation round 7/24, elapsed time 0.11
+[IterativeImputer] Ending imputation round 8/24, elapsed time 0.13
+[IterativeImputer] Ending imputation round 9/24, elapsed time 0.14
+[IterativeImputer] Ending imputation round 10/24, elapsed time 0.16
+[IterativeImputer] Ending imputation round 11/24, elapsed time 0.17
+[IterativeImputer] Ending imputation round 12/24, elapsed time 0.19
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning:
+
+[IterativeImputer] Early stopping criterion not reached.
+
+[IterativeImputer] Ending imputation round 13/24, elapsed time 0.21
+[IterativeImputer] Ending imputation round 14/24, elapsed time 0.23
+[IterativeImputer] Ending imputation round 15/24, elapsed time 0.23
+[IterativeImputer] Ending imputation round 16/24, elapsed time 0.25
+[IterativeImputer] Ending imputation round 17/24, elapsed time 0.26
+[IterativeImputer] Ending imputation round 18/24, elapsed time 0.27
+[IterativeImputer] Ending imputation round 19/24, elapsed time 0.28
+[IterativeImputer] Ending imputation round 20/24, elapsed time 0.29
+[IterativeImputer] Ending imputation round 21/24, elapsed time 0.30
+[IterativeImputer] Ending imputation round 22/24, elapsed time 0.32
+[IterativeImputer] Ending imputation round 23/24, elapsed time 0.32
+[IterativeImputer] Ending imputation round 24/24, elapsed time 0.33
+
+
+
+ +
+
+ +
+
+
+
In [12]:
+
+
+
display(imputed_df)  # eyeball the filled-in values of the fully imputed frame
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + +
+

Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
AFG26.08013.22036.9201.9480006.00600027.24468.95700079.72600038.04225.8...0.659000819.3850003565.865000.3910000.5930000.36129232.76232723.40239532.3670910.334824
AGO30.23223.13338.0564.0230006.35900030.000150.52600065.99500031.82566.2...0.9030005205.0490007022.231000.5520000.6110000.44200031.67000028.90000031.7330000.397000
ALB93.17493.70092.4979.70200010.61400029.50819.64200052.8520002.88161.2...0.96700011004.45500016884.667000.7800000.8070000.64800010.94300013.17900010.8930000.708000
AND72.32771.48473.32710.43900010.56400046.42921.56873511.9974870.07788.0...0.91564241463.35618767486.522320.8342490.9010730.72951416.48387923.40398416.0003410.748567
ARG57.15859.16154.82811.12300010.72900039.87762.78200021.80500044.78192.0...0.99300014872.16700027825.757000.8350000.8400000.60600013.72800025.15900013.2380000.729000
..................................................................
WSM74.94279.12771.58311.11950910.42049210.00023.88600029.9830000.19718.1...0.9442194054.3750008410.109000.6863800.7246720.49150813.06313123.40316212.7673830.614683
YEM28.02019.92036.9182.8800005.1460000.97160.35200045.62700029.16237.3...0.488000186.0410002980.035000.2700000.5530000.32700031.70200021.80000030.8670000.321000
ZAF75.47874.97778.20710.03100010.29100045.33367.90800010.29800058.55866.9...0.9860009247.75100015094.546000.7020000.7120000.31200033.99200056.99600031.1630000.468000
ZMB44.44038.48854.0686.2830008.17600017.964120.11200078.13400017.86144.1...0.9580003379.5490003270.422000.5690000.5930000.29200031.33600044.84000030.5920000.401000
ZWE64.93559.79270.7838.0660008.92300034.57186.13500064.73900014.64532.2...0.9310002374.6120002984.896000.5500000.5900000.35300022.76700028.76900022.5250000.441000
+

195 rows × 67 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [14]:
+
+
+
#scatter_matrix(imputed_df, figsize=(size, size))  # Disabled: very slow to render and the resulting plot is hard to read. Kept for reference.
+
+ +
+
+
+ +
+
+
+
In [15]:
+
+
+
#sns.heatmap(corr, annot = True, vmin=-1, vmax=1, center= 0, fmt='.1g', cmap= 'coolwarm', linewidths=1, linecolor='black', square=True, yticklabels=False, xticklabels=False)  # earlier annotated variant, kept for reference
+size = imputed_df.shape[1]  # number of columns; reused below as numeric tick labels
+corr = imputed_df.corr()  # pairwise correlation between all imputed columns
+plt.subplots(figsize=(20,20))  # open a fresh 20x20 figure for the heatmap to draw on
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size),cmap='mako')  # colour scale pinned to [-1, 1]; ticks are column positions, not column names
+
+ +
+
+
+ +
+
+ + +
+ +
Out[15]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f8a6be8cdd0>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [42]:
+
+
+
map_columns = {}
+count = 0
+for col in imputed_df.columns:
+    map_columns[count] = col
+    count += 1
+
+print(map_columns)
+
+#Makes it easier to check the correlations
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
{0: 'Population with at least some secondary education (% ages 25 and older)', 1: 'Population with at least some secondary education, female (% ages 25 and older)', 2: 'Population with at least some secondary education, male (% ages 25 and older)', 3: 'Mean years of schooling, female (years)', 4: 'Mean years of schooling, male (years)', 5: 'Share of seats in parliament (% held by women)', 6: 'Adolescent birth rate (births per 1,000 women ages 15-19)', 7: 'Vulnerable employment (% of total employment)', 8: 'Total population (millions)', 9: 'Urban population (%)', 10: 'Labour force participation rate (% ages 15 and older), female', 11: 'Labour force participation rate (% ages 15 and older), male', 12: 'Sex ratio at birth (male to female births)', 13: 'Remittances, inflows (% of GDP)', 14: 'Foreign direct investment, net inflows (% of GDP)', 15: 'Population ages 15?64 (millions)', 16: 'Infants lacking immunization, measles (% of one-year-olds)', 17: 'Infants lacking immunization, DTP (% of one-year-olds)', 18: 'Gross fixed capital formation (% of GDP)', 19: 'Gender Inequality Index (GII)', 20: 'Life expectancy at birth (years)', 21: 'Expected years of schooling (years)', 22: 'Inequality-adjusted education index', 23: 'Inequality-adjusted life expectancy index', 24: 'Inequality in education (%)', 25: 'Inequality in life expectancy (%)', 26: 'Mean years of schooling (years)', 27: 'Life expectancy index', 28: 'Income index', 29: 'Education index', 30: 'Unemployment, youth (% ages 15?24)', 31: 'Private capital flows (% of GDP)', 32: 'Life expectancy at birth, female (years)', 33: 'Life expectancy at birth, male (years)', 34: 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 35: 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 36: 'Expected years of schooling, female (years)', 37: 'Expected years of schooling, male (years)', 38: 'Population ages 65 and older (millions)', 39: 'Population under age 5 (millions)', 40: 'Exports and imports 
(% of GDP)', 41: 'Human Development Index (HDI)', 42: 'Unemployment, total (% of labour force)', 43: 'HDI rank', 44: 'Youth not in school or employment (% ages 15-24)', 45: 'Labour force participation rate (% ages 15 and older)', 46: 'Employment to population ratio (% ages 15 and older)', 47: 'Employment in agriculture (% of total employment)', 48: 'Employment in services (% of total employment)', 49: 'Working poor at PPP$3.20 a day (% of total employment)', 50: 'Total unemployment rate (female to male ratio)', 51: 'Youth unemployment rate (female to male ratio)', 52: 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 53: 'Gross capital formation (% of GDP)', 54: 'Gross domestic product (GDP), total (2017 PPP $ billions)', 55: 'GDP per capita (2017 PPP $)', 56: 'Gross national income (GNI) per capita (constant 2017 PPP$)', 57: 'Gender Development Index (GDI)', 58: 'Estimated gross national income per capita, female (2017 PPP $)', 59: 'Estimated gross national income per capita, male (2017 PPP $)', 60: 'Human Development Index (HDI), female', 61: 'Human Development Index (HDI), male', 62: 'Inequality-adjusted income index', 63: 'Overall loss in HDI due to inequality (%)', 64: 'Inequality in income (%)', 65: 'Coefficient of human inequality', 66: 'Inequality-adjusted HDI (IHDI)'}
+
+
+
+ +
+
+ +
+
+
+
In [43]:
+
+
+
x = imputed_df.loc[:, imputed_df.columns].values
+print(x)
+x = StandardScaler().fit_transform(x)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[[26.08       13.22       36.92       ... 23.40239465 32.36709094
+   0.33482394]
+ [30.232      23.133      38.056      ... 28.9        31.733
+   0.397     ]
+ [93.174      93.7        92.497      ... 13.179      10.893
+   0.708     ]
+ ...
+ [75.478      74.977      78.207      ... 56.996      31.163
+   0.468     ]
+ [44.44       38.488      54.068      ... 44.84       30.592
+   0.401     ]
+ [64.935      59.792      70.783      ... 28.769      22.525
+   0.441     ]]
+
+
+
+ +
+
+ +
+
+
+
In [44]:
+
+
+
print(imputed_df.shape)  # sanity check: scaling must not change the number of rows or columns
+print(x.shape)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
(195, 67)
+(195, 67)
+
+
+
+ +
+
+ +
+
+
+
In [45]:
+
+
+
display(x)  # x is the standardized array at this point, not the raw values
+
+ +
+
+
+ +
+
+ + +
+ +
+ + + + +
+
array([[-1.23937651e+00, -1.53575927e+00, -9.67770574e-01, ...,
+         1.00848260e-04,  1.54283328e+00, -1.49474833e+00],
+       [-1.09115273e+00, -1.20078779e+00, -9.25717124e-01, ...,
+         6.32931604e-01,  1.47104631e+00, -1.14014979e+00],
+       [ 1.15583694e+00,  1.18375092e+00,  1.08962783e+00, ...,
+        -1.17671672e+00, -8.88300426e-01,  6.33525610e-01],
+       ...,
+       [ 5.24100909e-01,  5.51079584e-01,  5.60628007e-01, ...,
+         3.86706934e+00,  1.40651524e+00, -7.35227114e-01],
+       [-5.83936096e-01, -6.81924979e-01, -3.32970786e-01, ...,
+         2.46778907e+00,  1.34187095e+00, -1.11733725e+00],
+       [ 1.47722465e-01,  3.79612664e-02,  2.85799827e-01, ...,
+         6.17852160e-01,  4.28586392e-01, -8.89211795e-01]])
+
+ +
+ +
+
+ +
+
+
+
+

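We want to check that the normalized dataset has a mean of 0 and a standard deviation of 1.

The output below confirms this: the mean is zero up to floating-point error and the standard deviation is 1.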

+
+
+
+
+
+
In [34]:
+
+
+
np.mean(x), np.std(x)  # NOTE(review): no axis given, so this is the mean/std over all entries at once; pass axis=0 to verify each column separately
+
+ +
+
+
+ +
+
+ + +
+ +
Out[34]:
+ + + + +
+
(-7.613929047563262e-18, 1.0)
+
+ +
+ +
+
+ +
+
+
+
+

Display the normalized data

+
+
+
+
+
+
In [50]:
+
+
+
feat_cols = imputed_df.columns.values.tolist()  # reuse the original column names for the scaled matrix
+#print(feat_cols)
+normalized_imputed_df = pd.DataFrame(x, columns=feat_cols)  # NOTE(review): no index is passed, so rows get a default 0..n-1 index instead of imputed_df's row labels
+normalized_imputed_df.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[50]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
0-1.239377-1.535759-0.967771-1.942292-1.1127270.3618380.5332601.562488-0.009236-1.443911...-3.734095-0.874370-0.860080-1.896431-1.038168-1.1500081.5201660.0001011.542833-1.494748
1-1.091153-1.200788-0.925717-1.319112-0.9838280.5964482.5770451.053248-0.0517860.299618...-0.448076-0.599552-0.722613-0.896440-0.908711-0.6647131.3985830.6329321.471046-1.140150
21.1558371.1837511.0896280.3864500.5698970.554566-0.7023720.565815-0.2498840.083835...0.413830-0.236145-0.3303620.5196970.5009340.573955-0.908449-1.176717-0.8883000.633526
30.4116120.4330470.3799760.6077920.5516401.994999-0.654096-0.949352-0.2690751.240433...-0.2778181.6724971.6821870.8566421.1775101.064095-0.2917180.000284-0.3100860.864885
4-0.1299120.016639-0.3048360.8132160.6118901.4372470.378540-0.5856220.0368871.413060...0.7639800.0062170.1047900.8613080.7382720.321411-0.5984630.202304-0.6228170.753291
+

5 rows × 67 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Now we start with the PCA part

+
+
+
+
+
+
In [109]:
+
+
+
num_components = 3
+pca_imputed = PCA(n_components=num_components)
+pComponents_imputed = pca_imputed.fit_transform(x)
+component_col = ['PC'+str(i+1) for i in range(num_components)]
+print(component_col)
+
+percentage_list = [element * 100 for element in pca_imputed.explained_variance_ratio_]
+percentage_list = ['%.2f' % elem for elem in percentage_list]
+print(percentage_list)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['PC1', 'PC2', 'PC3']
+['49.80', '7.90', '7.20']
+
+
+
+ +
+
+ +
+
+
+
+

PC stands for principal component (PC1, PC2 and PC3 are the three components computed above).

+
+
+
+
+
+
In [82]:
+
+
+
pc_imputed_df = pd.DataFrame(data = pComponents_imputed, columns = component_col)
+print(pc_imputed_df.shape)
+pc_imputed_df.head()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
(195, 3)
+
+
+
+ +
+ +
Out[82]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PC1PC2PC3
08.7406083.668457-0.157376
17.364151-2.6641020.136981
2-3.2658692.200737-0.526536
3-5.434715-1.268159-0.277446
4-3.9716860.572238-0.254418
+
+
+ +
+ +
+
+ +
+
+
+
In [111]:
+
+
+
print('Explained variation percentage per principal component: {}'.format(percentage_list))
+total_explained_percentage = (sum(pca_imputed.explained_variance_ratio_)*100)
+print('Total percentage of the explained data by',pca_imputed.n_components,'components is: %.2f' %total_explained_percentage)
+print('Percentage of the information that is lost for using',pca_imputed.n_components,'components is: %.2f' %(100-total_explained_percentage))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Explained variation percentage per principal component: ['49.80', '7.90', '7.20']
+Total percentage of the explained data by 3 components is: 64.90
+Percentage of the information that is lost for using 3 components is: 35.10
+
+
+
+ +
+
+ +
+
+
+
+

Outliers are a big problem, as can be seen from the graph below.

+
+
+
+
+
+
In [117]:
+
+
+
l_dict = {}
+for i in range(len(percentage_list)):
+    l_dict[str(i)] = 'PC'+str(i+1)+' '+str(percentage_list[i])+'%'
+
+print(l_dict)
+
+fig = px.scatter_3d(
+    pComponents_imputed, x=0, y=1, z=2,
+    title=f'Total Explained Variance: {total_explained_percentage:.2f}%',
+    labels=l_dict
+)
+
+fig.show()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
{'0': 'PC1 49.80%', '1': 'PC2 7.90%', '2': 'PC3 7.20%'}
+
+
+
+ +
+ +
+ + + + + +
+ +
+
+ +
+
+
+
In [ ]:
+
+
+
imputed_df.to_csv("data/unlabeled/hdro_preprocessed.csv")
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/notebooks/prep_hdro_v2.html b/documentation/WaterSecurity/notebooks/prep_hdro_v2.html new file mode 100644 index 0000000..0aad399 --- /dev/null +++ b/documentation/WaterSecurity/notebooks/prep_hdro_v2.html @@ -0,0 +1,85511 @@ + + + + +Notebook + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+
In [1]:
+
+
+
import pandas as pd 
+from pandas.plotting import scatter_matrix
+import numpy as np
+import json
+from helpers import *
+from sklearn.preprocessing import StandardScaler
+from sklearn.decomposition import PCA
+import sys
+import matplotlib.pyplot as plt
+import seaborn as sns
+import plotly.express as px
+sys.path.append("..")
+from data.unlabeled.raw import hdro_inicator_values as inicator_values, hdro_country_name as country_name, hdro_indicator_name as indicator_name
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Something went wrong loading the Economic Fitness Dataset [Errno 2] File b'../data/unlabeled/raw/Economic_Fitness_CSV\\Country.csv' does not exist: b'../data/unlabeled/raw/Economic_Fitness_CSV\\Country.csv'
+Something went wrong loading the Education Dataset [Errno 2] File b'../data/unlabeled/raw/Edstats_csv/EdStatsCountry.csv' does not exist: b'../data/unlabeled/raw/Edstats_csv/EdStatsCountry.csv'
+../data/unlabeled/raw/__init__.py:41: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_eah = pd.read_csv(aquastat_eah_path, skipfooter=8)
+../data/unlabeled/raw/__init__.py:42: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wr = pd.read_csv(aquastat_wr_path, skipfooter=8)
+../data/unlabeled/raw/__init__.py:43: ParserWarning: Falling back to the 'python' engine because the 'c' engine does not support skipfooter; you can avoid this warning by specifying engine='python'.
+  aquastat_wu = pd.read_csv(aquastat_wu_path, skipfooter=8)
+
+
+
+ +
+
+ +
+
+
+
In [2]:
+
+
+
#print(inicator_values)
+#print(country_name)
+#print(indicator_name)
+
+ +
+
+
+ +
+
+
+
+

Unwrap the nested indicator values, keeping the 2019 value for each indicator, to build a tidy DataFrame.

+
+
+
+
+
+
In [3]:
+
+
+
df_inicator_values = pd.DataFrame(inicator_values).T
+df_inicator_values.columns = [indicator_name[nm] for nm in df_inicator_values.columns]
+df_inicator_values = df_inicator_values.applymap(lambda x: x['2019'] if pd.notnull(x) else x)
+df_inicator_values
+
+ +
+
+
+ +
+
+ + +
+ +
Out[3]:
+ + + +
+

Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gross enrolment ratio, pre-primary (% of preschool-age children)Percentage of primary schools with access to the internetPercentage of secondary schools with access to the internetGross enrolment ratio, tertiary (% of tertiary school-age population)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, female (%)Share of graduates in science, technology, engineering and mathematics programmes at tertiary level, male (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are female (%)Share of graduates from science, technology, engineering and mathematics programmes in tertiary education who are male (%)Primary school teachers trained to teach (%)Pupil-teacher ratio, primary school (pupils per teacher)
AFG26.08013.22036.9201.9486.00627.24468.95779.72638.04225.8...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AGO30.23223.13338.0564.0236.35930.000150.52665.99531.82566.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ALB93.17493.70092.4979.70210.61429.50819.64252.8522.88161.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
AND72.32771.48473.32710.43910.56446.429NaNNaN0.07788.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ARG57.15859.16154.82811.12310.72939.87762.78221.80544.78192.0...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
..................................................................
WSM74.94279.12771.583NaNNaN10.00023.88629.9830.19718.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
YEM28.02019.92036.9182.8805.1460.97160.35245.62729.16237.3...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZAF75.47874.97778.20710.03110.29145.33367.90810.29858.55866.9...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZMB44.44038.48854.0686.2838.17617.964120.11278.13417.86144.1...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
ZWE64.93559.79270.7838.0668.92334.57186.13564.73914.64532.2...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
+

195 rows × 98 columns

+
+
+ +
+ +
+
+ +
+
+
+
+

Display the max, min and mean percentage of missing values per column

+
+
+
+
+
+
In [4]:
+
+
+
print_missing_percentages(df_inicator_values)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 98.97435897435898 %
+Min: 0.0 %
+Mean: 37.566718995290415 %
+
+
+
+ +
+ +
Out[4]:
+ + + + +
+
(0.0, 98.97435897435898)
+
+ +
+ +
+
+ +
+
+
+
+

There is a high number of missing values; we therefore remove columns where more than 50% of the data is missing.

+
+
+
+
+
+
In [5]:
+
+
+
dropColumnHalf(df_inicator_values)
+
+ +
+
+
+ +
+
+
+
+

Again, check the max, min and mean percentage of missing values per column

+
+
+
+
+
+
In [6]:
+
+
+
min_missing, max_missing = print_missing_percentages(df_inicator_values)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Max, min and mean number of missing values for the columns
+Max: 49.743589743589745 %
+Min: 0.0 %
+Mean: 10.616150019135096 %
+
+
+
+ +
+
+ +
+
+
+
In [7]:
+
+
+
df_inicator_values.describe()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[7]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Mean years of schooling, female (years)Mean years of schooling, male (years)Share of seats in parliament (% held by women)Adolescent birth rate (births per 1,000 women ages 15-19)Vulnerable employment (% of total employment)Total population (millions)Urban population (%)...Gender Development Index (GDI)Estimated gross national income per capita, female (2017 PPP $)Estimated gross national income per capita, male (2017 PPP $)Human Development Index (HDI), femaleHuman Development Index (HDI), maleInequality-adjusted income indexOverall loss in HDI due to inequality (%)Inequality in income (%)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
count175.000000167.000000167.000000174.000000174.000000193.000000185.000000180.000000195.000000195.000000...167.000000178.000000178.000000167.000000167.000000156.000000152.000000156.000000152.000000152.000000
mean61.06807461.73601265.8235878.4800179.13340222.98129548.30934638.25801139.39142659.257436...0.93899414440.99851124458.3313540.7026830.7421860.54506419.38914523.40138519.0025990.595250
std29.61079829.28405026.3953583.4213322.80298411.82878440.52883427.774415146.48585523.231038...0.07455915359.93598623943.4826280.1658670.1431940.1727899.9487189.7440049.7773240.190002
min0.0000001.7380009.0000001.0700002.2560000.1000000.2830000.1440000.01100013.200000...0.488000186.041000640.1050000.2700000.4320000.1760004.4440008.5000004.4240000.232000
25%37.29650036.92500045.3565005.8500006.73375014.76500013.17700012.5382502.08100041.200000...0.9085002925.6142506275.9337500.5770000.6215000.40475010.79225016.60300010.5940000.431500
50%64.82800068.06700070.6820009.1110009.25250021.09400040.53600032.5335008.77200060.000000...0.9650008399.44250016951.3570000.7300000.7600000.52750017.93450021.77950017.5260000.604000
75%89.14500087.99050090.91650011.23050011.55100030.00000070.50400063.34550028.56250078.000000...0.98600022583.77950035488.4760000.8315000.8485000.69175027.61525028.62500027.0125000.767250
max100.000000100.000000100.00000013.88200014.43100055.660000186.53800094.5810001433.784000100.000000...1.03600071387.276000107833.0290000.9490000.9650000.85800045.30700056.99600044.1670000.899000
+

8 rows × 67 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [11]:
+
+
+
#Find columns that only contain integers or null values
+#find_all_integer_columns(df_inicator_values)
+
+ +
+
+
+ +
+
+
+
In [8]:
+
+
+
# Column values are shown
+
+df_inicator_values.columns.values
+
+ +
+
+
+ +
+
+ + +
+ +
Out[8]:
+ + + + +
+
array(['Population with at least some secondary education (% ages 25 and older)',
+       'Population with at least some secondary education, female (% ages 25 and older)',
+       'Population with at least some secondary education, male (% ages 25 and older)',
+       'Mean years of schooling, female (years)',
+       'Mean years of schooling, male (years)',
+       'Share of seats in parliament (% held by women)',
+       'Adolescent birth rate (births per 1,000 women ages 15-19)',
+       'Vulnerable employment (% of total employment)',
+       'Total population (millions)', 'Urban population (%)',
+       'Labour force participation rate (% ages 15 and older), female',
+       'Labour force participation rate (% ages 15 and older), male',
+       'Sex ratio at birth (male to female births)',
+       'Remittances, inflows (% of GDP)',
+       'Foreign direct investment, net inflows (% of GDP)',
+       'Population ages 15?64 (millions)',
+       'Infants lacking immunization, measles (% of one-year-olds)',
+       'Infants lacking immunization, DTP (% of one-year-olds)',
+       'Gross fixed capital formation (% of GDP)',
+       'Gender Inequality Index (GII)',
+       'Life expectancy at birth (years)',
+       'Expected years of schooling (years)',
+       'Inequality-adjusted education index',
+       'Inequality-adjusted life expectancy index',
+       'Inequality in education (%)', 'Inequality in life expectancy (%)',
+       'Mean years of schooling (years)', 'Life expectancy index',
+       'Income index', 'Education index',
+       'Unemployment, youth (% ages 15?24)',
+       'Private capital flows (% of GDP)',
+       'Life expectancy at birth, female (years)',
+       'Life expectancy at birth, male (years)',
+       'Young age (0-14) dependency ratio (per 100 people ages 15-64)',
+       'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)',
+       'Expected years of schooling, female (years)',
+       'Expected years of schooling, male (years)',
+       'Population ages 65 and older (millions)',
+       'Population under age 5 (millions)',
+       'Exports and imports (% of GDP)', 'Human Development Index (HDI)',
+       'Unemployment, total (% of labour force)', 'HDI rank',
+       'Youth not in school or employment (% ages 15-24)',
+       'Labour force participation rate (% ages 15 and older)',
+       'Employment to population ratio (% ages 15 and older)',
+       'Employment in agriculture (% of total employment)',
+       'Employment in services (% of total employment)',
+       'Working poor at PPP$3.20 a day (% of total employment)',
+       'Total unemployment rate (female to male ratio)',
+       'Youth unemployment rate (female to male ratio)',
+       'Share of employment in nonagriculture, female (% of total employment in nonagriculture)',
+       'Gross capital formation (% of GDP)',
+       'Gross domestic product (GDP), total (2017 PPP $ billions)',
+       'GDP per capita (2017 PPP $)',
+       'Gross national income (GNI) per capita (constant 2017 PPP$)',
+       'Gender Development Index (GDI)',
+       'Estimated gross national income per capita, female (2017 PPP $)',
+       'Estimated gross national income per capita, male (2017 PPP $)',
+       'Human Development Index (HDI), female',
+       'Human Development Index (HDI), male',
+       'Inequality-adjusted income index',
+       'Overall loss in HDI due to inequality (%)',
+       'Inequality in income (%)', 'Coefficient of human inequality',
+       'Inequality-adjusted HDI (IHDI)'], dtype=object)
+
+ +
+ +
+
+ +
+
+
+
+

Initial Correlation Matrix

+
+
+
+
+
+
In [13]:
+
+
+
size = df_inicator_values.shape[1]
+corr = df_inicator_values.corr()
+plt.subplots(figsize=(20,20))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size),cmap='mako')
+
+ +
+
+
+ +
+
+ + +
+ +
Out[13]:
+ + + + +
+
<AxesSubplot:>
+
+ +
+ +
+ +
+ + + +
+ + + + + + + + 2021-05-12T09:28:20.328956 + image/svg+xml + + + Matplotlib v3.4.1, https://matplotlib.org
+ +
+ +
+
+ +
+
+
+
+

Division of the columns into groups by unit (percentages, money, indices, years, millions, and the rest)

+
+
+
+
+
+
In [9]:
+
+
+
percentage_columns = []
+money_columns = []
+index_columns = []
+year_columns = []
+millions_columns = []
+rest = []
+
+for column in df_inicator_values.columns.values:
+    if '%' in column:
+        percentage_columns.append(column)
+    elif '$' in column:
+        money_columns.append(column)
+    elif 'years' in column:
+        year_columns.append(column)
+    elif 'index' in column.lower():
+        index_columns.append(column)
+    elif 'millions' in column.lower():
+        millions_columns.append(column)
+    else:
+        rest.append(column)
+
+print(percentage_columns,'\n',len(percentage_columns))
+print(money_columns,'\n',len(money_columns))
+print(index_columns,'\n',len(index_columns))
+print(year_columns,'\n',len(year_columns))
+print(millions_columns,'\n',len(millions_columns))
+print(rest,'\n',len(rest))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['Population with at least some secondary education (% ages 25 and older)', 'Population with at least some secondary education, female (% ages 25 and older)', 'Population with at least some secondary education, male (% ages 25 and older)', 'Share of seats in parliament (% held by women)', 'Vulnerable employment (% of total employment)', 'Urban population (%)', 'Labour force participation rate (% ages 15 and older), female', 'Labour force participation rate (% ages 15 and older), male', 'Remittances, inflows (% of GDP)', 'Foreign direct investment, net inflows (% of GDP)', 'Infants lacking immunization, measles (% of one-year-olds)', 'Infants lacking immunization, DTP (% of one-year-olds)', 'Gross fixed capital formation (% of GDP)', 'Inequality in education (%)', 'Inequality in life expectancy (%)', 'Unemployment, youth (% ages 15?24)', 'Private capital flows (% of GDP)', 'Exports and imports (% of GDP)', 'Unemployment, total (% of labour force)', 'Youth not in school or employment (% ages 15-24)', 'Labour force participation rate (% ages 15 and older)', 'Employment to population ratio (% ages 15 and older)', 'Employment in agriculture (% of total employment)', 'Employment in services (% of total employment)', 'Working poor at PPP$3.20 a day (% of total employment)', 'Share of employment in nonagriculture, female (% of total employment in nonagriculture)', 'Gross capital formation (% of GDP)', 'Overall loss in HDI due to inequality (%)', 'Inequality in income (%)'] 
+ 29
+['Gross domestic product (GDP), total (2017 PPP $ billions)', 'GDP per capita (2017 PPP $)', 'Gross national income (GNI) per capita (constant 2017 PPP$)', 'Estimated gross national income per capita, female (2017 PPP $)', 'Estimated gross national income per capita, male (2017 PPP $)'] 
+ 5
+['Gender Inequality Index (GII)', 'Inequality-adjusted education index', 'Inequality-adjusted life expectancy index', 'Life expectancy index', 'Income index', 'Education index', 'Human Development Index (HDI)', 'Gender Development Index (GDI)', 'Human Development Index (HDI), female', 'Human Development Index (HDI), male', 'Inequality-adjusted income index'] 
+ 11
+['Mean years of schooling, female (years)', 'Mean years of schooling, male (years)', 'Life expectancy at birth (years)', 'Expected years of schooling (years)', 'Mean years of schooling (years)', 'Life expectancy at birth, female (years)', 'Life expectancy at birth, male (years)', 'Expected years of schooling, female (years)', 'Expected years of schooling, male (years)'] 
+ 9
+['Total population (millions)', 'Population ages 15?64 (millions)', 'Population ages 65 and older (millions)', 'Population under age 5 (millions)'] 
+ 4
+['Adolescent birth rate (births per 1,000 women ages 15-19)', 'Sex ratio at birth (male to female births)', 'Young age (0-14) dependency ratio (per 100 people ages 15-64)', 'Old-age (65 and older) dependency ratio (per 100 people ages 15-64)', 'HDI rank', 'Total unemployment rate (female to male ratio)', 'Youth unemployment rate (female to male ratio)', 'Coefficient of human inequality', 'Inequality-adjusted HDI (IHDI)'] 
+ 9
+
+
+
+ +
+
+ +
+
+
+
+

The dataframe is split into one sub-dataframe per column group

+
+
+
+
+
+
In [10]:
+
+
+
split_df1 = df_inicator_values[percentage_columns]
+split_df2 = df_inicator_values[money_columns]
+split_df3 = df_inicator_values[index_columns]
+split_df4 = df_inicator_values[year_columns]
+split_df5 = df_inicator_values[millions_columns]
+split_df6 = df_inicator_values[rest]
+
+ +
+
+
+ +
+
+
+
In [11]:
+
+
+
print('- Dataframe 1 -')
+min1, max1 = print_missing_percentages(split_df1)
+print('- Dataframe 2 -')
+min2, max2 = print_missing_percentages(split_df2)
+print('- Dataframe 3 -')
+min3, max3 = print_missing_percentages(split_df3)
+print('- Dataframe 4 -')
+min4, max4 = print_missing_percentages(split_df4)
+print('- Dataframe 5 -')
+min5, max5 = print_missing_percentages(split_df5)
+print('- Dataframe 6 -')
+min6, max6 = print_missing_percentages(split_df6)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
- Dataframe 1 -
+Max, min and mean number of missing values for the columns
+Max: 49.743589743589745 %
+Min: 0.0 %
+Mean: 14.624226348364274 %
+- Dataframe 2 -
+Max, min and mean number of missing values for the columns
+Max: 8.717948717948717 %
+Min: 2.051282051282051 %
+Mean: 6.153846153846153 %
+- Dataframe 3 -
+Max, min and mean number of missing values for the columns
+Max: 20.0 %
+Min: 2.051282051282051 %
+Mean: 9.790209790209792 %
+- Dataframe 4 -
+Max, min and mean number of missing values for the columns
+Max: 10.76923076923077 %
+Min: 1.0256410256410255 %
+Mean: 5.584045584045585 %
+- Dataframe 5 -
+Max, min and mean number of missing values for the columns
+Max: 5.128205128205129 %
+Min: 0.0 %
+Mean: 3.8461538461538467 %
+- Dataframe 6 -
+Max, min and mean number of missing values for the columns
+Max: 22.05128205128205 %
+Min: 3.076923076923077 %
+Mean: 9.230769230769232 %
+
+
+
+ +
+
+ +
+
+
+
+

Imputation of the individual datasets

idf stands for imputed dataframe

+
+
+
+
+
+
In [12]:
+
+
+
if max1 < 10:
+    max1 = 10
+
+idf1 = impute_df(split_df1, max_iter= int(max1), verbose=2)
+size = idf1.shape[1]
+corr = idf1.corr()
+plt.subplots(figsize=(20,20))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 29)
+[IterativeImputer] Ending imputation round 1/49, elapsed time 0.22
+[IterativeImputer] Ending imputation round 2/49, elapsed time 0.30
+[IterativeImputer] Ending imputation round 3/49, elapsed time 0.42
+[IterativeImputer] Ending imputation round 4/49, elapsed time 0.49
+[IterativeImputer] Ending imputation round 5/49, elapsed time 0.56
+[IterativeImputer] Ending imputation round 6/49, elapsed time 0.63
+[IterativeImputer] Ending imputation round 7/49, elapsed time 0.73
+[IterativeImputer] Ending imputation round 8/49, elapsed time 0.81
+[IterativeImputer] Ending imputation round 9/49, elapsed time 0.92
+[IterativeImputer] Ending imputation round 10/49, elapsed time 0.97
+[IterativeImputer] Ending imputation round 11/49, elapsed time 1.03
+[IterativeImputer] Ending imputation round 12/49, elapsed time 1.08
+[IterativeImputer] Ending imputation round 13/49, elapsed time 1.13
+[IterativeImputer] Ending imputation round 14/49, elapsed time 1.21
+[IterativeImputer] Ending imputation round 15/49, elapsed time 1.30
+[IterativeImputer] Ending imputation round 16/49, elapsed time 1.41
+[IterativeImputer] Ending imputation round 17/49, elapsed time 1.50
+[IterativeImputer] Ending imputation round 18/49, elapsed time 1.60
+[IterativeImputer] Ending imputation round 19/49, elapsed time 1.69
+[IterativeImputer] Ending imputation round 20/49, elapsed time 1.75
+[IterativeImputer] Ending imputation round 21/49, elapsed time 1.81
+[IterativeImputer] Ending imputation round 22/49, elapsed time 1.86
+[IterativeImputer] Ending imputation round 23/49, elapsed time 1.92
+[IterativeImputer] Ending imputation round 24/49, elapsed time 1.98
+[IterativeImputer] Ending imputation round 25/49, elapsed time 2.06
+[IterativeImputer] Ending imputation round 26/49, elapsed time 2.18
+[IterativeImputer] Ending imputation round 27/49, elapsed time 2.26
+[IterativeImputer] Ending imputation round 28/49, elapsed time 2.35
+[IterativeImputer] Ending imputation round 29/49, elapsed time 2.43
+[IterativeImputer] Ending imputation round 30/49, elapsed time 2.50
+[IterativeImputer] Ending imputation round 31/49, elapsed time 2.57
+[IterativeImputer] Ending imputation round 32/49, elapsed time 2.63
+[IterativeImputer] Ending imputation round 33/49, elapsed time 2.71
+[IterativeImputer] Ending imputation round 34/49, elapsed time 2.77
+[IterativeImputer] Ending imputation round 35/49, elapsed time 2.90
+[IterativeImputer] Ending imputation round 36/49, elapsed time 2.97
+[IterativeImputer] Ending imputation round 37/49, elapsed time 3.09
+[IterativeImputer] Ending imputation round 38/49, elapsed time 3.16
+[IterativeImputer] Ending imputation round 39/49, elapsed time 3.22
+[IterativeImputer] Ending imputation round 40/49, elapsed time 3.27
+[IterativeImputer] Ending imputation round 41/49, elapsed time 3.32
+[IterativeImputer] Ending imputation round 42/49, elapsed time 3.37
+[IterativeImputer] Ending imputation round 43/49, elapsed time 3.44
+[IterativeImputer] Ending imputation round 44/49, elapsed time 3.51
+[IterativeImputer] Ending imputation round 45/49, elapsed time 3.61
+[IterativeImputer] Ending imputation round 46/49, elapsed time 3.68
+[IterativeImputer] Ending imputation round 47/49, elapsed time 3.75
+[IterativeImputer] Ending imputation round 48/49, elapsed time 3.80
+[IterativeImputer] Ending imputation round 49/49, elapsed time 3.86
+[IterativeImputer] Completing matrix with shape (195, 29)
+[IterativeImputer] Ending imputation round 1/49, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/49, elapsed time 0.01
+[IterativeImputer] Ending imputation round 3/49, elapsed time 0.01
+[IterativeImputer] Ending imputation round 4/49, elapsed time 0.01
+[IterativeImputer] Ending imputation round 5/49, elapsed time 0.02
+[IterativeImputer] Ending imputation round 6/49, elapsed time 0.02
+[IterativeImputer] Ending imputation round 7/49, elapsed time 0.02
+[IterativeImputer] Ending imputation round 8/49, elapsed time 0.03
+[IterativeImputer] Ending imputation round 9/49, elapsed time 0.03
+[IterativeImputer] Ending imputation round 10/49, elapsed time 0.03
+[IterativeImputer] Ending imputation round 11/49, elapsed time 0.04
+[IterativeImputer] Ending imputation round 12/49, elapsed time 0.04
+[IterativeImputer] Ending imputation round 13/49, elapsed time 0.04
+[IterativeImputer] Ending imputation round 14/49, elapsed time 0.05
+[IterativeImputer] Ending imputation round 15/49, elapsed time 0.05
+[IterativeImputer] Ending imputation round 16/49, elapsed time 0.05
+[IterativeImputer] Ending imputation round 17/49, elapsed time 0.06
+[IterativeImputer] Ending imputation round 18/49, elapsed time 0.06
+[IterativeImputer] Ending imputation round 19/49, elapsed time 0.07
+[IterativeImputer] Ending imputation round 20/49, elapsed time 0.07
+[IterativeImputer] Ending imputation round 21/49, elapsed time 0.07
+[IterativeImputer] Ending imputation round 22/49, elapsed time 0.08
+[IterativeImputer] Ending imputation round 23/49, elapsed time 0.08
+[IterativeImputer] Ending imputation round 24/49, elapsed time 0.08
+[IterativeImputer] Ending imputation round 25/49, elapsed time 0.09
+[IterativeImputer] Ending imputation round 26/49, elapsed time 0.09
+[IterativeImputer] Ending imputation round 27/49, elapsed time 0.09
+[IterativeImputer] Ending imputation round 28/49, elapsed time 0.10
+[IterativeImputer] Ending imputation round 29/49, elapsed time 0.10
+[IterativeImputer] Ending imputation round 30/49, elapsed time 0.10
+[IterativeImputer] Ending imputation round 31/49, elapsed time 0.11
+[IterativeImputer] Ending imputation round 32/49, elapsed time 0.11
+[IterativeImputer] Ending imputation round 33/49, elapsed time 0.11
+[IterativeImputer] Ending imputation round 34/49, elapsed time 0.11
+[IterativeImputer] Ending imputation round 35/49, elapsed time 0.12
+[IterativeImputer] Ending imputation round 36/49, elapsed time 0.12
+[IterativeImputer] Ending imputation round 37/49, elapsed time 0.12
+[IterativeImputer] Ending imputation round 38/49, elapsed time 0.13
+[IterativeImputer] Ending imputation round 39/49, elapsed time 0.13
+[IterativeImputer] Ending imputation round 40/49, elapsed time 0.13
+[IterativeImputer] Ending imputation round 41/49, elapsed time 0.14
+[IterativeImputer] Ending imputation round 42/49, elapsed time 0.14
+[IterativeImputer] Ending imputation round 43/49, elapsed time 0.14
+[IterativeImputer] Ending imputation round 44/49, elapsed time 0.15
+[IterativeImputer] Ending imputation round 45/49, elapsed time 0.15
+[IterativeImputer] Ending imputation round 46/49, elapsed time 0.15
+[IterativeImputer] Ending imputation round 47/49, elapsed time 0.16
+[IterativeImputer] Ending imputation round 48/49, elapsed time 0.16
+[IterativeImputer] Ending imputation round 49/49, elapsed time 0.16
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.
+  " reached.", ConvergenceWarning)
+
+
+
+ +
+ +
Out[12]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f96dc9daa10>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [13]:
+
+
+
if max2 < 10:
+    max2 = 10
+
+idf2 = impute_df(split_df2, max_iter= int(max2), verbose=2)
+size = idf2.shape[1]
+corr = idf2.corr()
+fig = plt.subplots(figsize=(15,15))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 5)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.08
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.10
+[IterativeImputer] Ending imputation round 3/10, elapsed time 0.11
+[IterativeImputer] Ending imputation round 4/10, elapsed time 0.14
+[IterativeImputer] Ending imputation round 5/10, elapsed time 0.15
+[IterativeImputer] Ending imputation round 6/10, elapsed time 0.16
+[IterativeImputer] Ending imputation round 7/10, elapsed time 0.17
+[IterativeImputer] Ending imputation round 8/10, elapsed time 0.18
+[IterativeImputer] Ending imputation round 9/10, elapsed time 0.21
+[IterativeImputer] Ending imputation round 10/10, elapsed time 0.22
+[IterativeImputer] Completing matrix with shape (195, 5)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.00
+[IterativeImputer] Ending imputation round 3/10, elapsed time 0.00
+[IterativeImputer] Ending imputation round 4/10, elapsed time 0.00
+[IterativeImputer] Ending imputation round 5/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 6/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 7/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 8/10, elapsed time 0.02
+[IterativeImputer] Ending imputation round 9/10, elapsed time 0.02
+[IterativeImputer] Ending imputation round 10/10, elapsed time 0.02
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.
+  " reached.", ConvergenceWarning)
+
+
+
+ +
+ +
Out[13]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f96dca54bd0>
+
+ +
+ +
+ +
+ + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+ +
+
+
+
In [14]:
+
+
+
if max3 < 10:
+    max3 = 10
+
+idf3 = impute_df(split_df3, max_iter= int(max3), verbose=2)
+size = idf3.shape[1]
+corr = idf3.corr()
+fig = plt.subplots(figsize=(15,15))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 11)
+[IterativeImputer] Ending imputation round 1/20, elapsed time 0.09
+[IterativeImputer] Ending imputation round 2/20, elapsed time 0.18
+[IterativeImputer] Ending imputation round 3/20, elapsed time 0.20
+[IterativeImputer] Ending imputation round 4/20, elapsed time 0.22
+[IterativeImputer] Ending imputation round 5/20, elapsed time 0.24
+[IterativeImputer] Ending imputation round 6/20, elapsed time 0.26
+[IterativeImputer] Ending imputation round 7/20, elapsed time 0.29
+[IterativeImputer] Ending imputation round 8/20, elapsed time 0.37
+[IterativeImputer] Ending imputation round 9/20, elapsed time 0.39
+[IterativeImputer] Ending imputation round 10/20, elapsed time 0.41
+[IterativeImputer] Ending imputation round 11/20, elapsed time 0.43
+[IterativeImputer] Ending imputation round 12/20, elapsed time 0.45
+[IterativeImputer] Ending imputation round 13/20, elapsed time 0.47
+[IterativeImputer] Ending imputation round 14/20, elapsed time 0.49
+[IterativeImputer] Ending imputation round 15/20, elapsed time 0.54
+[IterativeImputer] Ending imputation round 16/20, elapsed time 0.59
+[IterativeImputer] Ending imputation round 17/20, elapsed time 0.62
+[IterativeImputer] Ending imputation round 18/20, elapsed time 0.64
+[IterativeImputer] Ending imputation round 19/20, elapsed time 0.66
+[IterativeImputer] Ending imputation round 20/20, elapsed time 0.68
+[IterativeImputer] Completing matrix with shape (195, 11)
+[IterativeImputer] Ending imputation round 1/20, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 3/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 4/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 5/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 6/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 7/20, elapsed time 0.01
+[IterativeImputer] Ending imputation round 8/20, elapsed time 0.02
+[IterativeImputer] Ending imputation round 9/20, elapsed time 0.02
+[IterativeImputer] Ending imputation round 10/20, elapsed time 0.02
+[IterativeImputer] Ending imputation round 11/20, elapsed time 0.02
+[IterativeImputer] Ending imputation round 12/20, elapsed time 0.02
+[IterativeImputer] Ending imputation round 13/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 14/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 15/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 16/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 17/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 18/20, elapsed time 0.03
+[IterativeImputer] Ending imputation round 19/20, elapsed time 0.04
+[IterativeImputer] Ending imputation round 20/20, elapsed time 0.04
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.
+  " reached.", ConvergenceWarning)
+
+
+
+ +
+ +
Out[14]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f96dca79190>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [15]:
+
+
+
if max4 < 10:
+    max4 = 10
+
+idf4 = impute_df(split_df4, max_iter= int(max4), verbose=2)
+size = idf4.shape[1]
+corr = idf4.corr()
+fig = plt.subplots(figsize=(15,15))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 9)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.06
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.08
+[IterativeImputer] Ending imputation round 3/10, elapsed time 0.09
+[IterativeImputer] Ending imputation round 4/10, elapsed time 0.11
+[IterativeImputer] Ending imputation round 5/10, elapsed time 0.12
+[IterativeImputer] Ending imputation round 6/10, elapsed time 0.13
+[IterativeImputer] Ending imputation round 7/10, elapsed time 0.14
+[IterativeImputer] Ending imputation round 8/10, elapsed time 0.15
+[IterativeImputer] Ending imputation round 9/10, elapsed time 0.16
+[IterativeImputer] Ending imputation round 10/10, elapsed time 0.18
+[IterativeImputer] Completing matrix with shape (195, 9)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.00
+[IterativeImputer] Ending imputation round 3/10, elapsed time 0.00
+[IterativeImputer] Ending imputation round 4/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 5/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 6/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 7/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 8/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 9/10, elapsed time 0.01
+[IterativeImputer] Ending imputation round 10/10, elapsed time 0.01
+/opt/anaconda3/lib/python3.7/site-packages/sklearn/impute/_iterative.py:603: ConvergenceWarning: [IterativeImputer] Early stopping criterion not reached.
+  " reached.", ConvergenceWarning)
+
+
+
+ +
+ +
Out[15]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f96dcaaca90>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [16]:
+
+
+
if max5 < 10:
+    max5 = 10
+
+idf5 = impute_df(split_df5, max_iter= int(max5), verbose=2)
+size = idf5.shape[1]
+corr = idf5.corr()
+fig = plt.subplots(figsize=(15,15))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 4)
+[IterativeImputer] Ending imputation round 1/10, elapsed time 0.03
+[IterativeImputer] Ending imputation round 2/10, elapsed time 0.04
+[IterativeImputer] Ending imputation round 3/10, elapsed time 0.04
+[IterativeImputer] Ending imputation round 4/10, elapsed time 0.04
+[IterativeImputer] Ending imputation round 5/10, elapsed time 0.05
+[IterativeImputer] Ending imputation round 6/10, elapsed time 0.05
+[IterativeImputer] Ending imputation round 7/10, elapsed time 0.06
+[IterativeImputer] Ending imputation round 8/10, elapsed time 0.08
+[IterativeImputer] Early stopping criterion reached.
+[IterativeImputer] Completing matrix with shape (195, 4)
+[IterativeImputer] Ending imputation round 1/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 3/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 4/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 5/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 6/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 7/8, elapsed time 0.00
+[IterativeImputer] Ending imputation round 8/8, elapsed time 0.00
+
+
+
+ +
+ +
Out[16]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f96dcaaa210>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
In [17]:
+
+
+
if max6 < 10:
+    max6 = 10
+
+idf6 = impute_df(split_df6, max_iter= int(max6), verbose=2)
+size = idf6.shape[1]
+corr = idf6.corr()
+plt.subplots(figsize=(20,20))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[IterativeImputer] Completing matrix with shape (195, 9)
+[IterativeImputer] Ending imputation round 1/22, elapsed time 0.12
+[IterativeImputer] Ending imputation round 2/22, elapsed time 0.14
+[IterativeImputer] Ending imputation round 3/22, elapsed time 0.15
+[IterativeImputer] Ending imputation round 4/22, elapsed time 0.16
+[IterativeImputer] Ending imputation round 5/22, elapsed time 0.17
+[IterativeImputer] Ending imputation round 6/22, elapsed time 0.18
+[IterativeImputer] Ending imputation round 7/22, elapsed time 0.20
+[IterativeImputer] Early stopping criterion reached.
+[IterativeImputer] Completing matrix with shape (195, 9)
+[IterativeImputer] Ending imputation round 1/7, elapsed time 0.00
+[IterativeImputer] Ending imputation round 2/7, elapsed time 0.01
+[IterativeImputer] Ending imputation round 3/7, elapsed time 0.01
+[IterativeImputer] Ending imputation round 4/7, elapsed time 0.01
+[IterativeImputer] Ending imputation round 5/7, elapsed time 0.01
+[IterativeImputer] Ending imputation round 6/7, elapsed time 0.01
+[IterativeImputer] Ending imputation round 7/7, elapsed time 0.02
+
+
+
+ +
+ +
Out[17]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f96dcad8550>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
+

The imputed dataframes are merged and the result is displayed below

+
+
+
+
+
+
In [18]:
+
+
+
# 29 - 5 - 11 - 9 - 4 - 9
+final_df = idf1.merge(idf2, left_index=True, right_index=True)
+print(final_df.shape)
+final_df = final_df.merge(idf3, left_index=True, right_index=True)
+print(final_df.shape)
+final_df = final_df.merge(idf4, left_index=True, right_index=True)
+print(final_df.shape)
+final_df = final_df.merge(idf5, left_index=True, right_index=True)
+print(final_df.shape)
+final_df = final_df.merge(idf6, left_index=True, right_index=True)
+print(final_df.shape)
+
+display(final_df)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
(195, 34)
+(195, 45)
+(195, 54)
+(195, 58)
+(195, 67)
+
+
+
+ +
+ +
+ + + +
+

Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Share of seats in parliament (% held by women)Vulnerable employment (% of total employment)Urban population (%)Labour force participation rate (% ages 15 and older), femaleLabour force participation rate (% ages 15 and older), maleRemittances, inflows (% of GDP)Foreign direct investment, net inflows (% of GDP)...Population under age 5 (millions)Adolescent birth rate (births per 1,000 women ages 15-19)Sex ratio at birth (male to female births)Young age (0-14) dependency ratio (per 100 people ages 15-64)Old-age (65 and older) dependency ratio (per 100 people ages 15-64)HDI rankTotal unemployment rate (female to male ratio)Youth unemployment rate (female to male ratio)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
AFG26.08013.22036.92027.24479.7260025.821.59500074.6580004.5420000.123000...5.63900068.9570001.06000077.3460004.764000169.01.3560001.30800029.7727320.360280
AGO30.23223.13338.05630.00065.9950066.276.13600078.9130000.002000-4.331000...5.670000150.5260001.03000091.0970004.297000148.01.0160000.90600031.7330000.397000
ALB93.17493.70092.49729.50852.8520061.246.71200064.5680009.6400007.912000...0.16900019.6420001.09000025.43900020.76400069.00.9030000.79900010.8930000.708000
AND72.32771.48473.32746.42915.4288488.054.35136472.2329831.7035733.286625...-7.12225914.9769281.05854725.01792821.99298736.01.7884141.41207310.0665360.786988
ARG57.15859.16154.82839.87721.8050092.050.72100072.7300000.1190001.389000...3.74200062.7820001.04000038.33400017.52300046.01.2220001.29100013.2380000.729000
..................................................................
WSM74.94279.12771.58310.00029.9830018.131.10400055.45600017.2540003.114609...0.02700023.8860001.08000066.1940008.623000111.01.2970001.49200021.1047000.551524
YEM28.02019.92036.9180.97145.6270037.35.83400070.1830007.999177-4.382862...4.09900060.3520001.05000067.7730005.015000179.02.0880001.46700030.8670000.321000
ZAF75.47874.97778.20745.33310.2980066.949.61000062.7490000.2530001.316000...5.78600067.9080001.03000044.1480008.253000114.01.1490001.16100031.1630000.468000
ZMB44.44038.48854.06817.96478.1340044.170.37000079.0760000.5510002.087000...2.902000120.1120001.03000083.2290003.960000146.01.1470001.07900030.5920000.401000
ZWE64.93559.79270.78334.57164.7390032.278.10600088.9930008.0680004.005312...2.13800086.1350001.02000076.8450005.433000150.01.2310001.26900022.5250000.441000
+

195 rows × 67 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [19]:
+
+
+
size = final_df.shape[1]
+corr = final_df.corr()
+plt.subplots(figsize=(20,20))
+sns.heatmap(corr, vmin=-1, vmax=1, center=0, xticklabels=range(size), yticklabels=range(size))
+
+ +
+
+
+ +
+
+ + +
+ +
Out[19]:
+ + + + +
+
<matplotlib.axes._subplots.AxesSubplot at 0x7f96dd44db50>
+
+ +
+ +
+ +
+ + + +

+ +
+ +
+
+ +
+
+
+
+

PCA of the Final Dataset

+
+
+
+
+
+
In [20]:
+
+
+
x = final_df.loc[:, final_df.columns].values
+print(x)
+x = StandardScaler().fit_transform(x)
+
+print(final_df.shape)
+print(x.shape)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
[[26.08       13.22       36.92       ...  1.308      29.77273216
+   0.36028022]
+ [30.232      23.133      38.056      ...  0.906      31.733
+   0.397     ]
+ [93.174      93.7        92.497      ...  0.799      10.893
+   0.708     ]
+ ...
+ [75.478      74.977      78.207      ...  1.161      31.163
+   0.468     ]
+ [44.44       38.488      54.068      ...  1.079      30.592
+   0.401     ]
+ [64.935      59.792      70.783      ...  1.269      22.525
+   0.441     ]]
+(195, 67)
+(195, 67)
+
+
+
+ +
+
+ +
+
+
+
In [21]:
+
+
+
np.mean(x), np.std(x)
+
+ +
+
+
+ +
+
+ + +
+ +
Out[21]:
+ + + + +
+
(-1.713134035701734e-17, 1.0)
+
+ +
+ +
+
+ +
+
+
+
In [22]:
+
+
+
feat_cols = final_df.columns.values.tolist()
+#print(feat_cols)
+normalized_final_df = pd.DataFrame(x, columns=feat_cols)
+normalized_final_df.head()
+
+ +
+
+
+ +
+
+ + +
+ +
Out[22]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Population with at least some secondary education (% ages 25 and older)Population with at least some secondary education, female (% ages 25 and older)Population with at least some secondary education, male (% ages 25 and older)Share of seats in parliament (% held by women)Vulnerable employment (% of total employment)Urban population (%)Labour force participation rate (% ages 15 and older), femaleLabour force participation rate (% ages 15 and older), maleRemittances, inflows (% of GDP)Foreign direct investment, net inflows (% of GDP)...Population under age 5 (millions)Adolescent birth rate (births per 1,000 women ages 15-19)Sex ratio at birth (male to female births)Young age (0-14) dependency ratio (per 100 people ages 15-64)Old-age (65 and older) dependency ratio (per 100 people ages 15-64)HDI rankTotal unemployment rate (female to male ratio)Youth unemployment rate (female to male ratio)Coefficient of human inequalityInequality-adjusted HDI (IHDI)
0-1.242745-1.537407-0.9721840.3666961.554740-1.443911-2.0356800.233312-0.027719-0.441756...0.2250850.5348510.4625241.513894-0.9507051.364735-0.115960-0.0497731.192044-1.315801
1-1.094493-1.202607-0.9300830.6013351.0448870.2996181.6126590.733974-0.796618-0.915527...0.2278332.593772-1.2161612.161911-0.9997770.976870-0.308338-0.4850751.408239-1.110134
21.1529201.1807061.0875380.5594470.5568680.083835-0.355562-0.9539240.8356830.386758...-0.259851-0.7099322.141209-0.9322260.730564-0.482239-0.372276-0.600939-0.8901680.631779
30.4085550.4303880.3770852.000054-0.8327101.2404330.155448-0.052027-0.508438-0.105242...-0.906248-0.8276850.381230-0.9520690.859705-1.0917410.1287070.062921-0.9813171.074191
4-0.1330710.014194-0.3085011.442235-0.5959531.413060-0.0873930.006454-0.776802-0.307092...0.0569090.378985-0.656599-0.3245480.390002-0.907043-0.191780-0.068182-0.6315420.749400
+

5 rows × 67 columns

+
+
+ +
+ +
+
+ +
+
+
+
In [23]:
+
+
+
num_components = 3
+pca_final = PCA(n_components=num_components)
+pComponents_final = pca_final.fit_transform(x)
+component_col = ['PC'+str(i+1) for i in range(num_components)]
+print(component_col)
+
+percentage_list = [element * 100 for element in pca_final.explained_variance_ratio_]
+percentage_list = ['%.2f' % elem for elem in percentage_list]
+print(percentage_list)
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
['PC1', 'PC2', 'PC3']
+['49.73', '7.93', '7.18']
+
+
+
+ +
+
+ +
+
+
+
In [24]:
+
+
+
pc_final_df = pd.DataFrame(data = pComponents_final, columns = component_col)
+print(pc_final_df.shape)
+pc_final_df.head()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
(195, 3)
+
+
+
+ +
+ +
Out[24]:
+ + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
PC1PC2PC3
08.6609253.855080-0.080563
17.369314-2.6161490.028787
2-3.2873242.085875-0.449373
3-5.811149-0.738707-0.166838
4-3.9175830.676241-0.271113
+
+
+ +
+ +
+
+ +
+
+
+
In [25]:
+
+
+
print('Explained variation percentage per principal component: {}'.format(percentage_list))
+total_explained_percentage = (sum(pca_final.explained_variance_ratio_)*100)
+print('Total percentage of the explained data by',pca_final.n_components,'components is: %.2f' %total_explained_percentage)
+print('Percentage of the information that is lost for using',pca_final.n_components,'components is: %.2f' %(100-total_explained_percentage))
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
Explained variation percentage per principal component: ['49.73', '7.93', '7.18']
+Total percentage of the explained data by 3 components is: 64.84
+Percentage of the information that is lost for using 3 components is: 35.16
+
+
+
+ +
+
+ +
+
+
+
+

The 3 main principal components are presented

+
+
+
+
+
+
In [26]:
+
+
+
l_dict = {}
+for i in range(len(percentage_list)):
+    l_dict[str(i)] = 'PC'+str(i+1)+' '+str(percentage_list[i])+'%'
+
+print(l_dict)
+
+fig = px.scatter_3d(
+    pComponents_final, x=0, y=1, z=2,
+    title=f'Total Explained Variance: {total_explained_percentage:.2f}%',
+    labels=l_dict
+)
+
+fig.show()
+
+ +
+
+
+ +
+
+ + +
+ +
+ + +
+
{'0': 'PC1 49.73%', '1': 'PC2 7.93%', '2': 'PC3 7.18%'}
+
+
+
+ +
+ +
+ + + + + +
+ +
+
+ +
+
+
+
+

Conversion of the Dataset to CSV

+
+
+
+
+
+
In [29]:
+
+
+
final_df.to_csv("../data/unlabeled/preprocessed/hdro_preprocessed.csv")
+
+ +
+
+
+ +
+
+
+ + + + + + diff --git a/documentation/WaterSecurity/unlabeled_preprocessing/index.html b/documentation/WaterSecurity/unlabeled_preprocessing/index.html index 2e19913..d7cad3b 100644 --- a/documentation/WaterSecurity/unlabeled_preprocessing/index.html +++ b/documentation/WaterSecurity/unlabeled_preprocessing/index.html @@ -5,7 +5,10 @@ WaterSecurity.unlabeled_preprocessing API documentation - + @@ -22,6 +25,27 @@

Module WaterSecurity.unlabeled_preprocessing

+

Notebooks below

+ +
+ +Expand source code + +
"""
+## Notebooks below
+* [Combining datasets](../notebooks/combine_unlabeled.html)
+* [Econ preprocessing](../notebooks/prep_economic_v2.html)
+* [Education preprocessing](../notebooks/prep_edstats.html)
+* [Human development preprocessing](../notebooks/prep_hdro_v2.html)
+* [Aquastat dataset](../notebooks/prep_aquastat.html)
+"""
+

Sub-modules

@@ -42,7 +66,9 @@

Sub-modules