[Datasource:Eurostat]
# Grouped bar chart (male vs. female) of third-country nationals under 18
# found to be illegally present, per country, for a single year.
# Input: 'eurostat/illegala.csv' (first seven columns are read).
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# import and clean data
# Patterns that collapse the long parenthesised labels to short names.
regx1 = re.compile(r'Germany.\(.*')
regx2 = re.compile(r'European Union.\(.*')

# GEO labels that are removed before plotting. 'EU' is the label produced
# by the regx2 substitution below, so the filter must run after it.
excluded_geos = [
    'European Free Trade Association',
    'Candidate countries in 2007 (3 countries)',
    'Total',
    'EU',
]

# ':' is treated as a missing value; any row with a missing field is dropped.
df = pd.read_csv('eurostat/illegala.csv', na_values=[':'], usecols=range(7))
df = df.dropna()

# Values carry ',' as a thousands separator; strip it before converting.
df['Value'] = df['Value'].str.replace(',', '', regex=False).astype(int)

# regex=True is mandatory for compiled patterns: pandas >= 2.0 defaults to
# regex=False and rejects a compiled pattern in that mode.
df['GEO'] = df['GEO'].str.replace(regx1, 'Germany', regex=True)
df['GEO'] = df['GEO'].str.replace(regx2, 'EU', regex=True)

df = df[~df['GEO'].isin(excluded_geos)]
# TIME stays available both as the index and as a regular column.
df = df.set_index('TIME', drop=False)

###
year = 2014
###

# select all under-age subsets
under_18 = df[(df['AGE'] == 'Less than 18 years') & (df['TIME'] == year)]
males_under_18 = under_18[under_18['SEX'] == 'Males']
females_under_18 = under_18[under_18['SEX'] == 'Females']

values_m = males_under_18[['TIME', 'GEO', 'Value']]
values_f = females_under_18[['TIME', 'GEO', 'Value']]

# Key both series by country and put them on one shared country axis.
# Pairing by row position would shift bars under the wrong label whenever
# dropna() removed a country for only one of the two sexes.
male_counts = values_m.set_index('GEO')['Value']
female_counts = values_f.set_index('GEO')['Value']

# Male order first (as in the data), then any female-only countries.
countries = list(dict.fromkeys([*male_counts.index, *female_counts.index]))

# A country absent for one sex gets a zero-height (invisible) bar.
# NOTE(review): reindex raises if a country occurs more than once for a
# sex; this assumes one row per country/sex/age/year -- confirm for the CSV.
male_counts = male_counts.reindex(countries, fill_value=0)
female_counts = female_counts.reindex(countries, fill_value=0)

# Tick labels are the first three characters of each country name.
xticks = [name[:3] for name in countries]
positions = np.arange(len(countries))

plt.title(
    'Third country nationals under 18 found to be illegally present {}'.format(
        year))
# Two bars of width 0.4 per country, centred left and right of the tick.
plt.bar(positions - 0.2, male_counts, width=0.4, color='b', label='Male')
plt.bar(positions + 0.2, female_counts, width=0.4, color='r', label='Female')
plt.xticks(positions, xticks)
plt.xlabel('$Datasource:Eurostat$')
plt.ylabel('Number of individuals')
plt.legend(loc='upper left')
plt.show()