Eurostat statistics part IV: illegally present third-country nationals under 18 years old, by host country

illegals

[Datasource:Eurostat]

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re

# import and clean data
# Patterns for GEO labels that carry a parenthesised suffix: the name, any one
# character, an opening parenthesis and everything after it.
regx1 = re.compile(r'Germany.\(.*')
regx2 = re.compile(r'European Union.\(.*')

# NOTE(review): this mapping is not passed to read_csv; 'Value' is converted
# to int explicitly below.
types = {'Value' : 'int'}

# ':' cells are read as missing values; only the first seven columns are used.
df = pd.read_csv('eurostat/illegala.csv', na_values=[':'], usecols=range(7))
# Discard every row that has a missing value in any loaded column.
df = df.dropna()

# Remove literal comma separators so the counts can be converted to integers.
df['Value'] = df['Value'].str.replace(',', '', regex=False).astype(int)

# regex=True is spelled out because pandas >= 2.0 defaults to regex=False and
# then rejects a compiled pattern with a ValueError.
df['GEO'] = df['GEO'].str.replace(regx1, 'Germany', regex=True)
df['GEO'] = df['GEO'].str.replace(regx2, 'EU', regex=True)

# Rows excluded from the plot; judging by their labels these are groupings
# rather than individual host countries. 'EU' is the label produced by the
# regx2 replacement above.
excluded_geo = [
    'European Free Trade Association',
    'Candidate countries in 2007 (3 countries)',
    'Total',
    'EU',
]
df = df[~df['GEO'].isin(excluded_geo)]

# Index by TIME while keeping TIME available as a regular column as well.
df = df.set_index('TIME', drop=False)

### year shown in the chart
year = 2014
###

# Row masks shared by both under-age selections.
is_minor = df['AGE'] == 'Less than 18 years'
in_year = df['TIME'] == year

# select all under-age subsets, one per sex
males_under_18 = df.loc[(df['SEX'] == 'Males') & is_minor & in_year]
females_under_18 = df.loc[(df['SEX'] == 'Females') & is_minor & in_year]

# Host countries in order of first appearance in the male subset, and their
# first three characters for use as tick labels.
countries = list(males_under_18['GEO'].unique())
xticks = [country[0:3] for country in countries]

# Keep only the columns used downstream, for rows of the selected year.
kept_columns = ['TIME', 'GEO', 'Value']
values_m = males_under_18.loc[males_under_18['TIME'] == year, kept_columns]
values_f = females_under_18.loc[females_under_18['TIME'] == year, kept_columns]

# Grouped bar chart: one male and one female bar per host country.

# Index each sex's counts by host country so bars are matched by country name
# rather than by row position; the two subsets need not contain the same
# countries, or list them in the same order.
# NOTE(review): assumes at most one row per country in values_m / values_f;
# reindex raises on duplicate GEO labels - confirm against the data.
male_counts = values_m.set_index('GEO')['Value']
female_counts = values_f.set_index('GEO')['Value']

# Countries in first-seen order: those of the male subset first, followed by
# any that appear only in the female subset.
plot_countries = male_counts.index.append(female_counts.index).unique()

# A country absent from one subset gets a zero-height bar for that sex.
male_heights = male_counts.reindex(plot_countries, fill_value=0)
female_heights = female_counts.reindex(plot_countries, fill_value=0)

positions = np.arange(len(plot_countries))
bar_width = 0.4

plt.title(
    'Third country nationals under 18 found to be illegally present {}'.format(
        year))
# Shift each series half a bar width either side of the tick position.
plt.bar(positions - bar_width / 2, male_heights.values, width=bar_width,
        color='b', label='Male')
plt.bar(positions + bar_width / 2, female_heights.values, width=bar_width,
        color='r', label='Female')

# Tick labels are the first three characters of each country name.
plt.xticks(positions, [name[0:3] for name in plot_countries])
plt.xlabel('$Datasource:Eurostat$')
plt.ylabel('Number of individuals')
plt.legend(loc='upper left')
plt.show()

About swdevperestroika

High-tech industry veteran and avid hacker, reluctantly transformed into a management consultant.
This entry was posted in Data Analytics, Politik, Python, Statistics, Sverige and tagged , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out /  Change )

Twitter picture

You are commenting using your Twitter account. Log Out /  Change )

Facebook photo

You are commenting using your Facebook account. Log Out /  Change )

Connecting to %s