In [9]:
# Plotly setup for VS Code
# Choose the renderer plotly uses when a figure is displayed in a cell.
import plotly.io as pio
pio.renderers.default = 'notebook'

# Initialise plotly's notebook mode.
# NOTE(review): init_notebook_mode may itself reassign pio.renderers.default,
# which would override the 'notebook' choice above; confirm against the plotly
# docs before reordering or removing either statement.
import plotly.offline as py
py.init_notebook_mode(connected=True)
In [10]:
#import all the necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
In [11]:
# Plotly imports used by the plotting cells of this notebook.
# Each module is imported exactly once (plotly.figure_factory was previously
# imported twice in this cell).
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objs as go
import plotly.offline as py
from plotly.figure_factory import create_table

# `py` is already the plotly.offline package, so call init_notebook_mode on it
# directly. The former `py.offline.init_notebook_mode` spelling only resolved
# because the package happens to expose its `offline` submodule as an attribute.
py.init_notebook_mode(connected=True)
In [12]:
# Repeats the notebook-mode initialisation done in the earlier setup cells.
# `py` is the plotly.offline package itself, so the function is called on it
# directly rather than through the `py.offline` submodule attribute.
py.init_notebook_mode(connected=True)
In [38]:
# Load the Gapminder sample dataset that ships with Plotly Express.
gapminder = px.data.gapminder()

# Step 1: count the missing entries in every column and report them.
missing_per_column = gapminder.isna().sum()
print("Missing values per column:")
print(missing_per_column)
Missing values per column:
country      0
continent    0
year         0
lifeExp      0
pop          0
gdpPercap    0
iso_alpha    0
iso_num      0
dtype: int64
In [39]:
# Step 2: build a boolean mask of duplicated rows, then report how many there are.
duplicate_rows = gapminder.duplicated()
n_duplicates = duplicate_rows.sum()
print(f"Number of duplicate rows: {n_duplicates}")
Number of duplicate rows: 0
In [40]:
# Step 3: list the dtype pandas assigned to each column.
column_dtypes = gapminder.dtypes
print("\nData types of each column:")
print(column_dtypes)
Data types of each column:
country       object
continent     object
year           int64
lifeExp      float64
pop            int64
gdpPercap    float64
iso_alpha     object
iso_num        int64
dtype: object
In [41]:
# Step 4: inspect the categorical columns, i.e. the distinct continent labels
# and how many different countries appear.
continent_labels = gapminder['continent'].unique()
country_count = gapminder['country'].nunique()

print("\nUnique continents:")
print(continent_labels)

print("\nNumber of unique countries:")
print(country_count)
Unique continents:
['Asia' 'Europe' 'Africa' 'Americas' 'Oceania']

Number of unique countries:
142
In [42]:
# Step 5: descriptive statistics for the measured quantities, used to eyeball
# the value ranges and spot outliers.
numerical_cols = ['lifeExp', 'pop', 'gdpPercap']
numeric_summary = gapminder[numerical_cols].describe()
print("\nSummary statistics for numerical columns:")
print(numeric_summary)
Summary statistics for numerical columns:
           lifeExp           pop      gdpPercap
count  1704.000000  1.704000e+03    1704.000000
mean     59.474439  2.960121e+07    7215.327081
std      12.917107  1.061579e+08    9857.454543
min      23.599000  6.001100e+04     241.165876
25%      48.198000  2.793664e+06    1202.060309
50%      60.712500  7.023596e+06    3531.846989
75%      70.845500  1.958522e+07    9325.462346
max      82.603000  1.318683e+09  113523.132900
In [43]:
# Preview the first five rows of the dataset
gapminder.head(n=5)
Out[43]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
0 Afghanistan Asia 1952 28.801 8425333 779.445314 AFG 4
1 Afghanistan Asia 1957 30.332 9240934 820.853030 AFG 4
2 Afghanistan Asia 1962 31.997 10267083 853.100710 AFG 4
3 Afghanistan Asia 1967 34.020 11537966 836.197138 AFG 4
4 Afghanistan Asia 1972 36.088 13079460 739.981106 AFG 4
In [44]:
# Check the dimensions of the dataset as a (rows, columns) tuple
gapminder.shape
Out[44]:
(1704, 8)
In [45]:
# List the column labels of the dataset
gapminder.columns
Out[45]:
Index(['country', 'continent', 'year', 'lifeExp', 'pop', 'gdpPercap',
       'iso_alpha', 'iso_num'],
      dtype='object')
In [46]:
# Print the frame summary: index range, per-column non-null counts and dtypes,
# and memory usage
gapminder.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1704 entries, 0 to 1703
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   country    1704 non-null   object 
 1   continent  1704 non-null   object 
 2   year       1704 non-null   int64  
 3   lifeExp    1704 non-null   float64
 4   pop        1704 non-null   int64  
 5   gdpPercap  1704 non-null   float64
 6   iso_alpha  1704 non-null   object 
 7   iso_num    1704 non-null   int64  
dtypes: float64(2), int64(3), object(3)
memory usage: 106.6+ KB
In [47]:
# Descriptive statistics for every numeric column. Unlike the Step 5 summary,
# this also includes the `year` and `iso_num` columns.
gapminder.describe()
Out[47]:
year lifeExp pop gdpPercap iso_num
count 1704.00000 1704.000000 1.704000e+03 1704.000000 1704.000000
mean 1979.50000 59.474439 2.960121e+07 7215.327081 425.880282
std 17.26533 12.917107 1.061579e+08 9857.454543 248.305709
min 1952.00000 23.599000 6.001100e+04 241.165876 4.000000
25% 1965.75000 48.198000 2.793664e+06 1202.060309 208.000000
50% 1979.50000 60.712500 7.023596e+06 3531.846989 410.000000
75% 1993.25000 70.845500 1.958522e+07 9325.462346 638.000000
max 2007.00000 82.603000 1.318683e+09 113523.132900 894.000000
In [48]:
# Number of records per country
gapminder['country'].value_counts()
Out[48]:
country
Afghanistan           12
Albania               12
Algeria               12
Angola                12
Argentina             12
                      ..
Vietnam               12
West Bank and Gaza    12
Yemen, Rep.           12
Zambia                12
Zimbabwe              12
Name: count, Length: 142, dtype: int64
In [49]:
# Number of records per continent
gapminder['continent'].value_counts()
Out[49]:
continent
Africa      624
Asia        396
Europe      360
Americas    300
Oceania      24
Name: count, dtype: int64
In [50]:
# Create a Plotly table from the first few rows of the Gapminder dataset.
# `ff` (plotly.figure_factory) is already imported in the imports cell near the
# top of the notebook, so it is not imported again here.
table_fig = ff.create_table(gapminder.head())

# Show the table
table_fig.show()

Bar chart¶

In [51]:
# Select Canada's rows from the Gapminder frame loaded earlier instead of
# reloading the dataset, matching how the other subsets in this notebook are built.
canada_data = gapminder.query("country == 'Canada'")
canada_data
Out[51]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
240 Canada Americas 1952 68.750 14785584 11367.16112 CAN 124
241 Canada Americas 1957 69.960 17010154 12489.95006 CAN 124
242 Canada Americas 1962 71.300 18985849 13462.48555 CAN 124
243 Canada Americas 1967 72.130 20819767 16076.58803 CAN 124
244 Canada Americas 1972 72.880 22284500 18970.57086 CAN 124
245 Canada Americas 1977 74.210 23796400 22090.88306 CAN 124
246 Canada Americas 1982 75.760 25201900 22898.79214 CAN 124
247 Canada Americas 1987 76.860 26549700 26626.51503 CAN 124
248 Canada Americas 1992 77.950 28523502 26342.88426 CAN 124
249 Canada Americas 1997 78.610 30305843 28954.92589 CAN 124
250 Canada Americas 2002 79.770 31902268 33328.96507 CAN 124
251 Canada Americas 2007 80.653 33390141 36319.23501 CAN 124
In [52]:
# Bar chart of Canada's life expectancy for each year in the data
px.bar(
    canada_data,
    x='year',
    y='lifeExp',
    height=400,
)
In [53]:
# Bar chart of Canada's population per year, with each bar coloured by life
# expectancy; the 'pop' column is relabelled as 'population' on the figure.
px.bar(
    canada_data,
    x='year',
    y='pop',
    color='lifeExp',
    height=400,
    labels={'pop': 'population'},
)

Life Expectancy vs GDP per capita

In [54]:
# Keep only the rows recorded for the year 2007
is_2007 = gapminder['year'] == 2007
gapminder2007 = gapminder.loc[is_2007]
gapminder2007
Out[54]:
country continent year lifeExp pop gdpPercap iso_alpha iso_num
11 Afghanistan Asia 2007 43.828 31889923 974.580338 AFG 4
23 Albania Europe 2007 76.423 3600523 5937.029526 ALB 8
35 Algeria Africa 2007 72.301 33333216 6223.367465 DZA 12
47 Angola Africa 2007 42.731 12420476 4797.231267 AGO 24
59 Argentina Americas 2007 75.320 40301927 12779.379640 ARG 32
... ... ... ... ... ... ... ... ...
1655 Vietnam Asia 2007 74.249 85262356 2441.576404 VNM 704
1667 West Bank and Gaza Asia 2007 73.422 4018332 3025.349798 PSE 275
1679 Yemen, Rep. Asia 2007 62.698 22211743 2280.769906 YEM 887
1691 Zambia Africa 2007 42.384 11746035 1271.211593 ZMB 894
1703 Zimbabwe Africa 2007 43.487 12311143 469.709298 ZWE 716

142 rows × 8 columns

In [55]:
# Scatter plot of life expectancy against GDP per capita for 2007
px.scatter(
    gapminder2007,
    x='gdpPercap',
    y='lifeExp',
)
In [56]:
# Same scatter plot, with points coloured by continent
px.scatter(
    gapminder2007,
    x='gdpPercap',
    y='lifeExp',
    color='continent',
)
In [57]:
# Bubble plot: marker size follows population, capped at size_max
px.scatter(
    gapminder2007,
    x='gdpPercap',
    y='lifeExp',
    color='continent',
    size='pop',
    size_max=60,
)
In [58]:
# Bubble plot with the country name shown as the hover title
px.scatter(
    gapminder2007,
    x='gdpPercap',
    y='lifeExp',
    color='continent',
    size='pop',
    size_max=60,
    hover_name='country',
)
In [59]:
# Facet plot: one bubble-plot panel per continent, built from the full dataset
# (all years) with a linear GDP axis
px.scatter(
    gapminder,
    x='gdpPercap',
    y='lifeExp',
    color='continent',
    size='pop',
    size_max=60,
    hover_name='country',
    facet_col='continent',
)
In [60]:
# Facet plot again, this time with a logarithmic GDP-per-capita axis
px.scatter(
    gapminder,
    x='gdpPercap',
    y='lifeExp',
    color='continent',
    size='pop',
    size_max=60,
    hover_name='country',
    facet_col='continent',
    log_x=True,
)
In [61]:
# Animated bubble plot: one frame per year, with each country tracked across
# frames via animation_group. The y-range is pinned to [23, 90] so it spans the
# dataset's lifeExp values (min 23.599, max 82.603 per the summary statistics).
px.scatter(
    gapminder,
    x='gdpPercap',
    y='lifeExp',
    color='continent',
    size='pop',
    size_max=40,
    hover_name='country',
    log_x=True,
    animation_frame='year',
    animation_group='country',
    range_y=[23, 90],
)
In [62]:
# Animated world map: countries are located by their iso_alpha code and shaded
# by life expectancy, with one frame per year
px.choropleth(
    gapminder,
    locations='iso_alpha',
    color='lifeExp',
    hover_name='country',
    animation_frame='year',
    color_continuous_scale=px.colors.sequential.Plasma,
    projection='natural earth',
)