Data Wrangling
Simple calculation using Pandas
import pandas as pd
# Simple calculations on DataFrame columns
# Count of non-missing values
count_1 = df['var1'].count()  # One column (example: cases)
count_2 = df[['var1', 'var2']].count()  # Several columns (example: cases, deaths)
count_all = df.count()  # Every column in the table
# Mean
mean_value = df['var1'].mean()
# To round to two decimal places instead: round(df['var1'].mean(), 2)
# Standard deviation (pandas uses the sample formula, ddof=1, by default)
std_value = df['var1'].std()
# Report the results, using the same "Label:" format on every line
print('Descriptive statistics of cases')
print('Count:', count_1)
print('Mean:', mean_value)
print('Standard Deviation:', std_value)
Descriptive statistics
import pandas as pd
# Descriptive statistics (count, mean, std, min, quartiles, max) of one variable
descriptive_stats_1 = df['var1'].describe()
# Descriptive statistics of more than one variable
# (uses the same placeholder column names as the other snippets; example: cases, deaths)
descriptive_stats_2 = df[['var1', 'var2']].describe()
# Descriptive statistics of all numerical variables in a DataFrame
descriptive_stats_all = df.describe()
Extract a subset by columns and rows
# Column subset: keep only the listed columns.
subset_1 = df[
    ['var1', 'var2', 'var3']
]
# Row subset: keep only the rows whose var1 is greater than the threshold (example: 100).
subset_2 = df[df['var1'].gt(100)]
Data aggregation
# Collapse (aggregate) rows that share the same grouping key.
# One grouping variable, one aggregation method: sum per var1 value.
# dropna=True leaves out rows whose grouping key is missing.
df_1 = (
    df.groupby(['var1'], dropna=True)
    .sum()
    .reset_index()
)
# One grouping variable, a separate aggregation method per column
# (var2 and var3 need to hold numeric data).
df_2 = (
    df.groupby(['var1'])
    .agg({'var2': ['sum'], 'var3': ['mean']})
    .reset_index()
)
Last updated