import pandas as pd

# Simple calculation
# NOTE: `df` is assumed to be an existing pandas DataFrame with the placeholder
# columns 'var1' and 'var2'; it is not created in this snippet.

# Count
# Count of one column (example: cases); count() only counts non-NA values
count_1 = df['var1'].count()
# Count of more than one column (example: cases, deaths)
count_2 = df[['var1', 'var2']].count()
# Count of all variables in the table
count_all = df.count()

# Mean
mean_value = df['var1'].mean()
# If decimal places are needed, round the result instead:
#     mean_value = round(df['var1'].mean(), 2)

# Standard deviation (pandas uses the sample standard deviation, ddof=1, by default)
std_value = df['var1'].std()

print('Descriptive statistics of cases')
print('Count:', count_1)
print('Mean', mean_value)
print('Stand Deviation', std_value)
Descriptive statistics
import pandas as pd

# NOTE: `df` is assumed to be an existing pandas DataFrame with the placeholder
# columns 'var1' and 'var2'; it is not created in this snippet.

# Descriptive statistics of one variable
descriptive_stats_1 = df['var1'].describe()

# Descriptive statistics of more than one variable
# (uses the same placeholder column names as the other snippets)
descriptive_stats_2 = df[['var1', 'var2']].describe()

# Descriptive statistics of all numerical variables in a dataframe
descriptive_stats_all = df.describe()
Extract a subset by columns and rows
# NOTE: `df` is assumed to be an existing pandas DataFrame with the placeholder
# columns 'var1', 'var2' and 'var3'; it is not created in this snippet.

# Subset by column name(s)
subset_1 = df[['var1', 'var2', 'var3']]

# Select rows by value (example: keep rows where var1 is greater than 100)
subset_2 = df[df["var1"] > 100]
Data aggregation
# Collapse
# NOTE: `df` is assumed to be an existing pandas DataFrame with the placeholder
# columns 'var1', 'var2' and 'var3'; it is not created in this snippet.

# Collapse data by one variable, one aggregation method.
# dropna=True (the pandas default) leaves rows with a missing 'var1' out of the groups.
df_1 = df.groupby(['var1'], dropna=True).sum().reset_index()

# Collapse data by one variable, by more than one aggregation method.
# var2 and var3 need to be numeric data.
# The aggregations are passed as lists, so the result has two-level column
# labels such as ('var2', 'sum') and ('var3', 'mean').
df_2 = df.groupby(['var1']).agg({'var2': ['sum'], 'var3': ['mean']}).reset_index()