Step 1: Go to your Google Workspace Marketplace and search for "colab"
Step 2: Locate "Colaboratory" and click add to drive
Step 3: Access Colab the same way you would open a regular Google Doc
Basic Layout in Google Colab
Results Export
Export data to a CSV file in a local directory
Export data to CSV file to Google Drive
Export plot image to local directory
# Export a DataFrame to a CSV file and download it to the local machine.
# Expects `df` to be an existing pandas DataFrame created in an earlier cell.
from google.colab import files

df.to_csv('FILENAME.csv')  # Write the CSV file into the current working directory
files.download('FILENAME.csv')  # A download window pops up
# Export a DataFrame to a CSV file stored on Google Drive.
# Expects `df` to be an existing pandas DataFrame created in an earlier cell.
from google.colab import drive

drive.mount('/content/drive')  # Copy and paste the Google authentication code when prompted
# Replace the placeholder with the destination path on the mounted drive;
# it should end with the CSV file name, not just the folder.
df.to_csv('/content/drive/PATH TO Google Drive FOLDER')
Import csv files from local directory
pandas tutorial: pandas.read_csv
# Upload a CSV file from your local directory
import pandas as pd
from google.colab import files

uploaded = files.upload()  # Opens a file picker; the chosen file is copied into the runtime

df = pd.read_csv('FILENAME.csv')  # The filename of the uploaded CSV file
# df.shape
df.head()
# Upload an Excel file from your local directory
import pandas as pd
from google.colab import files

uploaded = files.upload()  # Opens a file picker; the chosen file is copied into the runtime

df = pd.read_excel('FILENAME.xlsx')  # The filename of the uploaded Excel file
# df.shape
df.head()
Import data from URL
# Read a CSV file directly from a URL.
# Example: NYT GitHub COVID-19 data
# https://raw.githubusercontent.com/nytimes/covid-19-data/master/live/us-counties.csv
import pandas as pd  # imported here so the cell runs on its own

url = 'The URL of data'
df = pd.read_csv(url)
# df.shape
df.head()
Google Drive Import
Read data by Google Sheets Name
#gspread setup!pip install --upgrade gspread#Authenticate access to your Google Drivefrom google.colab import authauth.authenticate_user()import gspreadfrom oauth2client.client import GoogleCredentialsgc = gspread.authorize(GoogleCredentials.get_application_default())import pandas as pdworksheet = gc.open('Google Sheets NAME').sheet1 rows = worksheet.get_all_values()# get_all_values gives a list of rows.df = pd.DataFrame.from_records(rows)# Convert to a DataFrame and render.df.head()
Read data by Google Sheets ID
#gspread setup!pip install --upgrade gspread#Authenticate access to your Google Drivefrom google.colab import authauth.authenticate_user()import gspreadfrom oauth2client.client import GoogleCredentialsgc = gspread.authorize(GoogleCredentials.get_application_default())worksheet = gc.open_by_key('Google Sheets ID').worksheet('NAME OF A SHEET TAB')#Call by Sheet ID & Namerows = worksheet.get_all_values()# get_all_values gives a list of rows.df = pd.DataFrame.from_records(rows[1:], columns=rows[0])# Convert to a DataFrame and render. 1st Row as Headersdf.head()
Read CSV file from Google Drive - by sharing link
!pip install -U -q PyDrive from pydrive.auth import GoogleAuth from pydrive.drive import GoogleDrive from google.colab import auth from oauth2client.client import GoogleCredentials # Authenticate and create the PyDrive client. auth.authenticate_user()gauth =GoogleAuth()gauth.credentials = GoogleCredentials.get_application_default()drive =GoogleDrive(gauth)#Copy and paste Google Authentication codelink ='SHARING LINK'#The sharing link of the data file stored on your Google Driveid= link.split("/")[-2]#print(id)downloaded = drive.CreateFile({'id':id})downloaded.GetContentFile('covid_county.csv')df = pd.read_csv('covid_county.csv')df.head()
Read CSV file from Google Drive - by mounting Google Drive
# Read a CSV file from Google Drive by mounting the drive.
import pandas as pd  # imported here so the cell runs on its own
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

path = '/content/drive/PATH TO THE FILE'
df = pd.read_csv(path)
df.head()
Google Colab
In this tutorial, you will learn how to install Google Colab in your Google Drive, and use Colab to perform a number of data tasks including:
import pandas as pd

# Simple calculation
# Expects `df` to be an existing pandas DataFrame created in an earlier cell.

# Count
count_1 = df['var1'].count()  # Count of one column (example: cases)
count_2 = df[['var1', 'var2']].count()  # Count of more than one column (example: cases, deaths)
count_all = df.count()  # Count all variables in the table

# Mean
mean_value = df['var1'].mean()
# mean = round(df['var1'].mean(), 2)  # If decimal places are needed

# Standard deviation
std_value = df['var1'].std()

print('Descriptive statistics of cases')
print('Count:', count_1)
print('Mean', mean_value)
print('Standard Deviation', std_value)
Descriptive statistics
import pandas as pd

# Expects `df` to be an existing pandas DataFrame created in an earlier cell.

# Descriptive statistics of one variable
descriptive_stats_1 = df['var1'].describe()

# Descriptive statistics of more than one variable
descriptive_stats_2 = df[['var1', 'deaths']].describe()

# Descriptive statistics of all numerical variables in a DataFrame
descriptive_stats_all = df.describe()
Extract a subset by columns and rows
# Expects `df` to be an existing pandas DataFrame created in an earlier cell.

# Subset by column name(s)
subset_1 = df[['var1', 'var2', 'var3']]

# Select rows by value (example: keep rows where var1 is greater than 100)
subset_2 = df[df["var1"] > 100]
Data aggregation
# Collapse
# Expects `df` to be an existing pandas DataFrame created in an earlier cell.

# Collapse data by one variable, one aggregation method
df_1 = df.groupby(['var1'], dropna=True).sum().reset_index()

# Collapse data by one variable, by more than one aggregation method
# (var2, var3 need to be numeric data)
df_2 = df.groupby(['var1']).agg({'var2': ['sum'], 'var3': ['mean']}).reset_index()
Visualization
Exemplar data: COVID-19 case and death data (date: 2020-12-06)