Data Import

Import CSV & Excel files from local directory

Import CSV files from a local directory (pandas tutorial: pandas.read_csv)

# Upload a CSV file from your local machine (opens a file picker in Colab).
from google.colab import files
uploaded = files.upload()   # maps each uploaded file name to its contents (bytes)

import io
import pandas as pd

if not uploaded:
    raise ValueError('No file was uploaded.')

# Read the uploaded bytes directly instead of a hand-typed file name: this
# works without editing the cell and cannot pick up a stale file left over
# from an earlier upload with the same name.
filename = next(iter(uploaded))   # first (usually the only) uploaded file
df = pd.read_csv(io.BytesIO(uploaded[filename]))
# df.shape   # uncomment to check (rows, columns)
df.head()

Import Excel files from a local directory (pandas tutorial: pandas.read_excel)

# Upload an Excel file from your local machine (opens a file picker in Colab).
from google.colab import files
uploaded = files.upload()   # maps each uploaded file name to its contents (bytes)

import io
import pandas as pd

if not uploaded:
    raise ValueError('No file was uploaded.')

# Read the uploaded bytes directly instead of a hand-typed file name: this
# works without editing the cell and cannot pick up a stale file left over
# from an earlier upload with the same name.
filename = next(iter(uploaded))   # first (usually the only) uploaded file
df = pd.read_excel(io.BytesIO(uploaded[filename]))   # reads the first sheet by default
# df.shape   # uncomment to check (rows, columns)
df.head()

Import data from URL

# Example: NYT GitHub COVID-19 data
# https://raw.githubusercontent.com/nytimes/covid-19-data/master/live/us-counties.csv
import pandas as pd   # imported here so this cell also runs on its own

url = 'The URL of data'   # replace with the address of a raw CSV file
df = pd.read_csv(url)     # pandas fetches and parses the remote file in one step
# df.shape   # uncomment to check (rows, columns)
df.head()

Google Drive Import

Read data by Google Sheets Name

#gspread setup
!pip install --upgrade gspread

#Authenticate access to your Google Drive
from google.colab import auth
auth.authenticate_user()

import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())


import pandas as pd
worksheet = gc.open('Google Sheets NAME').sheet1   
rows = worksheet.get_all_values()    # get_all_values gives a list of rows.
df = pd.DataFrame.from_records(rows)  # Convert to a DataFrame and render.
df.head()

Read data by Google Sheets ID

#gspread setup
!pip install --upgrade gspread

#Authenticate access to your Google Drive
from google.colab import auth
auth.authenticate_user()

import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())


worksheet = gc.open_by_key('Google Sheets ID').worksheet('NAME OF A SHEET TAB')    #Call by Sheet ID & Name
rows = worksheet.get_all_values()    # get_all_values gives a list of rows.
df = pd.DataFrame.from_records(rows[1:], columns=rows[0])   # Convert to a DataFrame and render. 1st Row as Headers
df.head()
!pip install -U -q PyDrive 

from pydrive.auth import GoogleAuth 
from pydrive.drive import GoogleDrive 
from google.colab import auth 
from oauth2client.client import GoogleCredentials 


# Authenticate and create the PyDrive client. 
auth.authenticate_user() 
gauth = GoogleAuth() 
gauth.credentials = GoogleCredentials.get_application_default() 
drive = GoogleDrive(gauth)   #Copy and paste Google Authentication code

link = 'SHARING LINK'  #The sharing link of the data file stored on your Google Drive
id = link.split("/")[-2]
#print(id)
downloaded = drive.CreateFile({'id':id})  
downloaded.GetContentFile('covid_county.csv')   
df = pd.read_csv('covid_county.csv') 
df.head()

Read CSV file from Google Drive - by mounting Google Drive

# Mount Google Drive (asks for authorization the first time it runs)
from google.colab import drive
drive.mount('/content/drive')

import pandas as pd   # imported here so this recipe also runs on its own

# Replace with the file's location under the mount point,
# e.g. '/content/drive/MyDrive/<folder>/<file>.csv'
path = '/content/drive/PATH TO THE FILE'
df = pd.read_csv(path)
df.head()

Last updated