import pandas as pd


series = pd.Series(["BMW", "Toyota", "Honda"])


series

0       BMW
1    Toyota
2     Honda
dtype: object


colors = pd.Series(["Red", "White", "Blue"])


car_data = pd.DataFrame({"Car Make": series, "Color": colors})
car_data


cars = pd.read_csv("./data/cars.csv")
cars

# You can also import a csv directly from a URL


# Exporting a dataframe
cars.to_csv("./data/exported-cars.csv", index=False)


cars.dtypes

Make        object
Colour      object
Odometer     int64
Doors        int64
Price       object
dtype: object


cars.columns

Index(['Make', 'Colour', 'Odometer', 'Doors', 'Price'], dtype='object')


cars.index

RangeIndex(start=0, stop=10, step=1)


cars.describe()


# dataframe.info() provides info from df.dtypes & df.index

cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Make      10 non-null     object
 1   Colour    10 non-null     object
 2   Odometer  10 non-null     int64 
 3   Doors     10 non-null     int64 
 4   Price     10 non-null     object
dtypes: int64(2), object(3)
memory usage: 528.0+ bytes


len(cars)

10


# Selecting a specific column
cars["Odometer"].sum()

786014


cars.head()


cars.tail(3)


elements = pd.Series(["water", "earth", "fire", "air", "lightning", "metal", "blood", "lava"],
                    index=[1,2,3,4,3,2,1,2])
elements

1        water
2        earth
3         fire
4          air
3    lightning
2        metal
1        blood
2         lava
dtype: object


elements.loc[2] # On a series

2    earth
2    metal
2     lava
dtype: object


elements.iloc[2]

'fire'


elements.iloc[4]

'lightning'


# .iloc[:p] get all items up to position p (exclusive)

elements.iloc[:4]

1    water
2    earth
3     fire
4      air
dtype: object


cars.loc[3]

Make               BMW
Colour           Black
Odometer         11179
Doors                5
Price       $22,000.00
Name: 3, dtype: object


# In the case of a dataframe, index matches the position

cars.iloc[:3] # Fetch records up to position 3 (exclusive)


# Filtering by a column

cars[cars["Make"] == "Toyota"]


cars[cars["Odometer"] > 75000]


pd.crosstab(cars["Make"], cars["Doors"])


cars.groupby(["Make"]).mean()


cars["Odometer"].plot()

<AxesSubplot:>


cars["Odometer"].hist()

<AxesSubplot:>


if (cars["Price"].dtype == 'object'):
    cars["Price"] = cars["Price"].str.replace('[\$\,]', '', regex=True).astype(float)

cars.head()


cars["Price"].dtype

dtype('float64')


cars["Price"].plot()

<AxesSubplot:>


cars["Make"].str.lower()

0    toyota
1     honda
2    toyota
3       bmw
4    nissan
5    toyota
6     honda
7     honda
8    toyota
9    nissan
Name: Make, dtype: object


cars_missing_data = pd.read_csv("./data/cars-missing-data.csv")
cars_missing_data


cars_missing_data["Odometer"].fillna(cars_missing_data["Odometer"].mean(), inplace=True)
cars_missing_data


cars_missing_dropped = cars_missing_data.dropna()
cars_missing_dropped


# Adding column from Pandas Series

seat_count = pd.Series([5, 5, 5, 5, 5, 5]) # Create Series

cars["Seats"] = seat_count # Add series as 'Seats' column to end of dataframe
cars


cars["Seats"].fillna(5, inplace=True)
cars


# Adding column from Python List

fuel_economy_mpg = [25.3, 19.2, 22.0, 28, 36, 26.9, 24.4, 24.6, 26.5, 29.8] # Length must match dataframe size exactly

cars["Fuel Efficiency"] = fuel_economy_mpg
cars


# Apply an operation to each element of a dataframe column

cars["Odometer"] = cars["Odometer"].apply(lambda km: km / 1.6093) # Convert kilometers to miles
cars


cars["Total Fuel Consumption"] = cars["Odometer"] / cars["Fuel Efficiency"]
cars


# Creating column from a single value

cars["Wheels"] = 4
cars


cars["Passed Road Safety"] = True
cars.dtypes

Make                       object
Colour                     object
Odometer                  float64
Doors                       int64
Price                     float64
Seats                     float64
Fuel Efficiency           float64
Total Fuel Consumption    float64
Wheels                      int64
Passed Road Safety           bool
dtype: object


cars


if "Wheels" in cars.columns:
    del cars["Wheels"]

cars


# Select a percentage of the dataframe in random order

cars_shuffled = cars.sample(frac=1)
cars_shuffled


cars_shuffled.reset_index(drop=True, inplace=True)
cars_shuffled

	Odometer	Doors
count	10.000000	10.000000
mean	78601.400000	4.000000
std	61983.471735	0.471405
min	11179.000000	3.000000
25%	35836.250000	4.000000
50%	57369.000000	4.000000
75%	96384.500000	4.000000
max	213095.000000	5.000000

	Odometer	Doors
Make
BMW	11179.000000	5.00
Honda	62778.333333	4.00
Nissan	122347.500000	4.00
Toyota	85451.250000	3.75

	Make	Colour	Odometer	Doors	Price
0	Toyota	White	150043.0	4.0	$4,000
1	Honda	Red	87899.0	4.0	$5,000
2	Toyota	Blue	NaN	3.0	$7,000
3	BMW	Black	11179.0	5.0	$22,000
4	Nissan	White	213095.0	4.0	$3,500
5	Toyota	Green	NaN	4.0	$4,500
6	Honda	NaN	NaN	4.0	$7,500
7	Honda	Blue	NaN	4.0	NaN
8	Toyota	White	60000.0	NaN	NaN
9	NaN	White	31600.0	4.0	$9,700

	Make	Colour	Odometer	Doors	Price
0	Toyota	White	150043.000000	4.0	$4,000
1	Honda	Red	87899.000000	4.0	$5,000
2	Toyota	Blue	92302.666667	3.0	$7,000
3	BMW	Black	11179.000000	5.0	$22,000
4	Nissan	White	213095.000000	4.0	$3,500
5	Toyota	Green	92302.666667	4.0	$4,500
6	Honda	NaN	92302.666667	4.0	$7,500
7	Honda	Blue	92302.666667	4.0	NaN
8	Toyota	White	60000.000000	NaN	NaN
9	NaN	White	31600.000000	4.0	$9,700

	Make	Colour	Odometer	Doors	Price
0	Toyota	White	150043.000000	4.0	$4,000
1	Honda	Red	87899.000000	4.0	$5,000
2	Toyota	Blue	92302.666667	3.0	$7,000
3	BMW	Black	11179.000000	5.0	$22,000
4	Nissan	White	213095.000000	4.0	$3,500
5	Toyota	Green	92302.666667	4.0	$4,500

Intro to Pandas¶

Describing Data¶

Viewing and Selecting Data¶

.head(x) & .tail(x) methods¶

.loc(i) & .iloc(p) methods¶

On a Series¶

On a DataFrame¶

pd.crosstab(col1, col2)¶

Matplotlib Setup¶

Manipulating Data¶

Modifying a DataFrame Column¶

2 Methods¶

Deleting a DataFrame Column¶

2 Methods¶

Data Randomization¶

Pandas Docs¶

	Make	Colour	Odometer	Doors	Price
0	Toyota	White	150043	4	$4,000.00
1	Honda	Red	87899	4	$5,000.00
2	Toyota	Blue	32549	3	$7,000.00
3	BMW	Black	11179	5	$22,000.00
4	Nissan	White	213095	4	$3,500.00
5	Toyota	Green	99213	4	$4,500.00
6	Honda	Blue	45698	4	$7,500.00
7	Honda	Blue	54738	4	$7,000.00
8	Toyota	White	60000	4	$6,250.00
9	Nissan	White	31600	4	$9,700.00

	Make	Colour	Odometer	Doors	Price	Seats	Fuel Efficiency
0	Toyota	White	93234.946871	4	4000.0	5.0	25.3
1	Honda	Red	54619.399739	4	5000.0	5.0	19.2
2	Toyota	Blue	20225.563910	3	7000.0	5.0	22.0
3	BMW	Black	6946.498478	5	22000.0	5.0	28.0
4	Nissan	White	132414.714472	4	3500.0	5.0	36.0
5	Toyota	Green	61649.785621	4	4500.0	5.0	26.9
6	Honda	Blue	28396.197104	4	7500.0	5.0	24.4
7	Honda	Blue	34013.546262	4	7000.0	5.0	24.6
8	Toyota	White	37283.290872	4	6250.0	5.0	26.5
9	Nissan	White	19635.866526	4	9700.0	5.0	29.8