import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Simplest possible line plot: hand x and y straight to pyplot.
x = np.arange(1, 5)   # 1, 2, 3, 4
y = 11 * x            # 11, 22, 33, 44
plt.plot(x, y);


# 1st Method – create the Figure first, then add a single Axes to it

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)   # 1x1 grid, first (only) cell
ax.plot(x, y);


# 2nd Method – Object-oriented API (recommended)
# plt.subplots() hands back the Figure and its Axes in one call.

x = np.arange(1, 6)

# One column per line to draw: 5 x-positions, 3 random integer series.
random_lines = np.random.randint(low=25, high=250, size=(5, 3))

fig, ax = plt.subplots()
ax.plot(x, random_lines);


# 0. Import matplotlib (done once, at the top of the file)

# 1. Prepare data
x = np.arange(1, 6)
y = 11 * x

# 2. Setup plot
fig, ax = plt.subplots(figsize=(5, 5))

# 3. Plot data
ax.plot(x, y)

# 4. Customize plot
ax.set_title("Simple Plot")
ax.set_xlabel("Index")
ax.set_ylabel("Values")

# 5. Save & Share (written relative to the working directory)
fig.savefig('plots/HelloPlot.png')


# 100 evenly spaced samples; both end points (0.1 and 10) are included.
x = np.linspace(start=0.1, stop=10, num=100)
x

array([ 0.1,  0.2,  0.3,  0.4,  0.5,  0.6,  0.7,  0.8,  0.9,  1. ,  1.1,
        1.2,  1.3,  1.4,  1.5,  1.6,  1.7,  1.8,  1.9,  2. ,  2.1,  2.2,
        2.3,  2.4,  2.5,  2.6,  2.7,  2.8,  2.9,  3. ,  3.1,  3.2,  3.3,
        3.4,  3.5,  3.6,  3.7,  3.8,  3.9,  4. ,  4.1,  4.2,  4.3,  4.4,
        4.5,  4.6,  4.7,  4.8,  4.9,  5. ,  5.1,  5.2,  5.3,  5.4,  5.5,
        5.6,  5.7,  5.8,  5.9,  6. ,  6.1,  6.2,  6.3,  6.4,  6.5,  6.6,
        6.7,  6.8,  6.9,  7. ,  7.1,  7.2,  7.3,  7.4,  7.5,  7.6,  7.7,
        7.8,  7.9,  8. ,  8.1,  8.2,  8.3,  8.4,  8.5,  8.6,  8.7,  8.8,
        8.9,  9. ,  9.1,  9.2,  9.3,  9.4,  9.5,  9.6,  9.7,  9.8,  9.9,
       10. ])


# Line plot of x squared
fig, ax = plt.subplots()
ax.plot(x, np.square(x));


# Scatter plot of the exponential of x
fig, ax = plt.subplots()
ax.scatter(x=x, y=np.exp(x));


# Scatter plot of the sine of x
fig, ax = plt.subplots()
ax.scatter(x=x, y=np.sin(x));


# Drink name -> price in dollars
tea_prices = {
    "Oolong Tea": 4.75,
    "Jasmine Tea": 5.50,
    "Green Tea": 5.00,
    "Matcha": 6.00,
    "Hot Cocoa": 6.25,
}

fig, ax = plt.subplots()

# One vertical bar per drink, in dictionary order.
ax.bar(list(tea_prices), list(tea_prices.values()))

ax.set_title("Iroh's Tea Shop")
ax.set_xlabel("Drink")
ax.set_ylabel("Price ($)");


# Food -> water consumed (the chart labels this as litres per 1kg of food),
# listed from the largest value to the smallest.
water_required_to_produce_food = {
    "Chocolate": 17196,
    "Beef": 15415,
    "Sheep Meat": 10412,
    "Pork": 5988,
    "Butter": 5553,
    "Chicken meat": 4325,
    "Cheese": 3178,
    "Olives": 3025,
    "Rice": 2497,
    "Dry Pasta": 1849,
    "Bread": 1608,
    "Pizza (Single)": 1239,
    "Apple": 822,
    "Banana": 790,
    "Potatoes": 287,
    "Milk": 255,
    "Cabbage": 237,
    "Tomato": 214,
    "Egg (Single)": 196,
    "Wine": 109,
    "Beer": 74,
}

fig, ax = plt.subplots(figsize=(12, 8))

# Horizontal bars, one per food, in dictionary order.
food_names = list(water_required_to_produce_food)
litres = list(water_required_to_produce_food.values())
ax.barh(food_names, litres)

# Flip the y axis so the first dictionary entry is drawn at the top.
ax.invert_yaxis()

ax.set_title("How much water is needed to produce 1kg of food?")
ax.set_xlabel("Water Consumed (L)")
ax.set_ylabel("Food");

# fig.savefig('./plots/water-for-food-production.jpg')


# 128 draws from the standard normal distribution
x = np.random.standard_normal(size=128)

# Histogram of the sample with Matplotlib's default binning
fig, ax = plt.subplots()
ax.hist(x);


# Explicit – every Axes of the 2x2 grid gets its own name

fig, axes_grid = plt.subplots(2, 2, figsize=(10, 5))
(ax1, ax2), (ax3, ax4) = axes_grid

# Top-left: line plot of |x| / 2 against x
ax1.plot(x, np.abs(x) / 2)

# Top-right: 10 uniformly random points
ax2.scatter(np.random.random(10), np.random.random(10))

# Bottom-left: tea prices as bars
ax3.bar(tea_prices.keys(), tea_prices.values())

# Bottom-right: histogram of x
ax4.hist(x)

(array([ 3.,  6., 11., 14., 26., 29., 24.,  9.,  3.,  3.]),
 array([-2.83220509, -2.26084055, -1.68947602, -1.11811149, -0.54674695,
         0.02461758,  0.59598212,  1.16734665,  1.73871119,  2.31007572,
         2.88144026]),
 <BarContainer object of 10 artists>)


# Implicit – keep the 2x2 array of Axes and address each one by [row][col]

fig, ax = plt.subplots(2, 2, figsize=(10, 5))

# Top-left: line plot of |x| / 2 against x
ax[0][0].plot(x, np.abs(x) / 2)

# Top-right: 10 uniformly random points
ax[0][1].scatter(np.random.random(10), np.random.random(10))

# Bottom-left: tea prices as bars
ax[1][0].bar(tea_prices.keys(), tea_prices.values())

# Bottom-right: histogram of x
ax[1][1].hist(x);


# 365 standard-normal values indexed by consecutive dates from 1 Jan 2021
dates_2021 = pd.date_range(start="1/1/2021", periods=365)
timeseries = pd.Series(data=np.random.randn(365), index=dates_2021)
timeseries

2021-01-01    0.198657
2021-01-02    0.811075
2021-01-03   -0.092062
2021-01-04   -2.006635
2021-01-05    0.218282
                ...   
2021-12-27   -0.196479
2021-12-28    1.244913
2021-12-29    0.314448
2021-12-30   -0.464284
2021-12-31   -1.507438
Freq: D, Length: 365, dtype: float64


# The raw values over time
timeseries.plot();


# The running total of the same values over time
running_total = timeseries.cumsum()
running_total.plot();


# Tiny worked example: "Cum" holds the running total of "Seq".
simple_series = np.arange(1, 6)

cumsum_example = pd.DataFrame({"Seq": simple_series}).assign(
    Cum=lambda frame: frame["Seq"].cumsum()
)
cumsum_example


# Load the car sales table
cars = pd.read_csv('data/cars.csv')
cars


# Prices are read as text such as "$4,000.00": strip the "$" and "," characters
# and convert to float. The guard keeps this step safe to re-run once the
# column is numeric. It checks for "not numeric" rather than
# `dtype == 'object'` because pandas may infer a dedicated string dtype for
# text columns, in which case an object-dtype comparison would skip the cleanup.
if not pd.api.types.is_numeric_dtype(cars["Price"]):
    cars["Price"] = cars["Price"].str.replace('[$,]', '', regex=True).astype(float)

cars.head()


# Give every row a sale date: one weekly date per car, counted from 1 Jan 2020
weekly_dates = pd.date_range(start="1/1/2020", periods=cars.shape[0], freq="W")
cars["Sale Date"] = weekly_dates
cars


# Running total of the prices, in row order
price_running_total = cars["Price"].cumsum()
cars["Total Sales"] = price_running_total
cars


# Cumulative sales over time as a line
cars.plot.line(x="Sale Date", y="Total Sales");


# Price against odometer reading as a scatter
cars.plot.scatter(x="Odometer", y="Price");


# 10 rows x 4 columns of uniform random numbers in [0, 1)
x = np.random.rand(10, 4)

df = pd.DataFrame(data=x, columns=list("abcd"))
df


# Bar chart of every column, requested through the `kind` argument ...
df.plot(kind="bar");


# ... and the same chart requested through the `.plot.bar` accessor
df.plot.bar();


# Mean odometer reading per make.
# Select the numeric column *before* aggregating: averaging the whole frame
# also tries to average text columns such as "Colour", which raises a
# TypeError on pandas >= 2.0 (older versions silently dropped them).
avg_odometer_by_make = cars.groupby("Make")["Odometer"].mean()
avg_odometer_by_make

Make
BMW        11179.000000
Honda      62778.333333
Nissan    122347.500000
Toyota     85451.250000
Name: Odometer, dtype: float64


# Average odometer reading per make as bars
avg_odometer_by_make.plot.bar();


# Distribution of odometer readings
cars["Odometer"].plot.hist(bins=10);  # 10 bins by default


# Load the heart-disease table and peek at the first rows
heart_disease = pd.read_csv("./data/heart-disease.csv")
heart_disease.head()


# Age distribution, 20 bins
heart_disease["age"].plot.hist(bins=20);


# One histogram per column, stacked as subplots in a tall figure
heart_disease.plot(kind="hist", subplots=True, figsize=(20, 24));


# Keep only the rows where age is strictly greater than 50
is_over_50 = heart_disease["age"] > 50
over_50 = heart_disease.loc[is_over_50]
over_50


# Pyplot Method – let the DataFrame create the figure itself

# Age vs. chol, with each point coloured by its `target` value
over_50.plot.scatter(x='age', y='chol', c='target', figsize=(12, 8));


# OO Method – create the Axes first, then ask pandas to draw on it

fig, ax = plt.subplots(figsize=(12, 8))

# ax.set_xlim([45, 100]);

# Same scatter as the DataFrame-only version, drawn on the supplied Axes
over_50.plot.scatter(x='age', y='chol', c='target', ax=ax)

<AxesSubplot:xlabel='age', ylabel='chol'>


# Pure OO Method – everything goes through the Axes object

fig, ax = plt.subplots(figsize=(12, 8))

# Plot the data, colouring each point by its `target` value
heart_disease_scatter = ax.scatter(x=over_50["age"],
                                   y=over_50["chol"],
                                   c=over_50["target"])
# Customize the labels
ax.set(title="Heart Disease and Cholesterol Levels",
       xlabel="Age",
       ylabel="Cholesterol (mg/dL)")

# Add a legend built from the scatter's colour values
ax.legend(*heart_disease_scatter.legend_elements(), title="Target");

# Add a dashed horizontal line at the mean cholesterol level
ax.axhline(over_50["chol"].mean(),
           linestyle='--');


# Subplot of chol & thalach by age: two stacked Axes sharing the x axis

fig, (ax0, ax1) = plt.subplots(nrows=2,
                               ncols=1,
                               sharex=True,
                               figsize=(10,10))
# Plot 1: Cholesterol by Age, coloured by `target`
hd_plot1 = ax0.scatter(x=over_50["age"],
                       y=over_50["chol"],
                       c=over_50["target"])

ax0.set(title="Cholesterol Levels",
        ylabel="Cholesterol")

ax0.legend(*hd_plot1.legend_elements(), title="Heart Disease")
# Dashed horizontal line at the mean cholesterol level
ax0.axhline(over_50["chol"].mean(),
            linestyle='--')

# Plot 2: Thalach by Age, coloured by `target`
hd_plot2 = ax1.scatter(x=over_50["age"],
                       y=over_50["thalach"],
                       c=over_50["target"])

ax1.set(title="Max Heart Rate",
        xlabel="Age",
        ylabel="Max Heart Rate (bpm)")

ax1.legend(*hd_plot2.legend_elements(), title="Heart Disease")
# Dashed horizontal line at the mean thalach value
ax1.axhline(over_50["thalach"].mean(),
            linestyle='--')

# Title the figure
fig.suptitle("Heart Disease Analysis", fontsize=16, fontweight="bold");


# Show the names of the style sheets that can be passed to plt.style.use
plt.style.available

['Solarize_Light2',
 '_classic_test_patch',
 'bmh',
 'classic',
 'dark_background',
 'fast',
 'fivethirtyeight',
 'ggplot',
 'grayscale',
 'seaborn',
 'seaborn-bright',
 'seaborn-colorblind',
 'seaborn-dark',
 'seaborn-dark-palette',
 'seaborn-darkgrid',
 'seaborn-deep',
 'seaborn-muted',
 'seaborn-notebook',
 'seaborn-paper',
 'seaborn-pastel',
 'seaborn-poster',
 'seaborn-talk',
 'seaborn-ticks',
 'seaborn-white',
 'seaborn-whitegrid',
 'tableau-colorblind10']


# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8..."
# and the old names were later removed. Use the new name when this
# installation offers it, otherwise fall back to the legacy name.
seaborn_style = "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
plt.style.use(seaborn_style)

# Line plot of the car prices, drawn with the style applied above
cars["Price"].plot();


# 10 rows x 4 columns of standard-normal values
x = np.random.standard_normal(size=(10, 4))

df = pd.DataFrame(data=x, columns=list("abcd"))

# pandas returns the Axes it drew on, so it can be customised afterwards
ax = df.plot.bar()

ax.set_title("Random Numbers")
ax.set_xlabel("Index")
ax.set_ylabel("Value")

# Make sure the legend is shown
bar_legend = ax.legend()
bar_legend.set_visible(True);


# Set style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8..."
# and the old names were later removed. Pick the first name this installation
# offers, falling back to the legacy name.
whitegrid_style = next(
    (name for name in ("seaborn-v0_8-whitegrid", "seaborn-whitegrid")
     if name in plt.style.available),
    "seaborn-whitegrid",
)
plt.style.use(whitegrid_style)

# Pure OO Method
fig, ax = plt.subplots(figsize=(10, 6))

# Plot the data, colouring each point by its `target` value with the
# "winter" colormap
heart_disease_scatter = ax.scatter(x=over_50["age"],
                                   y=over_50["chol"],
                                   c=over_50["target"],
                                   cmap='winter')
# Customize the labels
ax.set(title="Cholesterol Levels and Heart Disease",
       xlabel="Age",
       ylabel="Cholesterol (mg/dL)")

# Add a legend built from the scatter's colour values
ax.legend(*heart_disease_scatter.legend_elements(), title="Heart Disease");

# Add a dashed horizontal line at the mean cholesterol level
ax.axhline(over_50["chol"].mean(),
           linestyle='--');


# Subplot of chol & thalach by age: two stacked Axes sharing the x axis

fig, (ax0, ax1) = plt.subplots(nrows=2,
                               ncols=1,
                               sharex=True,
                               figsize=(10,10))
# Plot 1: Cholesterol by Age, coloured by `target`
hd_plot1 = ax0.scatter(x=over_50["age"],
                       y=over_50["chol"],
                       c=over_50["target"],
                       cmap='winter')

ax0.set(title="Cholesterol Levels",
        ylabel="Cholesterol")

# Fix the visible ranges; the x limits are set on ax0, and the x axis is
# shared with ax1 (sharex=True above)
ax0.set_xlim([50,80])
ax0.set_ylim([100,600])

ax0.legend(*hd_plot1.legend_elements(), title="Heart Disease")

# Dashed horizontal line at the mean cholesterol level
ax0.axhline(over_50["chol"].mean(),
            linestyle='--')

# Plot 2: Thalach by Age, coloured by `target`
hd_plot2 = ax1.scatter(x=over_50["age"],
                       y=over_50["thalach"],
                       c=over_50["target"],
                       cmap='winter')

ax1.set(title="Max Heart Rate",
        xlabel="Age",
        ylabel="Max Heart Rate (bpm)")

ax1.set_ylim([60,200])

ax1.legend(*hd_plot2.legend_elements(), title="Heart Disease")

# Dashed horizontal line at the mean thalach value
ax1.axhline(over_50["thalach"].mean(),
            linestyle='--')

# Title the figure
fig.suptitle("Heart Disease Analysis", fontsize=16, fontweight="bold");


# Write the finished figure to disk (path is relative to the working directory)
fig.savefig("./plots/heart-disease-analysis.jpg")

	Make	Colour	Odometer	Doors	Price
0	Toyota	White	150043	4	$4,000.00
1	Honda	Red	87899	4	$5,000.00
2	Toyota	Blue	32549	3	$7,000.00
3	BMW	Black	11179	5	$22,000.00
4	Nissan	White	213095	4	$3,500.00
5	Toyota	Green	99213	4	$4,500.00
6	Honda	Blue	45698	4	$7,500.00
7	Honda	Blue	54738	4	$7,000.00
8	Toyota	White	60000	4	$6,250.00
9	Nissan	White	31600	4	$9,700.00

	Make	Colour	Odometer	Doors	Price
0	Toyota	White	150043	4	4000.0
1	Honda	Red	87899	4	5000.0
2	Toyota	Blue	32549	3	7000.0
3	BMW	Black	11179	5	22000.0
4	Nissan	White	213095	4	3500.0

	Make	Colour	Odometer	Doors	Price	Sale Date
0	Toyota	White	150043	4	4000.0	2020-01-05
1	Honda	Red	87899	4	5000.0	2020-01-12
2	Toyota	Blue	32549	3	7000.0	2020-01-19
3	BMW	Black	11179	5	22000.0	2020-01-26
4	Nissan	White	213095	4	3500.0	2020-02-02
5	Toyota	Green	99213	4	4500.0	2020-02-09
6	Honda	Blue	45698	4	7500.0	2020-02-16
7	Honda	Blue	54738	4	7000.0	2020-02-23
8	Toyota	White	60000	4	6250.0	2020-03-01
9	Nissan	White	31600	4	9700.0	2020-03-08

	Make	Colour	Odometer	Doors	Price	Sale Date	Total Sales
0	Toyota	White	150043	4	4000.0	2020-01-05	4000.0
1	Honda	Red	87899	4	5000.0	2020-01-12	9000.0
2	Toyota	Blue	32549	3	7000.0	2020-01-19	16000.0
3	BMW	Black	11179	5	22000.0	2020-01-26	38000.0
4	Nissan	White	213095	4	3500.0	2020-02-02	41500.0
5	Toyota	Green	99213	4	4500.0	2020-02-09	46000.0
6	Honda	Blue	45698	4	7500.0	2020-02-16	53500.0
7	Honda	Blue	54738	4	7000.0	2020-02-23	60500.0
8	Toyota	White	60000	4	6250.0	2020-03-01	66750.0
9	Nissan	White	31600	4	9700.0	2020-03-08	76450.0

	a	b	c	d
0	0.056376	0.840925	0.440115	0.253111
1	0.139327	0.537898	0.288644	0.902088
2	0.212609	0.198575	0.240101	0.202547
3	0.829980	0.686096	0.713809	0.686075
4	0.835714	0.265893	0.787042	0.348383
5	0.837753	0.897795	0.268935	0.001020
6	0.998334	0.812969	0.642296	0.989324
7	0.113705	0.100756	0.614547	0.921541
8	0.943935	0.439403	0.928927	0.031700
9	0.749540	0.182308	0.634020	0.845659

Intro to Matplotlib

1. Matplotlib APIs

2. Matplotlib Workflow

3. Core Plot Types

How much water is needed to produce 1kg of food?

4. Multiple Plots in a Figure

5. Plotting from Pandas DataFrames

Cumulative Sum

5.1 Using the Matplotlib OO API with Pandas

5.2 Styling Plots

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	ca	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	0	1	1
3	56	1	1	120	236	0	1	178	0	0.8	2	0	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	0	2	1
5	57	1	0	140	192	0	1	148	0	0.4	1	0	1	1
6	56	0	1	140	294	0	0	153	0	1.3	1	0	2	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
297	59	1	0	164	176	1	0	90	0	1.0	1	2	1	0
298	57	0	0	140	241	0	1	123	1	0.2	1	0	3	0
300	68	1	0	144	193	1	1	141	0	3.4	1	2	3	0
301	57	1	0	130	131	0	1	115	1	1.2	1	1	3	0
302	57	0	1	130	236	0	0	174	0	0.0	1	1	2	0

	Seq	Cum
0	1	1
1	2	3
2	3	6
3	4	10
4	5	15

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	ca	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	0	1	1
3	56	1	1	120	236	0	1	178	0	0.8	2	0	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	0	2	1
5	57	1	0	140	192	0	1	148	0	0.4	1	0	1	1
6	56	0	1	140	294	0	0	153	0	1.3	1	0	2	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
297	59	1	0	164	176	1	0	90	0	1.0	1	2	1	0
298	57	0	0	140	241	0	1	123	1	0.2	1	0	3	0
300	68	1	0	144	193	1	1	141	0	3.4	1	2	3	0
301	57	1	0	130	131	0	1	115	1	1.2	1	1	3	0
302	57	0	1	130	236	0	0	174	0	0.0	1	1	2	0

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	1	1
1	37	1	2	130	250	0	1	187	0	3.5	0	2	1
2	41	0	1	130	204	0	0	172	0	1.4	2	2	1
3	56	1	1	120	236	0	1	178	0	0.8	2	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	2	1

	age	sex	cp	trestbps	chol	fbs	restecg	thalach	exang	oldpeak	slope	ca	thal	target
0	63	1	3	145	233	1	0	150	0	2.3	0	0	1	1
3	56	1	1	120	236	0	1	178	0	0.8	2	0	2	1
4	57	0	0	120	354	0	1	163	1	0.6	2	0	2	1
5	57	1	0	140	192	0	1	148	0	0.4	1	0	1	1
6	56	0	1	140	294	0	0	153	0	1.3	1	0	2	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
297	59	1	0	164	176	1	0	90	0	1.0	1	2	1	0
298	57	0	0	140	241	0	1	123	1	0.2	1	0	3	0
300	68	1	0	144	193	1	1	141	0	3.4	1	2	3	0
301	57	1	0	130	131	0	1	115	1	1.2	1	1	3	0
302	57	0	1	130	236	0	0	174	0	0.0	1	1	2	0