Tags » MatPlotLib

Linear regression example

Final code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the wind-speed measurements and print a quick overview of the data.
df = pd.read_csv('Droid control - wind speed.csv')
print(df.head())
print(df.describe())
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print would add a stray "None" line.
df.info()

# Figure 1: raw scatter of the control metrics against wind speed.
plt.figure(1)
plt.scatter(df['Wind speed'], df['Control metrics'], color='red')
plt.title('Control action / wind speed')
plt.xlabel('Wind speed (km/h)')
plt.ylabel('Control metrics')
plt.savefig('Windspeed.jpg')

# Predictor matrix X is every column but the last; response y is the second
# column.
# NOTE(review): this assumes the CSV has exactly two columns (X, then Y) --
# confirm against the data file.
X = df.iloc[:, :-1].values
y = df.iloc[:, 1].values

X_squares = X[:, 0] ** 2
X_times_Y = X[:, 0] * y
N = len(X)

# Sums shared by the intercept and slope formulas, computed once.
sum_x = X.sum()            # sum(X)
sum_y = y.sum()            # sum(Y)
sum_xx = X_squares.sum()   # sum(X^2)
sum_xy = X_times_Y.sum()   # sum(XY)

# Both coefficients share the denominator N*sum(X^2) - sum(X)^2.
denominator = N * sum_xx - sum_x ** 2

# Intercept: b = (sum(X^2)*sum(Y) - sum(X)*sum(XY)) / (N*sum(X^2) - sum(X)^2)
b = (sum_xx * sum_y - sum_x * sum_xy) / denominator

# Slope: m = (N*sum(XY) - sum(X)*sum(Y)) / (N*sum(X^2) - sum(X)^2)
m = (N * sum_xy - sum_x * sum_y) / denominator

# Evaluate the fitted line Y = b + m*X for every observation in one vectorized
# step. Growing the array with np.append inside a loop copies it on every
# iteration; this yields the same flat array of predictions without the copies.
manual_linear_regression = (b + m * X).ravel()

# Legend text showing the fitted equation.
lin_equation = 'Y = {} + {}X'.format(b, m)

# Figure 2: original data (red) overlaid with the fitted values (blue).
plt.figure(2)
plt.scatter(df['Wind speed'], df['Control metrics'], color='red', label='Original data')
plt.scatter(df['Wind speed'], manual_linear_regression, color='blue', label=lin_equation)
plt.title('Control action / wind speed')
plt.xlabel('Wind speed (km/h)')
plt.ylabel('Control metrics')
plt.legend(loc='upper left')
plt.savefig('Windspeed-linreg.jpg')
… 1,379 more words

Seaborn library for pretty plots

Seaborn is a visualization library based on matplotlib (and complementary to it; you should really understand matplotlib first). It basically makes your work easier and prettier. The library is not particularly complicated or broad, but it does some things for you that you would otherwise have to do in matplotlib on your own. 286 more words

Python

Central Limit Theorem

Final code:

import numpy as np
import matplotlib.pyplot as plt

# Draw a large normally distributed population with the given mean and
# standard deviation.
mu, sigma = 0, 0.1
pop_normal = np.random.normal(mu, sigma, 10000000)

# str.format keeps the output identical on Python 2 and Python 3; the bare
# print statement is a syntax error on Python 3.
print('Population Mean: {}'.format(np.mean(pop_normal)))

# Figure 1: histogram of the whole population.
bins = 15
plt.figure(1)
plt.suptitle('Distribution of population', fontsize=16)
plt.hist(pop_normal, bins=bins)
plt.xlabel('Values')
plt.ylabel('Frequency')
plt.savefig('Popula.jpg')

print('Random sample: {}'.format(pop_normal))

for i in range(10):
	print i+1, '. 2,041 more words

Visualizing Multidimensional Data in Python

@tachyeonz : Nearly everyone is familiar with two-dimensional plots, and most college students in the hard sciences are familiar with three dimensional plots. However, modern datasets are rarely two- or three-dimensional. 6 more words

Iiot

Visualizing time-dependent wavefunctions

In my previous post, I presented a method of visualizing wavefunctions that are inherently complex-valued using a single plot that shows both the probability density and phase but frozen in time. 460 more words

Complex Function

Flag Design Algorithm

Learning how to program using Python opened up new methods of designing to me. I became interested in writing a process to design something, rather than just designing it, because of the possibilities this method offered. 361 more words

Graphics Projects

Industry average bar plots and scatter plots

Final code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Loading the data and printing out its properties ---------------------------
df = pd.read_csv('companies.csv')
print(df.shape)
# DataFrame.info() prints its own report and returns None, so it is not
# wrapped in print.
df.info()
print(df.describe())

# Data wrangling -------------------------------------------------------------

# Fix the CA column: correct the misspelled name 'Cliare'
df.loc[df['CA'] == 'Cliare', 'CA'] = 'Claire'

# Drop the outlier
# NOTE(review): the column name in this filter was lost when the post was
# extracted; 'Renewal fee' is the only numeric column used elsewhere in the
# script -- confirm against the original data.
# .copy() makes the filtered frame independent so later assignments do not
# write into a view of the original.
df = df[df['Renewal fee'] < 9000].copy()

# Fix the 'Risk' column: unify the capitalisation of 'Very High'
df.loc[df['Risk'] == 'Very High', 'Risk'] = 'Very high'

# Plotting -------------------------------------------------------------------
# The figure is created explicitly; a bare `plt.figure` without parentheses
# only references the function and does nothing.
plt.figure(1)
plt.gcf().subplots_adjust(bottom=0.15)

# Revenue per industry -------------------------------------------------------
industry_unique = df['Industry'].unique()

# Listing industry counts and revenues
industry_count = []
industry_revenue = []
for el in industry_unique:
    in_industry = df['Industry'] == el
    industry_count.append(len(df[in_industry]))
    industry_revenue.append(df.loc[in_industry, 'Renewal fee'].sum())

# Calculating averages per industry
industry_average = [
    revenue / count
    for revenue, count in zip(industry_revenue, industry_count)
]

# Ordering industry averages by value
# Mapping each unique industry name to its corresponding average
industry_order = dict(zip(industry_unique, industry_average))
# Sorting industry_unique by industry averages using industry_order
industry_unique = sorted(industry_unique, key=lambda x: industry_order[x])
# Sorting industry_average by value so it lines up with industry_unique
industry_average = sorted(industry_average)

# Left panel: average revenue per industry, ascending.
ax1 = plt.subplot(121)
xs = np.arange(len(industry_unique))
plt.xticks(xs, industry_unique)
plt.xticks(rotation=30)
plt.bar(xs, industry_average, color='#112244')
plt.grid(True, linestyle='--', alpha=0.6)
# Draw the grid behind the bars.
ax1.set_axisbelow(True)
plt.title('Average revenue per industry')

# Revenue per risk bar -------------------------------------------------------
risk_unique = df['Risk'].unique()

# Ordering risk_unique from 'Low' to 'Very high'
risk_order = {'Low': '0', 'Moderate': '1', 'High': '2', 'Very high': '3'}
risk_unique = sorted(risk_unique, key=lambda x: risk_order[x])

# Listing risk counts and revenues
risk_count = []
risk_revenue = []
for el in risk_unique:
    has_risk = df['Risk'] == el
    risk_count.append(len(df[has_risk]))
    risk_revenue.append(df.loc[has_risk, 'Renewal fee'].sum())

# Calculating averages per risk
risk_average = [
    revenue / count
    for revenue, count in zip(risk_revenue, risk_count)
]

# Right panel: average revenue per risk level, sharing the y axis with the
# industry panel.
ax2 = plt.subplot(122, sharey=ax1)
xs = np.arange(len(risk_unique))
plt.xticks(xs, risk_unique)
plt.xticks(rotation=30)
plt.bar(xs, risk_average, color='#881122')
plt.grid(True, linestyle='--', alpha=0.6)
# Draw the grid behind the bars.
ax2.set_axisbelow(True)
plt.title('Average revenue per risk')
plt.savefig('Revenues.jpg')

# Revenue per risk scatter ---------------------------------------------------
# Replacing 'Risk' values with numbers.
# The result is assigned back to the column: calling replace(inplace=True) on
# df['Risk'] acts on an intermediate object and is not guaranteed to update df.
df['Risk'] = df['Risk'].replace(risk_order)

# Creating a DataFrame to hold the unique 'Risk'-'Renewal fee' pairs with
# their number of occurrences
df_risk = df.groupby(['Risk', 'Renewal fee']).size().reset_index(name="Size")

# X coordinate is 'Renewal fee'
xs = df_risk['Renewal fee']
# Y coordinate is 'Risk'
ys = df_risk['Risk']
# Size of marker is the number of occurrences of the above X and Y
s = df_risk['Size']

plt.figure(2)
plt.gcf().subplots_adjust(bottom=0.15)
ax = plt.subplot(111)
x_range = np.arange(1000, 4000, 250)
plt.xticks(x_range, rotation=30)
plt.scatter(xs, ys, s=s * 300, alpha=0.9, color='#cc1122', edgecolor='#000000')
plt.grid(True, linestyle='-', alpha=0.6)
# Draw the grid behind the markers.
ax.set_axisbelow(True)
plt.ylabel('Risk')
plt.title('Revenue per risk scatter')
plt.savefig('Revenue_per_risk.jpg')

# Risk per industry scatter --------------------------------------------------

# Replacing 'Industry' values with numbers.
# The result is assigned back to the column: calling replace(inplace=True) on
# df['Industry'] acts on an intermediate object and is not guaranteed to
# update df.
industry_numeral = {'Computers': '0', 'General': '1', 'Mining': '2', 'Robotics': '3', 'Transport': '4'}
df['Industry'] = df['Industry'].replace(industry_numeral)
df['Industry'] = df['Industry'].astype(int)

# Creating a DataFrame to hold the unique 'Risk'-'Industry' pairs with their
# number of occurrences
df_industry = df.groupby(['Risk', 'Industry']).size().reset_index(name="Size")

# X coordinate is 'Industry'
xs = df_industry['Industry']
# Y coordinate is 'Risk'
ys = df_industry['Risk']
# Size of marker is the number of occurrences of the above X and Y.
# The sizes must come from df_industry, the same frame as xs and ys; taking
# them from df_risk pairs the points with counts of a different grouping.
s = df_industry['Size']

plt.figure(3)
plt.gcf().subplots_adjust(bottom=0.15)
ax = plt.subplot(111)
x_range = np.arange(5)
y_range = np.arange(4)
plt.xticks(x_range, ('Computers', 'General', 'Mining', 'Robotics', 'Transport'), rotation=30)
plt.yticks(y_range, ('Low', 'Moderate', 'High', 'Very high'))
plt.scatter(xs, ys, s=s * 300, alpha=0.9, color='#1122cc', edgecolor='#000000')
plt.grid(True, linestyle='-', alpha=0.6)
# Draw the grid behind the markers.
ax.set_axisbelow(True)
plt.ylabel('Risk')
plt.title('Risk per Industry scatter')
plt.savefig('Risk_per_Industry.jpg')

# Risk and industry revenue --------------------------------------------------

# Figure 4: the risk-per-industry scatter (left) next to the average revenue
# per industry bar chart (right).
plt.figure(4)
plt.gcf().subplots_adjust(bottom=0.15)
ax = plt.subplot(121)
x_range = np.arange(5)
y_range = np.arange(4)
plt.xticks(x_range, ('Computers', 'General', 'Mining', 'Robotics', 'Transport'), rotation=30)
plt.yticks(y_range, ('Low', 'Moderate', 'High', 'Very high'))
plt.scatter(xs, ys, s=s * 300, alpha=0.9, color='#cc1122', edgecolor='#000000')
plt.grid(True, linestyle='-', alpha=0.6)
# Draw the grid behind the markers.
ax.set_axisbelow(True)
plt.ylabel('Risk')
plt.title('Risk per Industry scatter')

# Second cell of the 1x2 grid. The code 123 would request index 3 in a grid
# that only has two cells, which matplotlib rejects.
ax1 = plt.subplot(122)
xs = np.arange(len(industry_unique))
plt.xticks(xs, industry_unique)
plt.xticks(rotation=30)
plt.bar(xs, industry_average, color='#112244')
plt.grid(True, linestyle='--', alpha=0.6)
# Draw the grid behind the bars.
ax1.set_axisbelow(True)
plt.title('Average revenue per industry')
plt.savefig('Risk_and_Industry_revenue.jpg')
… 1,531 more words