-
Notifications
You must be signed in to change notification settings - Fork 0
/
salary_scurve.py
33 lines (22 loc) · 891 Bytes
/
salary_scurve.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import pandas as pd
import matplotlib.pyplot as plt
# Listings whose min_salary is at or above this value are dropped before the
# second plot is drawn.
OUTLIER_MIN_SALARY = 500000


def _plot_salary_points(min_sorted, max_sorted):
    """Draw sorted min salaries as blue dots and sorted max salaries as red dots."""
    # Plain lists are plotted (not the Series) so the x-axis is the position
    # in sorted order rather than the original row labels.
    plt.plot(min_sorted['min_salary'].tolist(), 'bo')
    plt.plot(max_sorted['max_salary'].tolist(), 'ro')


df = pd.read_csv('data/cleaned_data.csv')

# Sort the listings independently by each salary bound so that each series
# forms its own monotonically increasing curve.
min_sal_sorted_df = df.sort_values(by=['min_salary'])
max_sal_sorted_df = df.sort_values(by=['max_salary'])

# First pass: raw data, outliers included.
_plot_salary_points(min_sal_sorted_df, max_sal_sorted_df)
# Save BEFORE showing: with blocking backends the figure is torn down when the
# show() window is closed, so a savefig() issued afterwards writes a blank image.
plt.savefig("salary_scurve.png")
plt.show()
plt.clf()

print("Lets clear out giant outlier.")
min_sal_sorted_df = min_sal_sorted_df[
    min_sal_sorted_df.min_salary < OUTLIER_MIN_SALARY
]
# NOTE(review): the max-sorted frame is also filtered on min_salary (not
# max_salary). That keeps the same set of listings in both curves; confirm
# this is intended rather than a copy-paste slip.
max_sal_sorted_df = max_sal_sorted_df[
    max_sal_sorted_df.min_salary < OUTLIER_MIN_SALARY
]

# Second pass: outliers removed, with axis labels, title and legend.
_plot_salary_points(min_sal_sorted_df, max_sal_sorted_df)
plt.xlabel('Job Listings (sorted by Salary)')
plt.ylabel('Salary ($)')
plt.title('S-curve of job listings and salary')
plt.legend(["MinSalary", "MaxSalary"])
plt.show()
#