import numpy as np
import pandas as pd
- Create a new variable in a data set as a function of existing variables in the data set.
# Assigning to a column label that does not exist yet adds that column,
# so the BMI variable is created simply by naming it.
# NOTE(review): the 703 factor presumably assumes Weight is in pounds and
# Height is in inches -- confirm against the data source.
student["BMI"] = (student["Weight"] / (student["Height"] ** 2)) * 703
print(student.head())
- Create a new variable in a data set using if/else logic of existing variables in the data set.
# np.where() acts as a vectorized if/else for a single condition: rows
# whose BMI is below 19.0 get "Underweight", every other row gets "Healthy".
student["BMI Class"] = np.where(
    student["BMI"] < 19.0,
    "Underweight",
    "Healthy",
)
print(student.head())
- Create a new variable in a data set using if/else logic of existing variables in the data set.
# Notice the use of the np.where() function for a single condition:
# rows with BMI below 19.0 are labelled "Underweight", all other rows
# "Healthy".
# NOTE(review): this step repeats the preceding BMI Class step with the
# same condition and labels -- confirm whether a different example was
# intended here.
student["BMI Class"] = np.where(student["BMI"] < 19.0, "Underweight", "Healthy")
print(student.head())
- Create new variables in a data set using mathematical functions applied to existing variables in the data set.
# Using the np.log(), np.exp(), np.sqrt(), np.where(), and np.abs() functions.
# Element-wise transforms of existing columns; the right-hand sides only
# read Weight, Age and Height, so the three columns can be assigned together.
student["LogWeight"], student["ExpAge"], student["SqrtHeight"] = (
    np.log(student["Weight"]),
    np.exp(student["Age"]),
    np.sqrt(student["Height"]),
)
# Flip the sign of BMI for rows below 19.0 and keep it unchanged elsewhere.
student["BMI Neg"] = np.where(
    student["BMI"] < 19.0,
    -student["BMI"],
    student["BMI"],
)
# Taking the absolute value removes the sign flip again.
student["BMI Pos"] = np.abs(student["BMI Neg"])
# Boolean column: True where BMI Pos equals the original BMI.
student["BMI Check"] = student["BMI Pos"] == student["BMI"]
- Drop variables from a data set.
# columns= names the column labels to remove (the same effect as passing
# the labels together with axis=1, which selects columns instead of rows).
student = student.drop(
    columns=[
        "LogWeight",
        "ExpAge",
        "SqrtHeight",
        "BMI Neg",
        "BMI Pos",
        "BMI Check",
    ]
)
print(student.head())
- Sort a data set by a variable.
# kind="mergesort" requests a stable sorting algorithm, so rows that share
# the same Age keep their current relative order.
student = student.sort_values(by=["Age"], kind="mergesort")
print(student.head())
- Sort a data set by a categorical variable.
# The stable sort keeps the existing row order inside each Sex group, so a
# frame already ordered by Age ends up sorted first by Sex and then,
# within Sex, by Age.
student = student.sort_values(by=["Sex"], kind="mergesort")
print(student.head())
- Compute descriptive statistics of continuous variables, grouped by a categorical variable.
# numeric_only=True restricts the per-group means to numeric columns.
# Without it, pandas 2.0 and later raise a TypeError when the frame also
# holds text columns such as Name or BMI Class.
print(student.groupby(by="Sex").mean(numeric_only=True))
- Add a new row to the bottom of a data set.
# DataFrame.append() was removed in pandas 2.0, so the new row is wrapped in
# a one-row DataFrame and concatenated onto the existing frame.
# ignore_index=True renumbers the index, placing the new row at the bottom.
student = pd.concat(
    [
        student,
        pd.DataFrame(
            [
                {
                    "Name": "Jane",
                    "Sex": "F",
                    "Age": 14,
                    "Height": 56.3,
                    "Weight": 77.0,
                    "BMI": 17.077695,
                    "BMI Class": "Underweight",
                }
            ]
        ),
    ],
    ignore_index=True,
)
- Create a user-defined function and apply it to a variable in the data set to create a new variable in the data set.
def toKG(lb):
    """Convert a weight in pounds to kilograms.

    Args:
        lb: Weight in pounds; a number or any value that supports
            multiplication by a float.

    Returns:
        ``lb`` multiplied by 0.45359237, i.e. the weight in kilograms.
    """
    return 0.45359237 * lb
# Series.apply() calls toKG on each Weight value and collects the results
# into the new "Weight KG" column.
student["Weight KG"] = student["Weight"].apply(func=toKG)
print(student.head())