# Attach the R packages used in this document.
library(tidyverse) # ggplot2, lubridate, dplyr, stringr, readr, ...
library(praise)
library(reticulate)
# Tell reticulate which Python interpreter to use for the Python code below.
# NOTE(review): this absolute path is machine-specific — change it (or remove
# the call) when running on another computer.
use_python("/Users/jsh04747/miniforge3/bin/python3", required = TRUE)
Parfumo Fragrance
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
A fantastic gallery of seaborn plots: https://seaborn.pydata.org/examples/index.html
The Data
(The first dataset of 2025 was “bring your own data!”, so I went back and found this one from December 10, 2024: https://github.com/rfordatascience/tidytuesday/tree/main/data/2024/2024-12-10)
This week we’re diving into the fascinating world of fragrances with a dataset sourced from Parfumo, a vibrant community of perfume enthusiasts. Olga G. webscraped these data from the various fragrance sections on the Parfumo website. Here is a description from the author:
This dataset contains detailed information about perfumes sourced from Parfumo, obtained through web scraping. It includes data on perfume ratings, olfactory notes (top, middle, and base notes), perfumers, year of release and other relevant characteristics of the perfumes listed on the Parfumo website.
The data provides a comprehensive look at how various perfumes are rated, which families of scents they belong to, and detailed breakdowns of the key olfactory components that define their overall profile.
# Load the cleaned Parfumo dataset straight from the TidyTuesday repository
# into a DataFrame, then print a preview of it.
parfumo = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2024/2024-12-10/parfumo_data_clean.csv')
print(parfumo)
Number ... URL
0 455 ... https://www.parfumo.com/Perfumes/Le_Re_Noir/45...
1 0071 ... https://www.parfumo.com/Perfumes/CB_I_Hate_Per...
2 0154 ... https://www.parfumo.com/Perfumes/CB_I_Hate_Per...
3 0162 ... https://www.parfumo.com/Perfumes/CB_I_Hate_Per...
4 0171 ... https://www.parfumo.com/Perfumes/CB_I_Hate_Per...
... ... ... ...
59320 NaN ... https://www.parfumo.com/Perfumes/Pascal_Morabi...
59321 NaN ... https://www.parfumo.com/Perfumes/Pascal_Morabi...
59322 NaN ... https://www.parfumo.com/Perfumes/Pascal_Morabi...
59323 NaN ... https://www.parfumo.com/Perfumes/Pascal_Morabi...
59324 NaN ... https://www.parfumo.com/Perfumes/Pascal_Morabi...
[59325 rows x 13 columns]
Making a barplot
First, I want to take the top 10 brands and label everything else “other.”
# Count perfumes per brand and keep the labels of the ten most frequent brands.
brand_counts = parfumo['Brand'].value_counts()
top_brands = brand_counts.nlargest(10).index

# Keep the brand name for the top ten and collapse every other brand (including
# missing ones) into 'Other'. Vectorised isin/where replaces a per-row lambda.
parfumo['TopBrand'] = parfumo['Brand'].where(parfumo['Brand'].isin(top_brands), 'Other')

# Keep only rows with a finite release year from 1900 onwards. The explicit
# .copy() makes parfumo_sub an independent DataFrame, so adding a column below
# does not write into a slice of `parfumo` (SettingWithCopyWarning).
has_valid_year = np.isfinite(parfumo['Release_Year']) & (parfumo['Release_Year'] >= 1900)
parfumo_sub = parfumo.loc[has_valid_year].copy()

# Round each release year down to the start of its decade (e.g. 1987 -> 1980).
parfumo_sub['Decade'] = (np.floor(parfumo_sub['Release_Year'] / 10) * 10).astype(int)
# Grouped bar chart: number of perfumes released per decade, one bar per
# top brand (plus 'Other').
plt.figure(figsize=(10, 6))
sns.countplot(
    data=parfumo_sub,
    x="Decade", hue="TopBrand",
    palette="dark", alpha=.6
)

# The decade tick labels are self-explanatory, so the x-axis title is left blank.
plt.xlabel('')
plt.ylabel('Number of perfumes released')
plt.title('Most popular brands of perfume, release count per decade')
# Log scale on the count axis.
plt.yscale('log')

# Place the legend below the axes in two columns, without a frame.
plt.legend(title='Top Brand', loc='upper center',
           bbox_to_anchor=(0.5, -0.15), ncol=2, frameon=False)
plt.tight_layout()

# Save before show(); bbox_inches='tight' keeps the legend that sits outside
# the axes inside the saved image.
plt.savefig("parfumo.png", dpi=300, bbox_inches='tight')
plt.show()
# Finish with an encouraging message from the praise package (output below).
praise()
[1] "You are incredible!"