Code
library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)
library(tidytext)
library(rvest)library(tidyverse) # ggplot, lubridate, dplyr, stringr, readr...
library(praise)
library(tidytext)
library(rvest)

This week we’re exploring Brazilian Companies, curated from Brazil’s open CNPJ (Cadastro Nacional da Pessoa Jurídica) records published by the Brazilian Ministry of Finance / Receita Federal on the national open-data portal (dados.gov.br).
The CNPJ open data is a large-scale public registry of Brazilian legal entities. For this dataset, the raw company records were cleaned and enriched with lookup tables (legal nature, owner qualification, and company size), then filtered to retain firms above a share-capital threshold so the analysis focuses on meaningful variation in capital stock.
Thank you to Marcelo Silva for curating this week’s dataset.
# Load the TidyTuesday companies data and add `owner`, a short label for each
# of the long `owner_qualification` descriptions. Any qualification not listed
# below gets NA, because case_when() has no fallback branch here.
companies <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2026/2026-01-27/companies.csv"
) |>
  mutate(
    owner = case_when(
      owner_qualification == "Administrator / Manager" ~ "Manager",
      owner_qualification == "Attorney-in-fact / Legal Representative (Power of Attorney)" ~ "Legal Representative",
      owner_qualification == "Beneficial Owner (individual) resident or domiciled in Brazil" ~ "Individual Owner",
      owner_qualification == "Director / Officer" ~ "Director",
      owner_qualification == "Entrepreneur / Business Owner" ~ "Entrepreneur",
      owner_qualification == "Executor / Estate Administrator" ~ "Executor",
      owner_qualification == "Intervenor / Court-appointed Administrator" ~ "Intervenor",
      owner_qualification == "Judicial Administrator (Court-appointed)" ~ "Judicial",
      owner_qualification == "Liquidator" ~ "Liquidator",
      owner_qualification == "Managing Partner / Partner-Administrator" ~ "Partner",
      owner_qualification == "Ostensible Partner (Managing partner in a silent partnership)" ~ "Ostensible Partner",
      owner_qualification == "President / Chair" ~ "President",
      owner_qualification == "Sole Owner of an Individual Real Estate Company" ~ "Owner"
    )
  )

# Count the rows for each owner qualification; the printed result follows.
companies |>
  group_by(owner_qualification) |>
  summarize(n())
# A tibble: 13 × 2
owner_qualification `n()`
<chr> <int>
1 Administrator / Manager 15236
2 Attorney-in-fact / Legal Representative (Power of Attorney) 13
3 Beneficial Owner (individual) resident or domiciled in Brazil 442
4 Director / Officer 1634
5 Entrepreneur / Business Owner 15201
6 Executor / Estate Administrator 22
7 Intervenor / Court-appointed Administrator 1
8 Judicial Administrator (Court-appointed) 302
9 Liquidator 29
10 Managing Partner / Partner-Administrator 107027
11 Ostensible Partner (Managing partner in a silent partnership) 32
12 President / Chair 1343
13 Sole Owner of an Individual Real Estate Company 50
# Per-owner quantiles of log10(capital_stock), reshaped to long format for a
# stacked bar chart. Each quantile column is converted to the increment over
# the quantile below it, so the stacked segments of one owner sum to its q99.
quantile_data <- companies |>
  # Drop this qualification; the count table above shows it has a single row.
  filter(owner_qualification != "Intervenor / Court-appointed Administrator") |>
  # The log transform is element-wise, so it does not need to be grouped.
  mutate(capital_stock = log10(capital_stock)) |>
  group_by(owner) |>
  summarize(
    q25 = quantile(capital_stock, probs = 0.25, names = FALSE),
    q50 = quantile(capital_stock, probs = 0.50, names = FALSE),
    q75 = quantile(capital_stock, probs = 0.75, names = FALSE),
    q95 = quantile(capital_stock, probs = 0.95, names = FALSE),
    q99 = quantile(capital_stock, probs = 0.99, names = FALSE)
  ) |>
  # mutate() evaluates its arguments in order, so the differences must run from
  # the top quantile down: each one still sees the unmodified column below it.
  mutate(
    total = q99,
    q99 = q99 - q95,
    q95 = q95 - q75,
    q75 = q75 - q50,
    q50 = q50 - q25
  ) |>
  # Order owners by their 99th percentile, largest first.
  mutate(owner = forcats::fct_reorder(owner, total, .desc = TRUE)) |>
  select(-total) |>
  pivot_longer(cols = q25:q99, names_to = "percentile", values_to = "quantile")

# The coxcomb plot was fun to make, but it probably would have been better as a
# bar plot. We don’t get any additional information when transformed into polar
# coordinates.
# Five fill colours, one per quantile band; scale_fill_manual() assigns them to
# the `percentile` factor levels in order.
brazil_palette <- c("#009739", "#84CC16", "#FFDF00", "#002776", "#4DA6FF")

# Coxcomb chart: stacked bars of the quantile increments per owner, drawn in
# polar coordinates. The y values are log10 amounts, so axis labels are shown
# as powers of ten.
quantile_data |>
  # Reverse the factor levels so the legend runs from q99 down to q25.
  mutate(percentile = forcats::fct_rev(percentile)) |>
  ggplot(aes(x = owner, y = quantile, fill = percentile)) +
  geom_col() +
  scale_y_continuous(labels = scales::label_math(expr = 10^.x)) +
  coord_polar() +
  scale_fill_manual(values = brazil_palette) +
  theme_test() +
  labs(
    x = "",
    y = "",
    fill = "Declared Share Capital\n Quantile (Brazilian Real)"
  )

praise()
# [1] "You are geometric!"