Are you ready for a thrilling data adventure? Noah’s Rug is an awesome data game that challenges you to solve puzzles using a fictional dataset. Every day of Hanukkah, you can light up a candle and discover a piece of a mysterious tapestry. After the last day, you can test your skills in a speedrun mode. You can use any tools you want to explore the data and find the answers. This is a collection of my solutions with some explanations.
Puzzle 1
read in the data (available on the challenge website)
library(tidyverse)
noahs_customers <- read_csv("../noahs-csv/5784/noahs-customers.csv")
clean the data, especially the names, then split into first and last name
noahs_customers <- noahs_customers |>
mutate(name = str_remove_all(name, " Jr.")) |>
mutate(name = str_remove_all(name, " III")) |>
mutate(name = str_remove_all(name, " II")) |>
mutate(name = str_remove_all(name, " IV")) |>
mutate(phone = str_remove_all(phone, "-")) |>
extract(name, into = c('FirstName', 'LastName'), '(.*)\\s+([^ ]+)$')
function to map letters to their phone buttons
replace_chars <- function(input_string) {
mapping <- letters
replacement <- c("2", "2", "2", "3", "3", "3", "4",
"4", "4", "5", "5", "5", "6", "6", "6", "7", "7",
"7", "7", "8", "8", "8", "9", "9", "9", "9")
for (i in seq_along(mapping)) {
input_string <- gsub(mapping[i], replacement[i], tolower(input_string))
}
return(input_string)
}
apply this function
noahs_customers$LastName_Number <- sapply(noahs_customers$LastName, replace_chars)
find the name where phone number and last name matches
noahs_customers |> filter(LastName_Number == phone)
here’s another solution in Julia
using CSV, DataFrames
noahs_customer = CSV.File("../5784/noahs-customers.csv") |> DataFrame
transform!(noahs_customer, :name => ByRow(x -> join(split(x, " ")[2:end], " ")) => :last_name)
transform!(noahs_customer, :last_name => ByRow(x -> replace(x, " III" => "", " IV" => "", " Jr." => "")) => :last_name)
function replace_chars(input_string)
letters = join(Char.(97:122))
mapping = Dict(zip(letters, ["2", "2", "2", "3", "3", "3", "4", "4", "4", "5", "5", "5", "6", "6", "6", "7", "7", "7", "7", "8", "8", "8", "9", "9", "9", "9"]))
for (k, v) in mapping
input_string = replace(lowercase(input_string), k => v)
end
return input_string
end
transform!(noahs_customer, :last_name => ByRow(x -> replace_chars(x)) => :last_name_transformed)
transform!(noahs_customer, :phone => ByRow(x -> replace(x, "-" => "")) => :phone)
filter(row -> row.phone == row.last_name_transformed, noahs_customer)
Puzzle 2
R
solution:
noahs_customers <- read.csv("../5784/noahs-customers.csv")
noahs_orders <- read.csv("../5784/noahs-orders.csv")
noahs_orders_items <- read_csv("../5784/noahs-orders_items.csv")
noahs_products <- read_csv("../5784/noahs-products.csv")
noahs_customers |>
mutate(name = str_remove_all(name, " Jr.")) |>
mutate(name = str_remove_all(name, " III")) |>
mutate(name = str_remove_all(name, " II")) |>
mutate(name = str_remove_all(name, " IV")) |>
extract(name, into = c('FirstName', 'LastName'), '(.*)\\s+([^ ]+)$') |>
mutate(initials = str_c(str_sub(FirstName, 1, 1), str_sub(LastName, 1, 1))) |>
left_join(noahs_orders, by="customerid") |>
left_join(noahs_orders_items, by="orderid") |>
left_join(noahs_products, by="sku") |>
mutate(order_year = year(shipped)) |>
filter(initials == "JP" & order_year == 2017 & str_detect(desc, "Bagel")) |>
select(phone)
Julia
solution:
@chain noahs_customers begin
leftjoin(noahs_orders, on=:customerid, matchmissing=:equal)
leftjoin(noahs_orders_items, on=:orderid, matchmissing=:equal)
leftjoin(noahs_products, on=:sku, matchmissing=:equal)
transform(:name => ByRow(x -> join(split(x, " ")[2:end], " ")) => :last_name)
transform(:name => ByRow(x -> join(split(x, " ")[1], " ")) => :first_name)
transform(:last_name => ByRow(x -> replace(x, " III" => "", " IV" => "", " Jr." => "")) => :last_name)
@rsubset occursin.("Bagel", coalesce.(:desc, ""))
transform(:ordered => ByRow(x -> DateTime(x, dateformat"yyyy-mm-dd HH:MM:SS")) => :ordered)
filter(row -> startswith(row.first_name, "J") && startswith(row.last_name, "P") && year(row.ordered) == 2017, _)
end
# Joshua Peterson 332-274-4185
Puzzle 3
R
solution:
noahs_customers <- read.csv("../5784/noahs-customers.csv")
noahs_customers |>
# year of rabbit
filter(year(birthdate) %in% c(1927, 1939, 1951, 1963, 1975, 1987, 1999, 2011, 2023)) |>
# cancer
filter(month(birthdate) %in% c(6,7)) |>
# address of person from puzzle 2
filter(str_detect(citystatezip, "Jamaica, NY 11435"))
# select first one because male
Julia
solution:
using CSV, DataFrames, DataFramesMeta, Dates
noahs_customers = CSV.File(".../5784/noahs-customers.csv") |> DataFrame
@chain noahs_customers begin
@rsubset(year(:birthdate) in [1927, 1939, 1951, 1963, 1975, 1987, 1999, 2011, 2023])
@rsubset(month(:birthdate) in [6, 7])
@rsubset occursin.("Jamaica, NY 11435", :citystatezip)
end
Puzzle 4
This one was a bit tricky, the result still contains a list of potential candidates that I tried out manually.
R
solution:
noahs_customers <- read_csv("../5784/noahs-customers.csv")
noahs_orders <- read_csv("../5784/noahs-orders.csv")
noahs_orders_items <- read_csv("../5784/noahs-orders_items.csv")
noahs_products <- read_csv("../5784/noahs-products.csv")
noahs_customers |>
left_join(noahs_orders, by="customerid") |>
left_join(noahs_orders_items, by="orderid") |>
left_join(noahs_products, by="sku") |>
# filter bakery items by SKU number
filter(str_detect(sku, "BKY")) |>
# the person likes to shop early
filter(hour(ordered) < 5 & hour(shipped) < 5) |>
# I assumed that the person is not too old because she likes to bike a lot
filter(year(birthdate) > 1970) |>
# I assumed the person lives in a neighbouring are of Jamaica, NY, because she biked there
filter(str_detect(citystatezip, "Brooklyn") | str_detect(citystatezip, "Queens"))
Julia
solution:
using CSV, DataFrames, DataFramesMeta, Dates
noahs_customers = CSV.File("../5784/noahs-customers.csv") |> DataFrame
noahs_orders = CSV.File("../5784/noahs-orders.csv") |> DataFrame
noahs_orders_items = CSV.File("../5784/noahs-orders_items.csv") |> DataFrame
noahs_products = CSV.File("../5784/noahs-products.csv") |> DataFrame
@chain noahs_customers begin
leftjoin(noahs_orders, on=:customerid, matchmissing=:equal)
leftjoin(noahs_orders_items, on=:orderid, matchmissing=:equal)
leftjoin(noahs_products, on=:sku, matchmissing=:equal)
@rsubset occursin.("BKY", coalesce.(:sku, ""))
@rsubset(hour(DateTime(:ordered, "yyyy-mm-dd HH:MM:SS")) < 5)
@rsubset(hour(DateTime(:shipped, "yyyy-mm-dd HH:MM:SS")) < 5)
@rsubset(year(:birthdate) > 1970)
@rsubset occursin.("Brooklyn", coalesce.(:citystatezip, "")) || occursin.("Manhattan", coalesce.(:citystatezip, ""))
end
# Renee Harmon 607-231-3605
Puzzle 5
Julia
solution
@chain noahs_customers begin
leftjoin(noahs_orders, on=:customerid, matchmissing=:equal)
leftjoin(noahs_orders_items, on=:orderid, matchmissing=:equal)
leftjoin(noahs_products, on=:sku, matchmissing=:equal)
@rsubset occursin.("Cat", coalesce.(:desc, ""))
@rsubset occursin.("Staten Island", coalesce.(:citystatezip, ""))
@rsubset :qty >= 10
end
# Nicole Wilson 631-507-6048
Puzzle 6
R
solution
noahs_customers |>
left_join(noahs_orders, by="customerid") |>
left_join(noahs_orders_items, by="orderid") |>
left_join(noahs_products, by="sku") |>
group_by(orderid) |>
mutate(total_cost = sum(wholesale_cost),
profit = total - total_cost,
negative_profit_count = sum(profit < 0)) |>
arrange(desc(negative_profit_count)) |>
first()
# Sherri Long 585-838-9161
Julia
solution
@chain noahs_customers begin
leftjoin(noahs_orders, on=:customerid, matchmissing=:equal)
leftjoin(noahs_orders_items, on=:orderid, matchmissing=:equal)
leftjoin(noahs_products, on=:sku, matchmissing=:equal)
groupby(:orderid)
@transform(:total_cost = sum(:wholesale_cost); ungroup=false)
@transform(:profit = :total .- :total_cost; ungroup=false)
@transform(:negative_profit_count = sum(:profit .< 0))
@orderby -:negative_profit_count
end
# Sherri Long 585-838-9161
Puzzle 7
Julia
solution
df = @chain noahs_customers begin
leftjoin(noahs_orders, on=:customerid, matchmissing=:equal)
leftjoin(noahs_orders_items, on=:orderid, matchmissing=:equal)
leftjoin(noahs_products, on=:sku, matchmissing=:equal)
@rsubset occursin.("COL", coalesce.(:sku, ""))
transform(:ordered => ByRow(x -> DateTime(x, dateformat"yyyy-mm-dd HH:MM:SS")) => :ordered)
# remove colors in parenthesis
transform(:desc => ByRow(x -> replace(x, r"\(\w+\)" => "")) => :desc_clean)
end
sherri_items = @chain df begin
subset(:name => ByRow(x -> x == "Sherri Long"))
_.desc_clean
end
sherri_dates = @chain df begin
subset(:name => ByRow(x -> x == "Sherri Long"))
_.ordered
end
@chain df begin
@rsubset(Date(:ordered) in Date.(sherri_dates))
@rsubset(:desc_clean in sherri_items)
end
# Carlos Myers 838-335-7157
Puzzle 8
Julia
solution
@chain noahs_customers begin
leftjoin(noahs_orders, on=:customerid, matchmissing=:equal)
leftjoin(noahs_orders_items, on=:orderid, matchmissing=:equal)
leftjoin(noahs_products, on=:sku, matchmissing=:equal)
@rsubset occursin.("COL", coalesce.(:sku, ""))
groupby(:name)
transform(:name => length)
@orderby -:name_length
first()
end
# James Smith 212-547-3518