library(tidyverse)
# Puzzle input: the door ID that is prefixed to every candidate index.
input <- "uqwqemis"
# get md5 function from digest package
# when using it remember to set serialize = FALSE, so that the string itself
# is hashed rather than its serialized R representation
# see help for more details
md5 <- digest::getVDigest(algo = "md5")

# Day 5: How About a Nice Game of Chess? ----
# md5() tests ----
# Sanity checks: hash the prefix "abc" followed by three indices; the
# recorded outputs below all start with five zeroes.
# example 1
paste0("abc", 3231929) |> md5(serialize = FALSE)
## [1] "00000155f8105dff7f56ee10fa9b9abd"
# example 2
paste0("abc", 5017308) |> md5(serialize = FALSE)
## [1] "000008f82c5b3924a1ecbebf60344e00"
# example 3
paste0("abc", 5278568) |> md5(serialize = FALSE)
## [1] "00000f9a2c309875e05c5a5d09f1b8c4"

# Part One ----
# Cache directory and file for the Part One hashes that start with "00000".
part1_dir <- "temp/temp_2016_5_part1"
part1_cache <- file.path(part1_dir, "md5_target.csv")
# recursive = TRUE also creates the parent "temp" directory when it is missing
if (!dir.exists(part1_dir)) {
  dir.create(part1_dir, recursive = TRUE)
}
# number of attempts, this is empirically enough
n <- 1.7e7
# CODE TO CREATE THE FILE "temp/temp_2016_5_part1/md5_target.csv"
# The search hashes `n` candidate strings, so it only runs when the cache
# file does not exist yet.
if (!fs::file_exists(part1_cache)) {
  md5_hashes <- tibble(
    number = seq_len(n),
    string = paste0(input, number),
    hash = map_chr(
      .x = string,
      .f = \(x) md5(x, serialize = FALSE),
      .progress = TRUE
    )
  )
  # keep only the hashes whose first five hex characters are zeroes
  md5_target <- md5_hashes |>
    filter(str_sub(hash, start = 1, end = 5) == "00000")
  write_csv(md5_target, file = part1_cache)
}
# output
# Read back the cached hashes that start with "00000".
md5_target <- read_csv(
  file = "temp/temp_2016_5_part1/md5_target.csv",
  show_col_types = FALSE
)
# The password is built from the 6th hex character of the first eight
# matching hashes, taken in order of increasing index. Sorting and truncating
# explicitly keeps the answer stable even if the cache holds more than eight
# matches or its rows are not in index order.
md5_target |>
  arrange(number) |>
  slice_head(n = 8) |>
  mutate(char = str_sub(hash, start = 6, end = 6)) |>
  summarise(password = str_flatten(char)) |>
  pull(password) |>
  cat()
## 1a3099aa

# Part Two ----
library(arrow)
# CODE TO CREATE THE CSV FILES IN "temp/temp_2016_5_part2/csv"
csv_dir <- "temp/temp_2016_5_part2/csv"
# recursive = TRUE also creates the parent directories when they are missing
if (!dir.exists(csv_dir)) {
  dir.create(csv_dir, recursive = TRUE)
}
# number of attempts, this is empirically enough
n <- 1:3e7 # candidate indices to try
n_parts <- 1e2 # number of parts
# split the indices into n_parts groups holding the same number of values
parts <- split(n, cut_number(n, n_parts))
chunk_ids <- seq_len(n_parts)
chunk_files <- file.path(csv_dir, paste0("chunk", chunk_ids, ".csv"))
# Regenerate all chunks if at least one chunk file is missing.
if (!all(fs::file_exists(chunk_files))) {
  # Each chunk is hashed and written before the next one is built, so the
  # hashes of only one chunk are held in memory at a time.
  walk2(
    .x = parts,
    .y = chunk_ids,
    .f = \(numbers, id) {
      chunk <- tibble(
        id = id,
        number = numbers,
        string = paste0(input, number),
        hash = map_chr(
          .x = string,
          .f = \(x) md5(x, serialize = FALSE)
        )
      )
      write_csv(chunk, file = chunk_files[[id]])
    },
    .progress = TRUE
  )
}
# Open all CSV files in the folder as a single Arrow dataset.
csvs <- open_dataset(
  sources = "temp/temp_2016_5_part2/csv",
  format = "csv"
)
# Make sure the output folder for the Parquet copy exists.
if (!dir.exists("temp/temp_2016_5_part2/parquet")) {
  dir.create("temp/temp_2016_5_part2/parquet")
}
# Rewrite the CSV data as Parquet, partitioned by the `id` column.
write_dataset(
  dataset = csvs,
  path = "temp/temp_2016_5_part2/parquet",
  format = "parquet",
  partitioning = "id"
)
# Reopen the Parquet copy, declaring the column types explicitly.
md5_hashes <- open_dataset(
  sources = "temp/temp_2016_5_part2/parquet",
  schema = schema(
    number = int64(),
    string = string(),
    hash = string()
  )
)
# Keep only the hashes starting with five zeroes; the filter is applied to
# the Arrow dataset before the rows are converted to a tibble.
md5_hashes_filtered <- md5_hashes |>
  filter(str_sub(string = hash, start = 1, end = 5) == "00000") |>
  as_tibble()
# 6th hex character: slot in the password; 7th hex character: value to put there
md5_hashes_filtered <- md5_hashes_filtered |>
  mutate(
    position = str_sub(hash, 6, 6),
    char = str_sub(hash, 7, 7)
  )
# only slots "0" to "7" are accepted (`position` is a character column)
md5_hashes_valid_positions <- filter(
  md5_hashes_filtered,
  position %in% as.character(0:7)
)
# for every slot keep the row with the smallest index, i.e. the first hit
md5_hashes_first_positions <- md5_hashes_valid_positions |>
  arrange(number) |>
  summarise(across(everything(), first), .by = position)
# order the slots and glue their characters together
md5_hashes_sorted_positions <- arrange(md5_hashes_first_positions, position)
cat(str_flatten(md5_hashes_sorted_positions$char))
## 694190cd