Day 5: How About a Nice Game of Chess?

Reference

library(tidyverse)

# puzzle input: the prefix that is combined with an increasing number
input <- "uqwqemis"

# MD5 hashing function generated by the digest package.
# Always call it with serialize = FALSE, so that the string itself is hashed
# and not its serialized R representation (see ?digest::getVDigest).
md5 <- digest::getVDigest("md5")

md5() tests

# example 1: prefix "abc" with number 3231929
md5("abc3231929", serialize = FALSE)
## [1] "00000155f8105dff7f56ee10fa9b9abd"

# example 2: prefix "abc" with number 5017308
md5("abc5017308", serialize = FALSE)
## [1] "000008f82c5b3924a1ecbebf60344e00"

# example 3: prefix "abc" with number 5278568
md5("abc5278568", serialize = FALSE)
## [1] "00000f9a2c309875e05c5a5d09f1b8c4"

Part One

# Part One: hash paste0(input, number) for increasing numbers, keep the hashes
# that start with "00000" and build the password from the 6th character of
# the first eight of them.

# cache file for the (slow to compute) matching hashes
md5_target_file <- "temp/temp_2016_5_part1/md5_target.csv"
dir.create(dirname(md5_target_file), recursive = TRUE, showWarnings = FALSE)

# number of attempts, this is empirically enough
n <- 1.7e7

# CODE TO CREATE THE FILE "temp/temp_2016_5_part1/md5_target.csv"
if (!file.exists(md5_target_file)) {
    # md5() is vectorised over character vectors, so the whole column is
    # hashed in a single call instead of one R-level call per string
    md5_hashes <- tibble(number = 1:n,
                         string = paste0(input, number),
                         hash   = md5(string, serialize = FALSE))

    md5_target <- md5_hashes |>
        filter(str_sub(hash, start = 1, end = 5) == "00000")

    md5_target |>
        write_csv(file = md5_target_file)
}

# output
# hash is read explicitly as text so it can never be guessed as a number
md5_target <- read_csv(
    file = md5_target_file,
    col_types = cols(hash = col_character()),
    show_col_types = FALSE
)

# fail loudly instead of printing a too-short password when n is too small
stopifnot(
    "fewer than 8 hashes start with '00000', increase n" =
        nrow(md5_target) >= 8
)

# the password has exactly 8 characters: use the 8 lowest numbers only,
# so a larger n cannot make the password longer
md5_target |>
    arrange(number) |>
    slice_head(n = 8) |>
    mutate(char = str_sub(string = hash, start = 6, end = 6)) |>
    summarise(password = str_flatten(char)) |>
    pull(password) |>
    cat()
## 1a3099aa

Part Two

library(arrow)

# CODE TO CREATE THE CSV FILES IN "temp/temp_2016_5_part2/csv"
# The numbers to try are split into n_parts chunks; each chunk is hashed and
# cached in its own CSV file (chunk1.csv, chunk2.csv, ...).
csv_dir <- "temp/temp_2016_5_part2/csv"
dir.create(csv_dir, recursive = TRUE, showWarnings = FALSE)

# numbers to try, this is empirically enough
n <- 1:3e7
n_parts <- 1e2  # number of parts
# consecutive ranges of n with (roughly) the same size, one per CSV file
parts <- split(n, cut_number(n, n_parts))

# one file per part; derived from n_parts so both always agree
chunk_files <- file.path(csv_dir, paste0("chunk", seq_len(n_parts), ".csv"))

if (!all(file.exists(chunk_files))) {
    # hash and write one chunk at a time, so that only a single chunk has to
    # be held in memory instead of all of them
    walk2(.x = parts,
          .y = seq_len(n_parts),
          .f = \(numbers, id) {
              # md5() is vectorised over character vectors, so the whole
              # chunk is hashed in a single call
              tibble(id     = id,
                     number = numbers,
                     string = paste0(input, number),
                     hash   = md5(string, serialize = FALSE)) |>
                  write_csv(file = chunk_files[id])
          },
          .progress = TRUE)
}

# Part Two: convert the cached CSV chunks to a parquet dataset, keep the
# hashes that start with "00000" and build the password from them.
csvs <- open_dataset(sources = "temp/temp_2016_5_part2/csv", format = "csv")

parquet_dir <- "temp/temp_2016_5_part2/parquet"
dir.create(parquet_dir, recursive = TRUE, showWarnings = FALSE)

# one parquet partition per chunk id
csvs |>
    write_dataset(path         = parquet_dir,
                  format       = "parquet",
                  partitioning = "id")

md5_hashes <- open_dataset(sources = parquet_dir,
                           schema  = schema(number = int64(),
                                            string = string(),
                                            hash   = string()))

# the filter runs on the arrow dataset, only the matching rows are loaded;
# 6th character of the hash = position, 7th character = password character
md5_hashes_filtered <- md5_hashes |> 
    filter(str_sub(string = hash, start = 1, end = 5) == "00000") |> 
    as_tibble() |> 
    mutate(position = str_sub(string = hash, start = 6, end = 6),
           char     = str_sub(string = hash, start = 7, end = 7))

# position is a character column: compare it with character values instead
# of relying on the implicit integer-to-character coercion of %in%
md5_hashes_valid_positions <- md5_hashes_filtered |> 
    filter(position %in% as.character(0:7))

# for every position keep only the row with the lowest number
md5_hashes_first_positions <- md5_hashes_valid_positions |> 
    arrange(number) |> 
    summarise(across(everything(), first), .by = position)

md5_hashes_sorted_positions <- md5_hashes_first_positions |> 
    arrange(position) 

# fail loudly instead of printing a too-short password when some position
# was never found in the hashed numbers
stopifnot(
    "not every position 0-7 was found, increase the numbers to try" =
        nrow(md5_hashes_sorted_positions) == 8
)

md5_hashes_sorted_positions |> 
    summarise(password = str_flatten(char)) |> 
    pull(password) |> 
    cat()
## 694190cd