const std = @import("std");
const mem = std.mem;
const HashMap = std.HashMap;
const ArrayList = std.ArrayList;
const StringHashMap = std.StringHashMap(u32);

/// Returns how many times each word occurs in `s`, keyed by the lowercased word.
///
/// A word is a maximal run of ASCII letters and digits. An apostrophe is part
/// of a word only when it sits directly between two such characters, so
/// `don't` is one word while the quotes in `'quit'` are separators.
///
/// Caller owns the returned memory: every key is a slice allocated with
/// `allocator` and must be freed individually before calling `deinit` on the
/// map. On error nothing is leaked; the partial map and its keys are released.
pub fn countWords(allocator: mem.Allocator, s: []const u8) mem.Allocator.Error!StringHashMap {
    var counts = StringHashMap.init(allocator);
    // The map does not own its keys, so they must be freed by hand if the
    // scan fails part-way through.
    errdefer {
        var keys = counts.keyIterator();
        while (keys.next()) |key| allocator.free(key.*);
        counts.deinit();
    }
    try read_words(allocator, s, 0, null, &counts);
    return counts;
}

/// Scans `s` from `index` to its end and records every completed word in `words`.
///
/// `currentWord`, when non-null, is the word in progress: a subslice of `s`
/// that ends right before `index`. The scan is iterative, so stack usage does
/// not depend on the length of `s`.
fn read_words(allocator: mem.Allocator, s: []const u8, index: usize, currentWord: ?[]const u8, words: *StringHashMap) mem.Allocator.Error!void {
    var current = currentWord;
    var i = index;
    while (i < s.len) : (i += 1) {
        const in_word = isWordLetter(s[i]) or (s[i] == '\'' and isBetweenLetters(s, i));
        if (in_word) {
            // Either extend the word in progress by one byte or start a new one here.
            current = if (current) |word| growSubslice(s, word) else s[i .. i + 1];
        } else if (current) |word| {
            try recordWord(allocator, word, words);
            current = null;
        }
    }
    // A word that runs up to the end of the input has no trailing separator.
    if (current) |word| try recordWord(allocator, word, words);
}

/// Increments the count of `word` (compared case-insensitively) in `words`.
/// A lowercased copy is allocated as the key; it is kept only when the word is
/// new and freed otherwise, including when the map insertion itself fails.
fn recordWord(allocator: mem.Allocator, word: []const u8, words: *StringHashMap) mem.Allocator.Error!void {
    const key = try allocator.alloc(u8, word.len);
    errdefer allocator.free(key);
    for (word, key) |letter, *out| out.* = toLowerCase(letter);

    const entry = try words.getOrPut(key);
    if (entry.found_existing) {
        // The map keeps the key it already holds, so this copy is redundant.
        entry.value_ptr.* += 1;
        allocator.free(key);
    } else {
        entry.value_ptr.* = 1;
    }
}

/// Returns true when `s[index]` has a word character on both sides.
fn isBetweenLetters(s: []const u8, index: usize) bool {
    // `index + 1 < s.len` rather than `index < s.len - 1`: no underflow on an empty slice.
    return index > 0 and index + 1 < s.len and
        isWordLetter(s[index - 1]) and isWordLetter(s[index + 1]);
}

/// Returns true for ASCII letters and digits.
fn isWordLetter(letter: u8) bool {
    return switch (letter) {
        'A'...'Z', 'a'...'z', '0'...'9' => true,
        else => false,
    };
}

/// Returns true for ASCII `A`-`Z`.
fn isUpperCase(letter: u8) bool {
    return letter >= 'A' and letter <= 'Z';
}

/// Maps ASCII `A`-`Z` to `a`-`z`; every other byte is returned unchanged.
fn toLowerCase(letter: u8) u8 {
    return if (isUpperCase(letter)) letter + ('a' - 'A') else letter;
}

/// Extends `child`, a subslice of `root`, by one byte at its end.
/// Returns `child` unchanged when it already reaches the end of `root` or when
/// it does not start inside `root`.
fn growSubslice(root: []const u8, child: []const u8) []const u8 {
    const root_addr = @intFromPtr(root.ptr);
    const child_addr = @intFromPtr(child.ptr);
    // Guard the subtraction below against a `child` that starts before `root`.
    if (child_addr < root_addr) return child;

    const start = child_addr - root_addr;
    const end = start + child.len;
    return if (end < root.len) root[start .. end + 1] else child;
}