exercism/zig/word-count/word_count.zig
2024-07-28 17:45:02 -04:00

74 lines
2.5 KiB
Zig

const std = @import("std");
const mem = std.mem;
const HashMap = std.HashMap;
const ArrayList = std.ArrayList;
const StringHashMap = std.StringHashMap(u32);
/// Counts how often each word occurs in `s`.
///
/// A word is a run of ASCII letters and digits; an apostrophe is part of a
/// word only when it sits directly between two such characters. Words are
/// stored lower-cased, so `"PASSWORD"` and `"password"` share one entry.
/// Example: `"That's the password: 'PASSWORD 123'!"` yields
/// `that's: 1, the: 1, password: 2, 123: 1`.
///
/// Caller owns the returned map and every key in it: the keys are allocated
/// with `allocator` and must be freed individually before calling `deinit`.
/// If scanning fails, the partially built map and the keys already stored in
/// it are released before the error is returned.
pub fn countWords(allocator: mem.Allocator, s: []const u8) !StringHashMap {
    var counts = StringHashMap.init(allocator);
    // The map does not own its keys, so a plain `deinit` on the error path
    // would leak every key inserted so far.
    errdefer {
        var keys = counts.keyIterator();
        while (keys.next()) |key| allocator.free(key.*);
        counts.deinit();
    }

    try read_words(allocator, s, 0, null, &counts);
    return counts;
}
/// Scans `s` from `index` to its end and adds every word found to `words`.
///
/// A byte belongs to a word when `isWordLetter` accepts it, or when it is an
/// apostrophe for which `isBetweenLetters` holds. Each completed word is
/// lower-cased into a key allocated with `allocator`; if that key is already
/// present the copy is freed again and only the stored count is incremented.
///
/// `currentWord` is a word that was still open when the scan reached `index`;
/// pass `null` together with `index == 0` to scan a whole string. An `index`
/// at or beyond `s.len` scans nothing and only records `currentWord`, if any.
///
/// Returns an error when allocating a key or growing the map fails. Keys that
/// were already stored in `words` stay there and remain the map owner's
/// responsibility.
fn read_words(allocator: mem.Allocator, s: []const u8, index: usize, currentWord: ?[]const u8, words: *StringHashMap) !void {
    var pending = currentWord;

    // Iterate instead of recursing once per byte: Zig does not guarantee
    // tail-call elimination, so recursion depth would grow with the input
    // length and could exhaust the stack.
    var i = index;
    while (i < s.len) : (i += 1) {
        const byte = s[i];
        const in_word = isWordLetter(byte) or (byte == '\'' and isBetweenLetters(s, i));

        if (in_word) {
            // Open a one-byte word, or stretch the open word over this byte.
            pending = if (pending) |open| growSubslice(s, open) else s[i .. i + 1];
        } else if (pending) |done| {
            try recordWord(allocator, words, done);
            pending = null;
        }
    }

    // The input may end in the middle of a word.
    if (pending) |done| try recordWord(allocator, words, done);
}

/// Adds one occurrence of `word`, lower-cased, to `words`.
fn recordWord(allocator: mem.Allocator, words: *StringHashMap, word: []const u8) !void {
    const key = try allocator.alloc(u8, word.len);
    // Nothing owns `key` until the map has accepted it.
    errdefer allocator.free(key);
    for (key, word) |*dst, src| dst.* = toLowerCase(src);

    const entry = try words.getOrPut(key);
    if (entry.found_existing) {
        // The map keeps the key it already had, so this copy is redundant.
        allocator.free(key);
        entry.value_ptr.* += 1;
    } else {
        entry.value_ptr.* = 1;
    }
}
/// Reports whether the byte at `index` has a word character directly on both
/// sides, i.e. `s[index - 1]` and `s[index + 1]` both satisfy `isWordLetter`.
/// Returns false whenever either neighbour would fall outside `s`, which
/// includes an empty `s` and any out-of-range `index`.
fn isBetweenLetters(s: []const u8, index: usize) bool {
    // Check the length before subtracting: `s.len - 1` on an empty slice is an
    // unsigned overflow, which panics in safety-checked builds.
    if (index == 0 or s.len < 2 or index >= s.len - 1) return false;
    return isWordLetter(s[index - 1]) and isWordLetter(s[index + 1]);
}
/// Reports whether `letter` is an ASCII letter (either case) or an ASCII
/// decimal digit. Every other byte value yields false.
fn isWordLetter(letter: u8) bool {
    return switch (letter) {
        'A'...'Z', 'a'...'z', '0'...'9' => true,
        else => false,
    };
}
/// Reports whether `letter` is one of the ASCII capitals 'A' through 'Z'.
fn isUpperCase(letter: u8) bool {
    return switch (letter) {
        'A'...'Z' => true,
        else => false,
    };
}
/// Converts an ASCII capital to its lower-case counterpart; any byte that
/// `isUpperCase` rejects is returned unchanged.
fn toLowerCase(letter: u8) u8 {
    if (!isUpperCase(letter)) return letter;
    // In ASCII the lower-case letters sit a fixed distance above the capitals.
    return letter + ('a' - 'A');
}
/// Extends `child` by one byte at its end, taking that byte from `root`.
///
/// The offset of `child` within `root` is derived from the two slices'
/// addresses, so `child` is presumably meant to be a sub-slice of `root`
/// (NOTE(review): this is not checked here — confirm at the call sites).
/// When `child` already ends at or past the end of `root`, it is returned
/// unchanged.
fn growSubslice(root: []const u8, child: []const u8) []const u8 {
    const root_end = @intFromPtr(root.ptr) + root.len;
    const child_end = @intFromPtr(child.ptr) + child.len;

    // No byte of `root` lies beyond `child`, so there is nothing to absorb.
    if (child_end >= root_end) return child;

    const offset = @intFromPtr(child.ptr) - @intFromPtr(root.ptr);
    return root[offset .. offset + child.len + 1];
}