74 lines
2.5 KiB
Zig
74 lines
2.5 KiB
Zig
const std = @import("std");
|
|
const mem = std.mem;
|
|
const HashMap = std.HashMap;
|
|
const ArrayList = std.ArrayList;
|
|
|
|
const StringHashMap = std.StringHashMap(u32);
|
|
/// Returns how many times each word occurs in `s`.
///
/// A word is a run of ASCII letters and digits; an apostrophe is part of a
/// word only when it has a letter or digit directly on both sides. Words are
/// counted case-insensitively and stored under their lowercased spelling.
///
/// Example: for the input
/// `"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.`
/// the result maps "that's" -> 1, "the" -> 2, "password" -> 2, "123" -> 1 and
/// every other word ("cried", "special", "agent", "so", "i", "fled") -> 1.
///
/// Caller owns the returned memory: every key is a separate allocation made
/// with `allocator` and must be freed before calling `deinit` on the map.
/// On error nothing is leaked; the partially built map is released here.
pub fn countWords(allocator: mem.Allocator, s: []const u8) !StringHashMap {
    var counts = StringHashMap.init(allocator);
    // Runs only if scanning fails: release the keys inserted so far, then the
    // table itself, so an allocation failure mid-scan does not leak.
    errdefer {
        var keys = counts.keyIterator();
        while (keys.next()) |key| allocator.free(key.*);
        counts.deinit();
    }

    try read_words(allocator, s, 0, null, &counts);
    return counts;
}
|
|
|
|
/// Scans `s` from `index` to its end and adds every word found to `words`.
///
/// `currentWord` is the word already in progress when scanning starts: either
/// `null`, or a sub-slice of `s` that ends at `index`. Pass `0` and `null` to
/// scan a whole string.
///
/// A byte belongs to a word when it is a word letter, or when it is an
/// apostrophe with a word letter on both sides. Each completed word is
/// lowercased and its count in `words` is incremented; keys are allocated with
/// `allocator` and owned by the map.
///
/// Returns an error only if an allocation fails. Words recorded before the
/// failure stay in `words`.
fn read_words(allocator: mem.Allocator, s: []const u8, index: usize, currentWord: ?[]const u8, words: *StringHashMap) !void {
    var current = currentWord;
    var i = index;

    // Iterate instead of recursing once per byte so that stack usage does not
    // grow with the length of the input.
    while (true) : (i += 1) {
        const in_word = i < s.len and (isWordLetter(s[i]) or
            (s[i] == '\'' and isBetweenLetters(s, i)));

        if (in_word) {
            // Start a new one-byte word, or extend the pending one by a byte.
            current = if (current) |word| growSubslice(s, word) else s[i .. i + 1];
            continue;
        }

        // A separator or the end of input closes any pending word.
        if (current) |word| {
            try recordWord(allocator, word, words);
            current = null;
        }

        // `>=` rather than `==` so an out-of-range start index terminates.
        if (i >= s.len) return;
    }
}

/// Lowercases `word` and increments its count in `words`.
fn recordWord(allocator: mem.Allocator, word: []const u8, words: *StringHashMap) !void {
    const key = try allocator.alloc(u8, word.len);
    // Covers a failing `getOrPut`; no fallible call follows it, so this can
    // never run after the explicit free below.
    errdefer allocator.free(key);
    for (key, word) |*dst, src| dst.* = toLowerCase(src);

    const entry = try words.getOrPut(key);
    if (entry.found_existing) {
        // The map keeps the key it already stores, so this copy is redundant.
        allocator.free(key);
        entry.value_ptr.* += 1;
    } else {
        entry.value_ptr.* = 1;
    }
}
|
|
|
|
/// Reports whether the byte at `index` in `s` has a word letter (as defined by
/// `isWordLetter`) directly before and directly after it.
///
/// Returns false when `index` is the first or last position of `s`, or lies
/// outside `s` altogether, because such a position has no neighbour on at
/// least one side.
fn isBetweenLetters(s: []const u8, index: usize) bool {
    // Saturating subtraction keeps the bound check valid for an empty slice,
    // where a plain `s.len - 1` would underflow.
    if (index == 0 or index >= s.len -| 1) return false;
    return isWordLetter(s[index - 1]) and isWordLetter(s[index + 1]);
}
|
|
|
|
/// Reports whether `letter` is an ASCII letter (either case) or an ASCII
/// decimal digit. Every other byte value yields false.
fn isWordLetter(letter: u8) bool {
    return switch (letter) {
        '0'...'9', 'A'...'Z', 'a'...'z' => true,
        else => false,
    };
}
|
|
|
|
/// Reports whether `letter` is an ASCII uppercase letter, 'A' through 'Z'.
fn isUpperCase(letter: u8) bool {
    return switch (letter) {
        'A'...'Z' => true,
        else => false,
    };
}
|
|
|
|
/// Maps an ASCII uppercase letter to its lowercase counterpart and returns
/// every other byte unchanged.
fn toLowerCase(letter: u8) u8 {
    if (!isUpperCase(letter)) return letter;
    // Lowercase ASCII letters sit a fixed distance (32) above the uppercase ones.
    return letter + ('a' - 'A');
}
|
|
|
|
/// Extends `child` by one byte at its end, provided `child` ends before the
/// end of `root`; otherwise returns `child` unchanged.
///
/// `child` is expected to be a sub-slice of `root`: its offset is recovered by
/// subtracting the two start addresses, and the result is re-sliced from `root`.
fn growSubslice(root: []const u8, child: []const u8) []const u8 {
    const root_addr = @intFromPtr(root.ptr);
    const child_addr = @intFromPtr(child.ptr);

    const root_end = root_addr + root.len;
    const child_end = child_addr + child.len;

    // Nothing left in `root` past the end of `child`: there is no byte to add.
    if (child_end >= root_end) return child;

    const offset = child_addr - root_addr;
    return root[offset .. offset + child.len + 1];
}
|