{"id":3408,"date":"2022-12-14T15:37:40","date_gmt":"2022-12-14T14:37:40","guid":{"rendered":"https:\/\/kudzia.eu\/b\/?p=3408"},"modified":"2023-06-05T15:20:33","modified_gmt":"2023-06-05T14:20:33","slug":"getting-unique-string-stats-for-large-file-where-data-has-small-cardinality","status":"publish","type":"post","link":"https:\/\/kudzia.eu\/b\/2022\/12\/getting-unique-string-stats-for-large-file-where-data-has-small-cardinality\/","title":{"rendered":"getting unique string stats for large file; where data has small cardinality"},"content":{"rendered":"<div class=\"wp-block-syntaxhighlighter-code \"><pre class=\"brush: plain; title: ; notranslate\" title=\"\">\n#!\/bin\/bash\n\npath=\/some\/path\/to\/logs\n(\nfor f in $( ls -1 $path|grep access.log|grep 2022110 ) ; do\n zcat &quot;$path\/$f&quot;|awk &#039;{print $7}&#039;|awk -F &#039;?&#039; &#039;{print $1}&#039;\ndone\n)|awk &#039;{unique_servlets&#x5B;$0]++}END{for (servlet_name in unique_servlets){ print unique_servlets&#x5B;servlet_name]&quot; &quot;servlet_name  } }&#039;|sort -n\n\n<\/pre><\/div>\n\n\n<p>sadly, not all of our logs are in clickhouse, meaning chewing them can be time consuming and not-so-fun.<\/p>\n","protected":false},"excerpt":{"rendered":"<p>sadly, not all of our logs are in clickhouse, meaning chewing them can be time consuming and not-so-fun.<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[51],"tags":[],"class_list":["post-3408","post","type-post","status-publish","format-standard","hentry","category-unimportant"],"_links":{"self":[{"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/posts\/3408","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/comments?post=3408"}],"version-history":[{"count":2,"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/posts\/3408\/revisions"}],"predecessor-version":[{"id":3410,"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/posts\/3408\/revisions\/3410"}],"wp:attachment":[{"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/media?parent=3408"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/categories?post=3408"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/kudzia.eu\/b\/wp-json\/wp\/v2\/tags?post=3408"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}