Word frequencies using a hash table!
I wrote this script to learn how hash tables work.
Copy the source into your script editor and you get a new menu entry called Words.
The Top10 and Bottom10 aliases were taken from the official mIRC documentation.
;
; Collects words and their frequencies. Last modified: January 19, 2021
;
; By gennarino
;
; Creates Hash table Words => Frequencies
;
; Runs when the script starts: makes sure the Words hash table exists and
; restores any previously saved contents from Words.hsh in the script folder.
on *:START: {
  ; Create the table (100 slots) only if it is not already there.
  if (!$hget(Words)) hmake Words 100
  ; Load the saved word counts when a save file is present.
  if ($isfile($scriptdir/Words.hsh)) hload Words $scriptdir/Words.hsh
}
; Saves the Words hash table to Words.hsh when mIRC exits.
on *:EXIT: {
  ; Nothing to save if the table does not exist.
  if (!$hget(Words)) return
  hsave Words $scriptdir/Words.hsh
}
; Saves the Words hash table to Words.hsh whenever a server connection drops.
on *:DISCONNECT: {
  ; Nothing to save if the table does not exist.
  if (!$hget(Words)) return
  hsave Words $scriptdir/Words.hsh
}
;End Hash Table handler
; Counts word frequencies for every channel message.
; Each word longer than 3 characters is stored in the Words hash table
; (item = word, data = number of times it has been seen).
; The global %single is cleared for every message and then set to the last
; word of that message that was not yet in the table; the Words popup menu
; reads it.
on *:TEXT:*:#: {
  ; Skip sentences containing links.
  if (http isin $1-) halt
  ; Remove all control codes (bold/underline/italics/color/reverse), then turn
  ; punctuation into spaces so it separates words.
  ; $chr(35) is used for the "#" character: a bare # inside a channel event is
  ; evaluated as the channel name, so the literal character would never match.
  var %text = $strip($1-,buricmo)
  %text = $replace(%text,$chr(35),$chr(32),?,$chr(32),.,$chr(32),!,$chr(32),",$chr(32),',$chr(32),$chr(44),$chr(32),:,$chr(32),$chr(40),$chr(32),$chr(41),$chr(32),/,$chr(32))
  set %single $null
  var %total = $numtok(%text,32)
  var %k = 1
  while (%k <= %total) {
    var %word = $gettok(%text,%k,32)
    ; Accept only words longer than 3 characters.
    if ($len(%word) > 3) {
      if ($hget(Words,%word)) {
        ; Known word: bump its counter.
        hinc Words %word 1
      }
      else {
        ; First sighting: start counting and remember it as the newest word.
        hadd Words %word 1
        %single = %word
      }
    }
    inc %k
  }
}
; Popup menu (all windows) exposing the word-frequency tools under a
; "Words" submenu:
;   Save Hash Table  - writes the Words table to Words.hsh in the script folder
;   Save Ini File    - writes the Words table to Words.ini using hsave -i
;   Show Top10       - runs the top10 alias
;   Show Bottom10    - runs the bottom10 alias
;   Show LastUnknown - echoes the global %single set by the on TEXT handler
;   Prune lowest     - prompts for a limit and passes it to Wprune
;   Search Word      - prompts for a word and passes it to Wget
menu * {
♡ --- Words -->
.-
.Save Hash Table: .hsave Words $scriptdir/Words.hsh
.Save Ini File: .hsave -i Words $scriptdir/Words.ini
.Show Top10: .top10
.Show Bottom10: .bottom10
.Show LastUnknown: //echo -a Last single word used: %single
.Prune lowest: Wprune $?="Enter Upper limit"
.Search Word: Wget $?="Enter word: "
; .Print All: .print_All_Words ; Warning: could hang MIRC
.-
}
; Echoes every entry of the Words hash table to the active window as
; "index) word => count". Stops at the first index that yields no item name.
alias print_All_Words {
  echo -a Words Table:
  var %n = 1
  var %name = $hget(Words,%n).item
  ; Walk the table by index until no item name is returned.
  while (%name) {
    echo -a %n $+ ) %name => $hget(Words,%name)
    inc %n
    %name = $hget(Words,%n).item
  }
}
; Echoes the (up to) ten most frequent words of the Words hash table to the
; active window as "TOP 10: ...", each word followed by its count in
; parentheses.
; Uses top10.txt as a scratch file: the table's data values are written one
; per line, then /filter sorts them in descending numeric order and prefixes
; each line with a number. That number is used as the item index.
alias top10 {
  hsave -no Words top10.txt
  filter -ffcteun 1 32 top10.txt top10.txt
  ; Never read past the number of stored words: with fewer than 10 entries
  ; $read would return nothing and $hget would be asked for an empty index.
  var %limit = $hget(Words,0).item
  if (%limit > 10) { %limit = 10 }
  ; Local accumulator, so a stale global from an aborted run cannot leak in.
  var %out
  var %i = 1
  while (%i <= %limit) {
    var %name = $hget(Words,$gettok($read(top10.txt,nt,%i),1,32)).item
    %out = %out %name ( $+ $hget(Words,%name) $+ )
    inc %i
  }
  echo -a TOP 10: $replace(%out,$chr(32),$+($chr(44),$chr(32)))
}
; Echoes the (up to) ten least frequent words of the Words hash table to the
; active window as "BOTTOM 10: ...", each word followed by its count in
; parentheses.
; Uses bottom10.txt as a scratch file: the table's data values are written
; one per line, then /filter sorts them in ascending numeric order and
; prefixes each line with a number. That number is used as the item index.
alias bottom10 {
  hsave -no Words bottom10.txt
  filter -ffctun 1 32 bottom10.txt bottom10.txt
  ; Never read past the number of stored words: with fewer than 10 entries
  ; $read would return nothing and $hget would be asked for an empty index.
  var %limit = $hget(Words,0).item
  if (%limit > 10) { %limit = 10 }
  ; Local accumulator, so a stale global from an aborted run cannot leak in.
  var %out
  var %i = 1
  while (%i <= %limit) {
    var %name = $hget(Words,$gettok($read(bottom10.txt,nt,%i),1,32)).item
    %out = %out %name ( $+ $hget(Words,%name) $+ )
    inc %i
  }
  echo -a BOTTOM 10: $replace(%out,$chr(32),$+($chr(44),$chr(32)))
}
; Removes the word given as $1 from the Words hash table and reports it in
; the active window. Does nothing when no word is supplied ($$1).
alias wdel {
  hdel Words $$1
  echo 4 -a $$1 => deleted
}
; Says in the active window how many times the word given as $1 has been
; counted. Stays silent when the word is not in the Words hash table, and
; does nothing when no word is supplied ($$1).
alias wget {
  if (!$hget(Words,$$1)) return
  say The word 4 $$1 has been used 4 $hget(Words, $1) times!
}
; Deletes every word whose count is below a limit, then saves the table.
;   $1 - limit; words with a count lower than this are removed.
;        Defaults to 2 when omitted (or when it evaluates as false).
; Reports the table size before pruning and the number of removed entries.
alias Wprune {
  if (!$hget(Words)) {
    echo 4 -a Words hash table does not exist
    return
  }
  var %limit = $iif($1,$1,2)
  var %pruned = 0
  var %idx = $hget(Words,0).item
  echo -a You have %idx items in your hash table
  ; Walk from the last index down to 1: deleting while counting upwards
  ; shifts the remaining items down and makes the loop skip the one that
  ; follows each deletion.
  while (%idx > 0) {
    if ($hget(Words,%idx).data < %limit) {
      hdel Words $hget(Words,%idx).item
      inc %pruned
    }
    dec %idx
  }
  echo 4 -a Pruned %pruned entries
  hsave Words $scriptdir/Words.hsh
}
That's all folks ....