Words Frequencies

By gennarino on Sep 05, 2019

Words Frequencies using hash table!
I wrote this script to check hash tables. Copy the source into your script editor and you get a new menu tab called Words.
The Top10 and Bottom10 aliases were taken from the official mIRC documentation

;
; Collects words and frequencies
; 
;                    By gennarino
;
; Creates Hash table Words => Frequencies
;
; START event: makes sure the Words hash table exists (1000 slots) and
; reloads the counts from Words.hsh in the script directory when that
; file is present.
on *:START: {
  if (!$hget(Words)) { hmake Words 1000 }
  ; $scriptdir already ends with a path separator, so the file name is
  ; appended directly instead of through an extra "/".
  var %file = $scriptdir $+ Words.hsh
  ; $qt() keeps the path as one parameter when the directory contains spaces
  if ($isfile(%file)) { hload Words $qt(%file) }
}

; EXIT and DISCONNECT events: write the Words table to Words.hsh in the
; script directory, provided the table exists.
; -o overwrites the previous file; $qt() keeps a path containing spaces
; together as a single parameter.
on *:EXIT: {
  if ($hget(Words)) { hsave -o Words $qt($scriptdir $+ Words.hsh) }
}
on *:DISCONNECT: {
  if ($hget(Words)) { hsave -o Words $qt($scriptdir $+ Words.hsh) }
}

;End Hash Table handler

; TEXT event on any channel: counts the words of the message in the
; Words hash table (item = word, data = number of times seen).
; Messages containing "http" are ignored, the characters ? . ! " ' , :
; are treated as word separators, and only words longer than 3
; characters are counted.
on *:TEXT:*:#: {
  ; skip sentences containing links
  if (http isin $1-) { return }

  ; turn punctuation into spaces so it splits words instead of sticking to them
  var %sentence = $replace($1-,?,$chr(32),.,$chr(32),!,$chr(32),",$chr(32),',$chr(32),$chr(44),$chr(32),:,$chr(32))
  var %total = $numtok(%sentence,32), %k = 1, %word

  while (%k <= %total) {
    %word = $gettok(%sentence,%k,32)
    ; accept only words longer than 3 characters
    if ($len(%word) > 3) {
      ; -m creates the table if it is missing, and hinc creates a missing
      ; item on its first increment, so no separate hadd branch is needed
      hinc -m Words %word
    }
    inc %k
  }
}

; Popup menu: adds a "Words" submenu to save the table (plain .hsh or
; .ini format, both in the script directory) and to call the top10,
; bottom10 and print_All_Words aliases.
; The save paths are wrapped in $qt() so a script directory containing
; spaces is still passed to /hsave as a single file name.
menu * {
  -
  ♡  --- Words --> 
  .-
  .Save Hash Table: .hsave -o Words $qt($scriptdir $+ Words.hsh)
  .Save Ini File: .hsave -i Words $qt($scriptdir $+ Words.ini)
  .Show Top10: .top10
  .Show Bottom10: .bottom10
  .Print All: .print_All_Words 
  .-
}

; Echoes every item of the Words table to the active window as
; "<index>) <word> => <count>", preceded by a "Words Table:" header.
alias print_All_Words {
  echo -a Words Table:
  ; nothing to list when the table has not been created
  if (!$hget(Words)) { return }

  ; Loop on the item count rather than on the item name itself, so a
  ; word that would evaluate as false in a condition cannot end the
  ; listing early.
  var %total = $hget(Words,0).item, %i = 1, %word
  while (%i <= %total) {
    %word = $hget(Words,%i).item
    echo -a %i $+ ) %word => $hget(Words,%word)
    inc %i
  }
}

; Echoes the most frequent words of the Words table to the active
; window as "TOP 10: word (count), word (count), ...".
; Lists at most 10 entries, fewer when the table holds fewer words.
alias top10 {
  var %max = $hget(Words,0).item
  if (!%max) { echo -a TOP 10: no words collected yet | return }
  if (%max > 10) { %max = 10 }

  ; dump only the counts (-n), one per line, overwriting any old file (-o)
  hsave -no Words top10.txt
  ; sort the counts numerically in descending order; -n prefixes each line
  ; with its original line number, which is the item's index in the table
  filter -ffcteun 1 32 top10.txt top10.txt

  ; local variables only, so nothing leaks into the global variable list
  var %i = 1, %word, %list
  while (%i <= %max) {
    ; first token of the sorted line = item index -> look up the word
    %word = $hget(Words,$gettok($read(top10.txt,nt,%i),1,32)).item
    ; append "word (count)", comma-separated from the previous entry
    %list = $iif(%list != $null,%list $+ $chr(44)) %word $+($chr(40),$hget(Words,%word),$chr(41))
    inc %i
  }

  ; the sorted dump is only a scratch file
  .remove top10.txt
  echo -a TOP 10: %list
}

; Echoes the least frequent words of the Words table to the active
; window as "BOTTOM 10: word (count), word (count), ...".
; Lists at most 10 entries, fewer when the table holds fewer words.
alias bottom10 {
  var %max = $hget(Words,0).item
  if (!%max) { echo -a BOTTOM 10: no words collected yet | return }
  if (%max > 10) { %max = 10 }

  ; dump only the counts (-n), one per line, overwriting any old file (-o)
  hsave -no Words bottom10.txt
  ; sort the counts numerically in ascending order; -n prefixes each line
  ; with its original line number, which is the item's index in the table
  filter -ffctun 1 32 bottom10.txt bottom10.txt

  ; local variables only, so nothing leaks into the global variable list
  var %i = 1, %word, %list
  while (%i <= %max) {
    ; first token of the sorted line = item index -> look up the word
    %word = $hget(Words,$gettok($read(bottom10.txt,nt,%i),1,32)).item
    ; append "word (count)", comma-separated from the previous entry
    %list = $iif(%list != $null,%list $+ $chr(44)) %word $+($chr(40),$hget(Words,%word),$chr(41))
    inc %i
  }

  ; the sorted dump is only a scratch file
  .remove bottom10.txt
  echo -a BOTTOM 10: %list
}

That's all folks ....

Comments

Sign in to comment.
Are you sure you want to unfollow this person?
Are you sure you want to delete this?
Click "Unsubscribe" to stop receiving notices pertaining to this post.
Click "Subscribe" to resume notices pertaining to this post.