Clicky

Fortran Wiki
wc

Word count utility written by David Frank. Originally posted at http://home.earthlink.net/~dave_gemini/wc.f90.

program word_counts   ! process file text -> output file words,counts
implicit none         ! e.g.  wc.exe bible.txt >counts.txt
real :: times(4)
integer,parameter :: hashbits=17, maxw=2**hashbits, wlen=30
integer,parameter :: ia = ichar('a'), iz = ichar('z'), iblank=ichar(' '), iquote=ichar("'")
integer,external  :: ftell
integer(1),allocatable :: text(:)
character(256)    :: code = "                                       '                         abcdefghijklmnopqrstuvwxyz      abcdefghijklmnopqrstuvwxyz     " // &
                            "  " // char(130) // "       " // char(138)
integer(1)        :: acode(0:255) ; equivalence (acode,code)
character(wlen)   :: word, fname*80
integer(1)        :: aword(wlen)  ; equivalence (aword,word)
integer           :: ich, i, k, n, nchars, nc=0, collisions=0, total=0, unique=0, odd, even
type word_count
  union
    map ; character(wlen) :: word   ; end map   ! CVF, Intel, etc? extension
    map ; integer(1) :: aword(wlen) ; end map
  end union
  integer :: k, count
end type
type (word_count) :: wc(maxw)
wc%word = ' ' ; wc%k = 0 ; wc%count = 0
! ---------------------
call cpu_time(times(1))           ! get benchmark start time
call getarg(1,fname)              ! get input file name
if (fname == ' ') then
   write (*,*) 'usage:  e.g. >wc.exe  bible.txt >bible.out' ; stop
end if

open (1,file=fname,form='binary',position='append')  ! to end of file
nchars = ftell(1)                 ! get #chars in text file
allocate ( text(nchars) )
rewind(1) ; read(1) text          ! input file -> text
call cpu_time(times(2))           ! get benchmark time 2
! ---------------------
text = acode(text)    ! upper->lower, non-alpha->blank, except '
                      ! and 2 extended ascii chars (130), (138)

do ; nc = nc+1 ; if (nc > nchars) exit
   ich = text(nc) ; if (ich < ia.or.ich > iz) cycle   ! skip until alpha start
   k = 1 ; aword(1) = ich                      ! found start new word
   odd = ich ; even = 1                        ! init. hash with 1st char
   do ; nc = nc+1
      ich = text(nc) ; if (ich == iblank) exit ! blank is end word
      if (ich == iquote) cycle                 ! delete ' from word
      k = k+1 ; aword(k) = ich                 ! accumulate word
      if (iand(k,1) == 0) then
         even = ieor(ishftc(even,5,hashbits),ich)  ! accum even hash
      else
         odd  = ieor(ishftc(odd, 5,hashbits),ich)  ! accum odd  hash
      end if
   end do
   n = ieor(ishft(odd*even,-hashbits-2),odd*even)  ! hash product pieces
   n = iand(n,maxw-1)                              ! positive index
   do ; n = n+1 ; if (n > maxw-1) n = 1            ! reset index
      if (wc(n)%count == 0) then
         wc(n)%word(1:k) = word(1:k) ; wc(n)%k = k ; wc(n)%count = 1 ; exit  ! initial entry
      else if (word(1:k) == wc(n)%word(1:wc(n)%k)) then
         wc(n)%count = wc(n)%count+1 ; exit        ! count occurances
      else
         collisions = collisions+1
      end if
   end do
end do
! ---------------------
call cpu_time(times(3))              ! get process words,counts time
n = 0
do i = 1,maxw                        ! make entries contiguous from wc(1:
   if (wc(i)%count == 0) cycle
   n = n+1 ; wc(n) = wc(i)
   total = total + wc(i)%count ; unique = unique+1
end do
call qsort(0,n-1)                    ! quicksort wc(1:n) entries
write (*,'(i5,2x,a)') (wc(i)%count, wc(i)%word(1:wc(i)%k), i=1,n)
write (*,*) 'file processed = ',trim(fname)
write (*,*) 'total words  =',total
write (*,*) 'unique words =',unique
write (*,*) 'collisions   =',collisions
! ---------------------
call cpu_time(times(4))              ! get benchmark stop time
write (*,91) times(2)-times(1),' Sec  input'
write (*,91) times(3)-times(2),' Sec  process'
write (*,91) times(4)-times(3),' Sec  output'
write (*,91) times(4)-times(1),' Sec  total  2.8ghz pentium4'
91 format (f0.3,a)
stop
contains
! -----------------------------------
recursive subroutine qsort(l,r)
integer :: l, r, i,j
i = l ; j = r ; word = wc((l+r)/2+1)%word
do while (i <= j)
  do while (wc(i+1)%word < word .and. i < r)
    i = i+1
  end do
  do while (word < wc(j+1)%word .and. j > l)
    j = j-1
  end do
  if (i <= j) then
    wc([i+1,j+1]) = wc([j+1,i+1])   ! swap words,counts
    i = i+1
    j = j-1
  end if
end do
if (l < j) call qsort(l, j)
if (i < r) call qsort(i, r)
end subroutine qsort
end program

! example bible.txt file -> words counts processing
! 8177  a
!  319  aaron
!   ..........
!    5  zurishaddai
!    1  zuzims
! bible.txt
! total words  =      789781
! unique words =       12691
! collisions   =        4486
! 0.016 Sec  input
! 0.188 Sec  process
! 0.063 Sec  output
! 0.266 Sec  total  2.8ghz pentium4