#! /bin/sh
#
# tagusage
#
# count up the start tags in an SGML document and
# provide output as tagusage elements sorted by element name.
#
# Original script by Heiki-Jaan Kaalep (hkaalep@psych.ut.ee)
# that counted the tag usage was modified by Greg Priest-Dorman
# (priestdo@cs.vassar.edu) to give output in a format that
# could be pasted into a tagsdecl element. Developed in the
# context of the MULTEXT and MULTEXT-EAST projects.
# 10/95
#
#
# requires gnu sed and possibly gnu grep
#
# tagusage: read an SGML document on stdin and write, for every element
# that has at least one start tag, one line of the form
#     <tagusage gi=NAME occurs=COUNT></tagusage>
# to stdout, sorted by element name.
#
# End tags, comments, other <!...> markup declarations and <?...>
# processing instructions are not counted.  Attributes are ignored, so
# <p> and <p id=x> both count as "p".  Names are compared exactly as
# written (no case folding).
tagusage() {
  {
    # Join the input lines (a tag may span several lines) and at the same
    # time start a new line at every "<", so that each line after the
    # first begins with whatever followed a "<".  Using tr here keeps the
    # lines short for sed and needs no placeholder character that could
    # collide with document text.
    tr '\012<' ' \012'
    # Terminate the last line; tr turned the final newline into a space.
    echo
  } |
    # Line 1 is the text before the first "<" and can never be a tag.
    # From the other lines keep only the element name of a start tag:
    # the name must not begin with "/" (end tag), "!" (comment or
    # declaration) or "?" (processing instruction), and the tag must be
    # closed by ">" before the next "<".
    sed -n '1d; s|^\([^[:space:]>/!?][^[:space:]>/]*\)[^>]*>.*$|\1|p' |
    #
    # the next grep will remove the text tag from the count
    # grep -v '^text$' |
    #
    # Sorting by name groups equal names for uniq and also gives the
    # final output order.
    sort | uniq -c |
    # uniq -c prints "<spaces>COUNT NAME"; turn that into a tagusage line.
    sed 's|^ *\([0-9][0-9]*\) \(.*\)$|<tagusage gi=\2 occurs=\1></tagusage>|'
}

tagusage
#