#! /bin/sh
#
# tagusage
#
# Count the start tags that occur between <text> and </text> in an
# SGML document and print one line per element, sorted by element
# name, in the form
#
#     <tagusage gi=NAME occurs=COUNT></tagusage>
#
# so that the output can be pasted into a tagsdecl element.
#
# Usage: tagusage < document.sgml
#
# Original script by Heiki-Jaan Kaalep (hkaalep@psych.ut.ee)
# that counted the tag usage was modified by Greg Priest-Dorman
# (priestdo@cs.vassar.edu) to give output in a format that
# could be pasted into a tagsdecl element.  Developed in the
# context of the MULTEXT and MULTEXT-EAST projects.
# 10/95
#
# Needs only POSIX tr, awk and sort (earlier versions required GNU sed
# because the whole document was squeezed onto a single line).
#

# Read an SGML document on stdin, write the <tagusage> lines to stdout.
tagusage() {
  # One tr pass does two jobs:
  #   newline -> space   so a tag whose attributes wrap onto the next
  #                      input line stays in one piece;
  #   ">"     -> newline so every output line ends exactly where a tag
  #                      ended, i.e. looks like "leading text<name attrs".
  # No sentinel character is needed, so attribute values containing "@"
  # (or any other character) cannot split a tag.
  tr '\012>' ' \012' |
    awk '
      # Lines without "<" are plain character data.
      index($0, "<") == 0 { next }
      {
        # Drop everything up to the last "<"; the first field that is
        # left is the generic identifier of the tag.
        sub(/^.*</, "")
        name = $1

        # <text ...> opens the counted region and </text> closes it.
        # Neither is counted itself.
        if (name == "text")  { inside = 1; next }
        if (name == "/text") { inside = 0; next }

        if (!inside) next
        if (name == "") next          # "<>" or "< ...": not a real tag
        if (name ~ /^\//) next        # end tags are not counted
        if (name ~ /^!--/) next       # comments are not counted

        sub(/\/$/, "", name)          # tolerate XML-style "<br/>"
        if (name == "") next

        occurs[name]++
      }
      END {
        for (name in occurs)
          printf "<tagusage gi=%s occurs=%d></tagusage>\n", name, occurs[name]
      }
    ' |
    # Every line shares the same prefix, so a byte-wise sort of whole
    # lines orders the output by element name, independent of locale.
    LC_ALL=C sort
}

tagusage