import java.io.*; import java.lang.String; class HtmlTokenizer extends StreamTokenizer { //定义各标记,这里的标记仅是本例中必须的, 可根据需要自行扩充\ static int HTML_TEXT=-1; static int HTML_UNKNOWN=-2; static int HTML_EOF=-3; static int HTML_IMAGE=-4; static int HTML_FRAME=-5; static int HTML_BACKGROUND=-6; static int HTML_APPLET=-7; boolean outsideTag=true; //判断是否在标记之中 //构造器,定义该令牌流的语法表。 public HtmlTokenizer(BufferedReader r) { super(r); this.resetSyntax(); //重置语法表\ this.wordChars(0,255); //令牌范围为全部字符\ this.ordinaryChar(’<’); //HTML标记两边的分割符\ this.ordinaryChar(’>’); } //end of constructor public int nextHtml(){ int token; //令牌 try{ switch(token=this.nextToken()){ case StreamTokenizer.TT_EOF: //如果已读到流的尽头,则返回TT_EOF return HTML_EOF; case ’<’: //进入标记字段 outsideTag=false; return nextHtml(); case ’>’: //出标记字段 outsideTag=true; return nextHtml(); ...