// File:     Grep.java
// Doc:      API Doc
// Category: Example
// Size:     8804
// Date:     Wed Apr 10 19:58:04 BST 2002
// Package:  com.ronsoft.books.nio.regex

package com.ronsoft.books.nio.regex;

import java.io.File;
import java.io.FileReader;
import java.io.LineNumberReader;
import java.io.IOException;
import java.util.List;
import java.util.LinkedList;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A file searching class, similar to grep, which returns information about
 * lines matched in the specified files.  Instances of this class are tied
 * to a specific regular expression pattern and may be applied repeatedly
 * to multiple files.
 *
 * <p>Instances of Grep are thread safe and may be shared: the only state
 * held is a final reference to a compiled Pattern, and every search
 * creates its own Matcher.</p>
 *
 * <p>Only the first match on each line is reported.  Files are decoded
 * with the default character encoding used by java.io.FileReader.</p>
 *
 * @author Michael Daudel (mgd@ronsoft.com) (original)
 * @author Ron Hitchens (ron@ronsoft.com) (hacked)
 * @version $Id: Grep.java,v 1.3 2002/04/11 02:58:04 ron Exp $
 */
public class Grep
{
	// The compiled pattern used for every search made by this instance.
	// Declared final so that a Grep object never changes once built.
	private final Pattern pattern;

	/**
	 * Instantiate a Grep object for the given pre-compiled Pattern object.
	 * @param pattern A java.util.regex.Pattern object specifying the
	 *  pattern to search for.  Must not be null.
	 * @exception NullPointerException If pattern is null.
	 */
	public Grep (Pattern pattern)
	{
		// Fail here rather than later, in the middle of a search.
		if (pattern == null) {
			throw new NullPointerException ("pattern");
		}

		this.pattern = pattern;
	}

	/**
	 * Instantiate a Grep object and compile the given regular expression
	 * string.
	 * @param regex The regular expression string to compile into a
	 *  Pattern for internal use.
	 * @param ignoreCase If true, pass Pattern.CASE_INSENSITIVE to
	 *  Pattern.compile() so that searches will be done without regard
	 *  to alphabetic case.  Note, this only applies to the ASCII
	 *  character set.  Use embedded expressions to set other options.
	 */
	public Grep (String regex, boolean ignoreCase)
	{
		this (Pattern.compile (regex,
			(ignoreCase) ? Pattern.CASE_INSENSITIVE : 0));
	}

	/**
	 * Instantiate a Grep object with the given regular expression string,
	 * with default options (case sensitive).
	 * @param regex The regular expression string to compile into a
	 *  Pattern for internal use.
	 */
	public Grep (String regex)
	{
		this (regex, false);
	}

	// ---------------------------------------------------------------

	/**
	 * Perform a grep on the given file.
	 * @param file A File object denoting the file to scan for the
	 *  regex given when this Grep instance was constructed.
	 * @return A type-safe array of Grep.MatchedLine objects describing
	 *  the lines of the file matched by the pattern, in file order.
	 *  The array is empty if no line matched.
	 * @exception IOException If the file does not exist, is not a
	 *  regular file, is unreadable, or fails while being read.
	 */
	public MatchedLine [] grep (File file)
		throws IOException
	{
		return (toArray (grepList (file)));
	}

	/**
	 * Perform a grep on the given file.
	 * @param fileName A String filename denoting the file to scan for
	 *  the regex given when this Grep instance was constructed.
	 * @return A type-safe array of Grep.MatchedLine objects describing
	 *  the lines of the file matched by the pattern, in file order.
	 * @exception IOException If the file does not exist, is not a
	 *  regular file, is unreadable, or fails while being read.
	 */
	public MatchedLine [] grep (String fileName)
		throws IOException
	{
		return (grep (new File (fileName)));
	}

	/**
	 * Perform a grep on the given list of files.  If a given file cannot
	 * be read (it does not exist, is not a regular file, is unreadable,
	 * or fails part way through), it is ignored as if empty.
	 * @param files An array of File objects to scan.
	 * @return A type-safe array of Grep.MatchedLine objects describing
	 *  the lines of the files matched by the pattern.  Matches appear
	 *  in the order the files were given, then in line order.
	 */
	public MatchedLine [] grep (File [] files)
	{
		List aggregate = new LinkedList();

		for (int i = 0; i < files.length; i++) {
			try {
				aggregate.addAll (grepList (files [i]));
			} catch (IOException ignored) {
				// Deliberate best effort: a file which cannot be
				// read contributes no matches, as documented.
			}
		}

		return (toArray (aggregate));
	}

	// -------------------------------------------------------------

	/**
	 * Encapsulation of a matched line from a file.  This immutable
	 * object has five read-only properties:<ul>
	 * <li>getFile(): The File this match pertains to.</li>
	 * <li>getLineNumber(): The line number (1-relative) within the
	 *   file where the match was found.</li>
	 * <li>getLineText(): The text of the matching line, without its
	 *   line terminator.</li>
	 * <li>start(): The index within the line where the matching
	 *  pattern begins.</li>
	 * <li>end(): The index, plus one, of the end of the matched
	 *  character sequence.</li>
	 * </ul>
	 */
	public static class MatchedLine
	{
		private final File file;
		private final int lineNumber;
		private final String lineText;
		private final int start;
		private final int end;

		/**
		 * Package-private constructor; instances are created by Grep.
		 */
		MatchedLine (File file, int lineNumber, String lineText,
			int start, int end)
		{
			this.file = file;
			this.lineNumber = lineNumber;
			this.lineText = lineText;
			this.start = start;
			this.end = end;
		}

		/** @return The File in which the match was found. */
		public File getFile()
		{
			return (this.file);
		}

		/** @return The 1-relative number of the matching line. */
		public int getLineNumber()
		{
			return (this.lineNumber);
		}

		/** @return The text of the matching line. */
		public String getLineText()
		{
			return (this.lineText);
		}

		/** @return Index within the line of the first matched char. */
		public int start()
		{
			return (this.start);
		}

		/** @return Index within the line just past the last matched char. */
		public int end()
		{
			return (this.end);
		}
	}

	// -----------------------------------------------------------

	/**
	 * Copy a List of MatchedLine objects into a type-safe array.
	 */
	private static MatchedLine [] toArray (List list)
	{
		return ((MatchedLine []) list.toArray (
			new MatchedLine [list.size()]));
	}

	/**
	 * Run the grepper on the given File.  Each line is tested with
	 * Matcher.find(), so a line is reported at most once, with the
	 * position of its first match.
	 * @return A (non-type-safe) List of MatchedLine objects.
	 * @exception IOException If the file does not exist, is not a
	 *  regular file, is unreadable, or fails while being read.
	 */
	private List grepList (File file)
		throws IOException
	{
		if ( ! file.exists()) {
			throw new IOException ("Does not exist: " + file);
		}

		if ( ! file.isFile()) {
			throw new IOException ("Not a regular file: " + file);
		}

		if ( ! file.canRead()) {
			throw new IOException ("Unreadable file: " + file);
		}

		List list = new LinkedList();
		LineNumberReader lnr = new LineNumberReader (new FileReader (file));

		try {
			// One Matcher per call, reset for every line; this
			// keeps the shared Grep instance free of mutable state.
			Matcher matcher = this.pattern.matcher ("");
			String line;

			while ((line = lnr.readLine()) != null) {
				matcher.reset (line);

				if (matcher.find()) {
					// getLineNumber() has already been advanced
					// past the line just read, so it is 1-relative.
					list.add (new MatchedLine (file,
						lnr.getLineNumber(), line,
						matcher.start(), matcher.end()));
				}
			}
		} finally {
			// Always release the file handle, even if a read fails.
			lnr.close();
		}

		return (list);
	}

	// ---------------------------------------------------------------

	/**
	 * Test code to run grep operations.  Accepts two command-line
	 * options: -i or --ignore-case, compile the given pattern so
	 * that case of alpha characters is ignored.  Or -1, which runs
	 * the grep operation on each individual file, rather than passing
	 * them all to one invocation.  This is just to test the different
	 * methods.  The printed output is slightly different when -1 is
	 * specified.
	 *
	 * <p>Any argument beginning with "-" is treated as a switch, and
	 * unrecognized switches are silently ignored.  The first remaining
	 * argument is the pattern, the rest are file names.</p>
	 */
	public static void main (String [] argv)
	{
		// set defaults
		boolean ignoreCase = false;
		boolean onebyone = false;
		List argList = new LinkedList();	// to gather args

		// loop through the args, looking for switches and saving
		// off the pattern and file names
		for (int i = 0; i < argv.length; i++) {
			String arg = argv [i];

			if (arg.startsWith ("-")) {
				if (arg.equals ("-i")
					|| arg.equals ("--ignore-case"))
				{
					ignoreCase = true;
				}

				if (arg.equals ("-1")) {
					onebyone = true;
				}

				continue;
			}

			// not a switch, add it to the list
			argList.add (arg);
		}

		// enough args to run?
		if (argList.size() < 2) {
			System.err.println ("usage: [options] pattern filename ...");
			return;
		}

		// first arg on the list will be taken as the regex pattern
		// pass the pattern to the new Grep object, along with the
		// current value of the ignore case flag
		Grep grepper = new Grep ((String) argList.remove (0),
			ignoreCase);

		// somewhat arbitrarily split into two ways of calling the
		// grepper and printing out the results.
		if (onebyone) {
			Iterator it = argList.iterator();

			// loop through the filenames and grep them
			while (it.hasNext()) {
				String fileName = (String) it.next();

				// print the filename once before each grep
				System.out.println (fileName + ":");

				MatchedLine [] matches = null;

				// report a failing file and carry on with the next
				try {
					matches = grepper.grep (fileName);
				} catch (IOException e) {
					System.err.println ("\t*** " + e);
					continue;
				}

				// print out info about the matched lines; end()
				// is exclusive, so show the last matched index
				for (int i = 0; i < matches.length; i++) {
					MatchedLine match = matches [i];

					System.out.println ("  "
						+ match.getLineNumber()
						+ " [" + match.start()
						+ "-" + (match.end() - 1)
						+ "]: "
						+ match.getLineText());
				}
			}
		} else {
			// convert the filename list to an array of File; walk
			// the linked list with an iterator rather than get(i)
			File [] files = new File [argList.size()];
			Iterator it = argList.iterator();

			for (int i = 0; it.hasNext(); i++) {
				files [i] = new File ((String) it.next());
			}

			// run the grepper, unreadable files are ignored
			MatchedLine [] matches = grepper.grep (files);

			// print out info about the matched lines
			for (int i = 0; i < matches.length; i++) {
				MatchedLine match = matches [i];

				System.out.println (match.getFile().getName()
					+ ", " + match.getLineNumber() + ": "
					+ match.getLineText());
			}
		}
	}
}