import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This is code that was stripped out of my solution, with portions of my code removed. This is an example of how you
* might parse a passage that is input from the user. This is not integrated into the BibleReader code so it doesn't
* validate references (so Herman 2:3 would be parsed correctly, but this code does not check whether or not Herman 2:3
* is actually a valid reference). It also leaves some cases out--I can't give you everything. You may use and modify
* this code as you desire, but make sure you attribute it to the original author.
*
* @author Chuck Cusack, February 2013.
*/
public class PassageParser {
    // The regular expressions used to parse input.
    // Learn more about Pattern and regular expressions here:
    // https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html
    // The first thing to know is that the \ character is an escape character, and when seen in a regular
    // expression, it means "the next character should be interpreted differently than normal".
    // Thus, since \ normally means escape character, to produce a \, we need to escape it, so we use \\.
    // Thus, every time you see \\ in a regular expression string, imagine it is really just a \.
    //
    // .      means match any single character (except a line terminator, by default)
    // |      means match either or, so "blah|foo" looks for either blah or foo.
    // [abc]  means match any one of the listed characters; [a-z] is a range.
    // \s     means a whitespace character (space, tab, newline, carriage return, etc.)
    // \w     means word character (a letter, a digit, or an underscore)
    // \d     means any digit (0-9).
    // *      means 0 or more occurrences of the previous token. In this case the matching is "greedy",
    //        which essentially means "match as much of the string as you can with this pattern".
    //        This is a little technical, so see the above link for more details about this.
    //        Thus, \s* means 0 or more whitespace characters
    // +      means 1 or more occurrences of the previous token.
    //        Thus, \d+ means 1 or more digits. This will capture an integer.
    // ()     around something means treat it as a capture group which can be asked about later.
    //        For instance, given the regular expression
    //            "blah(\\d+)foo(\\s*)"
    //        If we try to match it given the string
    //            "stuff blah123foo other stuff"
    //        it will match the pattern starting at "blah", and group 1 is "123", and group 2 is " ".
    //        Group 0 is the whole matching portion, so in this case "blah123foo ".
    // Below, I give the interpretation of some of the regular expressions I use.
    // By the way, these are the sorts of comments you should consider removing when you
    // copy code. They are meant to help you understand the code and not necessarily to document it.

    // Match zero or more whitespace characters followed by one or more digits followed by
    // zero or more whitespace characters. The digits are capture group 1 of this fragment.
    // So, for instance " 234 " and "12 " will match, but " 123 44 " will not.
    public static final String number = "\\s*(\\d+)\\s*";

    //---------------------------------------------------------------------------------------------------------
    // If you need to use parentheses to group something, but don't want it to count as a
    // capturing group, you put "?:" at the start. For instance, "(?:1|2)" means match "1" or "2",
    // but do not count this as a group. I use this below.
    //
    // The next one is trying to match things that start with a book name and then (possibly) other stuff.
    // More specifically, after optional leading whitespace, match either
    //   (a) a III, II, I, 1, 2, or 3, then 0 or more whitespace characters, then 1 or more letters, OR
    //   (b) 1 or more words consisting of only alphabetic characters, separated by whitespace.
    // Then following either (a) or (b) is 0 or more whitespace characters followed by zero or more of
    // any character.
    //
    // Two details matter in (a):
    //   * The numerals are listed longest first. Alternatives are tried left to right, so if "I" came
    //     before "II", then "II Kings 3:4" would be split into the book "II" and the rest "Kings 3:4".
    //   * The name after the numeral is letters only (not \w), so that "1 John3:4" gives the book
    //     "1 John" and not "1 John3", which is consistent with how (b) treats "John3:4".
    // In (b) the words are written as "word (whitespace word)*" instead of "(whitespace* word)+"; both
    // accept the same text, but the second form nests one repetition inside another, which gives the
    // regex engine many equivalent ways to split a word and can make a failed match very slow.
    //
    // Thus, given a reference to a passage, this will capture the name of the book as group 1 (notice
    // the location of parentheses) and everything else as group 2.
    // For instance:
    //   "1 John 3:4" will match "1 John" as group 1 and "3:4" as group 2.
    //   " 3 Peter 3:1-5" will match "3 Peter" as group 1 and "3:1-5" as group 2.
    //   "II Kings 3:4" will match "II Kings" as group 1 and "3:4" as group 2.
    //   "Song of Solomon 2" will match "Song of Solomon" as group 1 and "2" as group 2.
    //   "1John 2:3" will match "1John" as group 1 and "2:3" as group 2, because the whitespace
    //   between the numeral and the name is optional.
    public static final Pattern bookPattern = Pattern.compile(
            "\\s*((?:III|II|I|[123])\\s*[a-zA-Z]+|[a-zA-Z]+(?:\\s+[a-zA-Z]+)*)\\s*(.*)");

    //---------------------------------------------------------------------------------------------------------
    // Two examples of patterns that are valid. More are needed.
    // This one matches things like "3:4-7:3". Groups 1-4 are chapter1, verse1, chapter2, verse2.
    public static final Pattern cvcvPattern =
            Pattern.compile(number + ":" + number + "-" + number + ":" + number);

    // This one matches things like "3-5". Groups 1-2 are chapter1, chapter2.
    public static final Pattern ccPattern = Pattern.compile(number + "-" + number);

    /**
     * Splits a passage reference into a book name and a chapter/verse part, and works out which of
     * the supported formats the chapter/verse part is in.
     *
     * <p>This is a skeleton: each recognized format ends in a placeholder comment where the caller
     * of this sample is expected to add the real work. As written the method has no observable
     * effect. It does not check that the book or the numbers refer to anything that exists.
     *
     * <p>Input that cannot be used (a {@code null} reference, text that does not start with a book
     * name, or a number too large to fit in an {@code int}) is ignored and never causes an
     * exception.
     *
     * @param passageRef the text of the reference, for example {@code "1 John 3:4-7:3"};
     *                   may be {@code null}
     */
    public void getPassage(String passageRef) {
        if (passageRef == null) {
            // Nothing was entered; treat it like any other input that is not "BOOK Stuff".
            return;
        }

        // First, split the input into the book and the rest, if possible.
        Matcher m = bookPattern.matcher(passageRef);
        if (!m.matches()) {
            // It doesn't match the overall format of "BOOK Stuff".
            return;
        }

        String book = m.group(1);
        String theRest = m.group(2);
        int chapter1, chapter2, verse1, verse2;

        // Now we need to parse theRest to see what format it is.
        // Notice that I have omitted some of the cases.
        // You should think about whether or not the order the
        // possibilities occurs matters if you use this and add more cases.
        if (theRest.length() == 0) {
            // It looks like they want a whole book.
            // So now you need to do something about it.
        } else if ((m = cvcvPattern.matcher(theRest)).matches()) {
            int[] parts = groupsAsInts(m);
            if (parts != null) {
                chapter1 = parts[0];
                verse1 = parts[1];
                chapter2 = parts[2];
                verse2 = parts[3];
                // They want something of the form book chapter1:verse1-chapter2:verse2
                // So now you need to do something about it.
            } else {
                // Right shape, but one of the numbers is too big to be a real chapter or verse.
            }
        } else if ((m = ccPattern.matcher(theRest)).matches()) {
            int[] parts = groupsAsInts(m);
            if (parts != null) {
                chapter1 = parts[0];
                chapter2 = parts[1];
                // They want something of the form book chapter1-chapter2
                // So now you need to do something about it.
            } else {
                // Right shape, but one of the numbers is too big to be a real chapter or verse.
            }
        } else {
            // They want something else that I haven't taken into account yet.
        }
    }

    /**
     * Converts every capture group of an already-matched matcher to an {@code int}.
     *
     * <p>The {@code \d+} in {@link #number} accepts any quantity of digits, so a match does not
     * guarantee that the text fits in an {@code int}; that case is reported instead of thrown.
     *
     * @param matched a matcher on which {@code matches()} has just returned {@code true} and whose
     *                groups are all digit strings
     * @return the group values in order (group 1 at index 0), or {@code null} if any of them is
     *         out of range for {@code int}
     */
    private static int[] groupsAsInts(Matcher matched) {
        int[] values = new int[matched.groupCount()];
        try {
            for (int g = 1; g <= values.length; g++) {
                values[g - 1] = Integer.parseInt(matched.group(g));
            }
        } catch (NumberFormatException ignored) {
            // Only reachable through overflow, since the pattern already guarantees digits.
            return null;
        }
        return values;
    }
}