import java.awt.*;
import java.awt.event.*;
import javax.swing.*;
import javax.swing.border.*;
import java.io.*;
import java.util.*;
/**
 * A simple application that lets the user open a file or paste text into a
 * window, and then analyzes the text: word counts, letter counts, and counts
 * of letter pairs (digrams) and triples (trigrams).
 *
 * <p>The counting logic lives in static helpers ({@link #countNgrams},
 * {@link #countLetters}, {@link #countWords}) that do not touch any GUI state;
 * the public instance methods delegate to them.
 *
 * @author Charles Cusack
 * @version October, 2008
 */
public class WordCount extends JFrame {

    /**
     * Initial contents of the (editable) input area, so there is something to
     * analyze before the user pastes text or opens a file.
     */
    public static final String startString =
        "Pebble It is a game about graph pebbling, a mathematical problem dealing with moving\n"
        +"resources from one location to another.\n"
        +"It is a very difficult problem for computers to solve algorithmically, as it relies\n"
        +"heavily on logic and understanding of the problem as a whole, a faculty only humans possess.\n"
        +"Pebble It is a fun and easy way for casual gamers to contribute to creating better algorithms\n"
        +"for solving such problems.";

    /** Digrams occurring fewer times than this are not displayed. */
    private static final int MIN_DIGRAM_DISPLAY_COUNT = 5;

    /**
     * Trigrams occurring fewer times than this are not displayed. Lower than
     * the digram threshold because any given trigram is rarer than a digram.
     */
    private static final int MIN_TRIGRAM_DISPLAY_COUNT = 3;

    //---------------------------------------------------------------------
    // Pure counting helpers. They are static and package-private so they can
    // be exercised without constructing the window.
    //---------------------------------------------------------------------

    /**
     * Reduces text to its lower-case ASCII letters.
     *
     * @param text any text; {@code null} is treated as empty
     * @return {@code text} lower-cased, with every character other than
     *         {@code a}-{@code z} removed
     */
    static String lettersOnly(String text)
    {
        if (text == null) {
            return "";
        }
        // Locale.ROOT keeps the result independent of the user's locale
        // (under a Turkish locale, "I" would otherwise lower-case to a
        // dotless i and then be stripped as a non-letter).
        return text.toLowerCase(Locale.ROOT).replaceAll("[^a-z]", "");
    }

    /**
     * Adds one to the count stored for {@code key}, starting at 1 if the key
     * is not present yet.
     */
    private static <K> void increment(Map<K,Integer> counts, K key)
    {
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /**
     * Counts every overlapping run of {@code n} consecutive letters.
     * Non-letters are removed first, so n-grams may span word boundaries.
     *
     * @param text the text to analyze; {@code null} is treated as empty
     * @param n the n-gram length, at least 1
     * @return a sorted map from n-gram to number of occurrences; empty if the
     *         text has fewer than {@code n} letters
     * @throws IllegalArgumentException if {@code n} is less than 1
     */
    static Map<String,Integer> countNgrams(String text, int n)
    {
        if (n < 1) {
            throw new IllegalArgumentException("n must be at least 1, but was " + n);
        }
        Map<String,Integer> counts = new TreeMap<String,Integer>();
        String letters = lettersOnly(text);
        // substring's upper bound is exclusive, hence start + n.
        for (int start = 0; start + n <= letters.length(); start++) {
            increment(counts, letters.substring(start, start + n));
        }
        return counts;
    }

    /**
     * Counts how often each letter occurs, ignoring case.
     *
     * @param text the text to analyze; {@code null} is treated as empty
     * @return a sorted map from lower-case letter to number of occurrences
     */
    static Map<Character,Integer> countLetters(String text)
    {
        Map<Character,Integer> counts = new TreeMap<Character,Integer>();
        String letters = lettersOnly(text);
        for (int i = 0; i < letters.length(); i++) {
            increment(counts, Character.valueOf(letters.charAt(i)));
        }
        return counts;
    }

    /**
     * Counts how often each word occurs, ignoring case. Words are maximal
     * runs of word characters (letters, digits, underscore); a possessive
     * {@code 's} is dropped, so "karl's kitchen" yields "karl" and "kitchen".
     *
     * @param text the text to analyze; {@code null} is treated as empty
     * @return a sorted map from lower-case word to number of occurrences
     */
    static Map<String,Integer> countWords(String text)
    {
        Map<String,Integer> counts = new TreeMap<String,Integer>();
        if (text == null) {
            return counts;
        }
        // The \b after 's makes sure only a trailing possessive is dropped;
        // without it the s of a word that merely follows an apostrophe
        // ('some') would be eaten as part of the delimiter.
        String[] parts = text.toLowerCase(Locale.ROOT).trim().split("('s\\b|\\W)+");
        for (String word : parts) {
            // split yields an empty first element when the text starts with
            // a delimiter (or is empty); that is not a word.
            if (word.length() > 0) {
                increment(counts, word);
            }
        }
        return counts;
    }

    /**
     * Renders a count map as text, one {@code "key count"} pair per line, in
     * the map's iteration order.
     *
     * @param counts the counts to render
     * @param minCount entries with a smaller count are left out
     * @return the rendered lines, each terminated by a newline
     */
    static <K> String formatCounts(Map<K,Integer> counts, int minCount)
    {
        StringBuilder out = new StringBuilder();
        for (Map.Entry<K,Integer> entry : counts.entrySet()) {
            if (entry.getValue() >= minCount) {
                out.append(entry.getKey()).append(' ').append(entry.getValue()).append('\n');
            }
        }
        return out.toString();
    }

    /**
     * Reads a whole text file, normalizing every line ending to {@code \n}.
     * The file is decoded with the platform default charset.
     *
     * @param file the file to read
     * @return the file contents, each line followed by {@code \n}
     * @throws IOException if the file cannot be opened or read
     */
    static String readAll(File file) throws IOException
    {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            StringBuilder contents = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                contents.append(line).append('\n');
            }
            return contents.toString();
        } finally {
            // Close even when reading fails, so the file handle is not leaked.
            reader.close();
        }
    }

    //---------------------------------------------------------------------
    // Public counting API and the methods that refresh the output areas.
    //---------------------------------------------------------------------

    /**
     * Get a count of how many times each digram (pair of adjacent letters)
     * occurs in an input String. Digrams overlap: "abc" contains "ab" and "bc".
     *
     * @param text a string containing the text you wish to analyze
     * @return a map containing entries whose keys are digrams, and
     * whose values correspond to the number of times that digram occurs
     * in the input String text.
     */
    public Map<String,Integer> getDigramCount(String text)
    {
        return countNgrams(text, 2);
    }

    /**
     * Recompute the digram counts from the input text area and show those
     * that occur at least {@value #MIN_DIGRAM_DISPLAY_COUNT} times.
     */
    public void updateDigramCount()
    {
        Map<String,Integer> counts = getDigramCount(theText.getText());
        digramCountText.setText(formatCounts(counts, MIN_DIGRAM_DISPLAY_COUNT));
    }

    /**
     * Get a count of how many times each letter occurs in an input String.
     *
     * @param text a string containing the text you wish to analyze
     * @return a map containing entries whose keys are alphabetic letters, and
     * whose values correspond to the number of times that letter occurs
     * in the input String text.
     */
    public Map<Character,Integer> getLetterCount(String text)
    {
        return countLetters(text);
    }

    /**
     * Recompute the letter counts from the input text area and show all of
     * them in the letter output area.
     */
    public void updateLetterCount()
    {
        Map<Character,Integer> counts = getLetterCount(theText.getText());
        letterCountText.setText(formatCounts(counts, 1));
    }

    /**
     * Get a count of how many times each trigram (triple of adjacent letters)
     * occurs in an input String.
     *
     * @param text a string containing the text you wish to analyze
     * @return a map containing entries whose keys are trigrams, and
     * whose values correspond to the number of times that trigram occurs
     * in the input String text.
     */
    public Map<String,Integer> getTrigramCount(String text)
    {
        return countNgrams(text, 3);
    }

    /**
     * Recompute the trigram counts from the input text area and show those
     * that occur at least {@value #MIN_TRIGRAM_DISPLAY_COUNT} times.
     */
    public void updateTrigramCount()
    {
        Map<String,Integer> counts = getTrigramCount(theText.getText());
        trigramCountText.setText(formatCounts(counts, MIN_TRIGRAM_DISPLAY_COUNT));
    }

    /**
     * Get a count of how many times each word occurs in an input String.
     * Counting is case-insensitive; keys are lower-case.
     *
     * @param text a string containing the text you wish to analyze
     * @return a map containing entries whose keys are words, and
     * whose values correspond to the number of times that word occurs
     * in the input String text.
     */
    public Map<String,Integer> getWordCount(String text)
    {
        return countWords(text);
    }

    /**
     * Recompute the word counts from the input text area and show them in
     * the word output area, one {@code "count word"} pair per line.
     */
    public void updateWordCount()
    {
        Map<String,Integer> counts = getWordCount(theText.getText());
        StringBuilder out = new StringBuilder();
        for (Map.Entry<String,Integer> entry : counts.entrySet()) {
            // Unlike the other panes, the count comes first here.
            out.append(entry.getValue()).append(' ').append(entry.getKey()).append('\n');
        }
        wordCountText.setText(out.toString());
    }

    /**
     * Get a count of how many times each N-gram (sequence of N adjacent
     * letters) occurs in an input String.
     *
     * @param text a string containing the text you wish to analyze
     * @param n the value of N for N-grams; must be at least 1
     * @return a map containing entries whose keys are N-grams, and
     * whose values correspond to the number of times that N-gram occurs
     * in the input String text.
     * @throws IllegalArgumentException if {@code n} is less than 1
     */
    public Map<String,Integer> getNgramCount(String text, int n)
    {
        return countNgrams(text, n);
    }

    //---------------------------------------------------------------------
    // GUI
    //---------------------------------------------------------------------
    JTextArea wordCountText;
    JTextArea letterCountText;
    JTextArea digramCountText;
    JTextArea trigramCountText;
    JTextArea theText;
    JFileChooser fc;

    /**
     * Builds the window (button bar on top, input area above the four output
     * areas), wires up the buttons, and shows it.
     */
    public WordCount() {
        Container contentPane = this.getContentPane();
        contentPane.setLayout(new BorderLayout());
        this.setTitle("A very simple editor with word counter");

        JButton readButton = new JButton("Open");
        JButton updateButton = new JButton("Update Counts");
        Box buttonBox = Box.createHorizontalBox();
        buttonBox.add(Box.createHorizontalStrut(20));
        buttonBox.add(updateButton);
        buttonBox.add(Box.createHorizontalStrut(20));
        buttonBox.add(readButton);

        theText = new JTextArea(10,70);
        theText.setBorder(new TitledBorder("The Text"));
        theText.setText(startString);

        // Output areas are read-only.
        wordCountText = new JTextArea();
        wordCountText.setEditable(false);
        wordCountText.setBorder(new TitledBorder("Word Counts"));
        letterCountText = new JTextArea();
        letterCountText.setEditable(false);
        letterCountText.setBorder(new TitledBorder("Letter Counts"));
        digramCountText = new JTextArea();
        digramCountText.setEditable(false);
        digramCountText.setBorder(new TitledBorder("Digram Counts"));
        trigramCountText = new JTextArea();
        trigramCountText.setEditable(false);
        trigramCountText.setBorder(new TitledBorder("Trigram Counts"));

        Box outputBox = Box.createHorizontalBox();
        outputBox.add(wordCountText);
        outputBox.add(letterCountText);
        outputBox.add(digramCountText);
        outputBox.add(trigramCountText);

        JPanel textPanel = new JPanel();
        textPanel.setLayout(new GridLayout(2,1));
        textPanel.add(new JScrollPane(theText));
        textPanel.add(new JScrollPane(outputBox));
        contentPane.add(buttonBox, BorderLayout.NORTH);
        contentPane.add(textPanel, BorderLayout.CENTER);

        // Unfiltered chooser: every file in the file system can be picked.
        fc = new JFileChooser();

        updateButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                updateStuff();
            }
        });
        readButton.addActionListener(new ActionListener() {
            @Override
            public void actionPerformed(ActionEvent e) {
                getFromFile();
                updateStuff();
            }
        });

        pack();
        this.setSize(new Dimension(800,800));
        setVisible(true);
    }

    /**
     * The main method so we can run the class as an application.
     */
    public static void main(String[] args) {
        // Swing components must be created on the Event Dispatch Thread.
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new WordCount();
            }
        });
    }

    /** Refreshes all four output areas from the current input text. */
    public void updateStuff()
    {
        updateWordCount();
        updateLetterCount();
        updateDigramCount();
        updateTrigramCount();
    }

    /**
     * Overridden so we can exit when window is closed
     */
    @Override
    protected void processWindowEvent(WindowEvent e) {
        super.processWindowEvent(e);
        if (e.getID() == WindowEvent.WINDOW_CLOSING) {
            System.exit(0);
        }
    }

    /**
     * Lets the user pick a file and loads its text into the input area.
     * Does nothing if the dialog is cancelled; shows an error dialog, and
     * leaves the input area unchanged, if the file is missing or unreadable.
     */
    public void getFromFile() {
        int returnVal = fc.showDialog(this,"Open");
        if (returnVal != JFileChooser.APPROVE_OPTION) {
            return;
        }
        File file = fc.getSelectedFile();
        if (!file.exists()) {
            JOptionPane.showMessageDialog(this,
                "That file does not exist!.",
                "File Error", JOptionPane.INFORMATION_MESSAGE);
            return;
        }
        try {
            theText.setText(readAll(file));
        } catch (IOException e) {
            // Tell the user in the GUI; a console message would go unseen.
            JOptionPane.showMessageDialog(this,
                "Error opening file " + file.getName() + ": " + e.getMessage(),
                "File Error", JOptionPane.ERROR_MESSAGE);
        }
    }
}