I was working on a project which required pulling down and parsing a .html page from a server, then parsing it for content. I searched a string for two values as a unit test, then saved each of them to a List, then compared them to a manually created String[]. The code is below:
SiteGrabber.java:
//some imports
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
import javax.swing.JOptionPane;
public class SiteGrabber {
//constructor and java.net stuff
public List<String> getWords(String content){
int prev = 0;
List<String> res = new ArrayList<String>();
String tar = "<tr> <td></td><td><li>";
int tarlen = tar.length();
while(content.indexOf(tar, prev) != -1){
int contentind = content.indexOf("</li>", prev);
if(contentind != -1){
res.add(
content.substring(
content.indexOf(tar, prev) + tarlen,
content.indexOf("</li>", content.indexOf(tar, prev))));
prev = contentind + 5;
}
else{break;}
}
return res;
}
}
SiteGrabberTest.java:
import java.util.List;
import org.junit.Test;
import junit.framework.Assert;
public class SiteGrabberTest {
String htsTest="<tr> <td>List of scrambled words: </td> <td><li>nielle</li></td> </tr><tr> <td></td><td><li>ierneb</li></td> </tr>";
//I want the text between the </td><td><li>...</li> tags.
//2 working tests that show that it sets the List.size() to 0 on a dummy string
//and that it records the right number of results in the List on a valid input.
@Test public void ValidContentTest(){
SiteGrabber myGrabber = new SiteGrabber();
List<String> mylst = myGrabber.getWords(htsTest);
String[] expected = new String[] {"nielle", "ierneb"};
Assert.assertEquals("wrong size", expected.length, mylst.size());
for (int i = 0; i < expected.length; i++) {
Assert.assertEquals("wrong word", expected[i], mylst.get(i));
//breaks on 1st iteration, saying it expects "nielle" and got
//">ierneb", implying some sort of off-by-one error.
}
}
}