We have around 500 accounts with one particular company but no way to compile bills into a centralized location for easy processing. Each week a staff member would have to log into the company’s website and download a PDF for each account and then make payments. The company would/could not provide us with a more streamlined process. So I was tasked with creating a program which would scrape the site for our bills and centralize the information for more simplified processing by our staff.

I decided to use Java since there were libraries already designed to do headless browsing. I used IntelliJ Idea Ultimate for the code. For plugins I used:

As usual I used a private repository on Github for code tracking. I use the current UNIX time stamp for error codes to avoid repetition.

The overall application process is simple:

  1. Read credentials from a text file (this could just as easily have been a database, etc.)
  2. Log into the site with those credentials
  3. Get a list of accounts associated with provided credentials
  4. Go to each account page and download the bill as a PDF
  5. Store PDF file by account number-date-filedownloadnumber
  6. Read each PDF and extract information into a CSV file (which can be opened in Excel), filling in the appropriate fields such as bill amount, balance due, etc.
  7. Email notice that the process has been finished.

The Main file’s actions in order:

  • Implements the logging settings
  • Instantiates an instance of the getbills class to pull all pdfs from each account
  • Instantiates an instance of the PDF processing class for reading the PDF files after the downloads
  • Instantiates an instance of the getcredentials class to pull credentials from the credentials file and pulls those credentials
  • creates a date named directory for files
  • creates a CSV file for PDF extracted data
  • It loops through each entry in the credential file and processes them with the getbills class
  • gets a list of the pdfs after they have all been downloaded
  • loops over that list one by one entering the extracted data into the CSV file
  • Notifies by email that process has completed

Main Class

package Bills;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import java.io.File;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Command-line entry point that runs one complete bill-collection pass.
 *
 * <p>The run creates (or reuses) a directory named after today's date in {@code yyyyMMdd}
 * form, has {@link GetBills} download the bills for every entry of {@code Credentials.txt},
 * passes every PDF found in that directory through {@link ProcessPdfs}, and finally sends a
 * notification e-mail.
 */
public class Main {

    protected static final Logger logger = Logger.getLogger(Main.class);

    /**
     * Pause between two consecutive logins, in milliseconds.
     * NOTE(review): the previous code computed a random delay of roughly 10-16 seconds but
     * never used it and always slept 10 ms. The effective 10 ms pause is kept; confirm which
     * delay is actually wanted.
     */
    private static final long PAUSE_BETWEEN_LOGINS_MS = 10;

    /**
     * Runs the download, extraction and notification steps in order.
     *
     * @param args ignored
     */
    public static void main(String[] args) {

        // Silence HtmlUnit's java.util.logging output.
        java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF);

        logger.setLevel(Level.ERROR);
        GetBills gb = new GetBills();
        DateFormat dateFormat = new SimpleDateFormat("yyyyMMdd");
        String nowdate = dateFormat.format(new Date());
        ProcessPdfs ppdfs = new ProcessPdfs();
        GetCredentialsFromFile cr = new GetCredentialsFromFile();
        cr.LoadFileList("Credentials.txt");

        File outputDir = new File("./" + nowdate);
        boolean success = outputDir.exists() || outputDir.mkdirs();

        if (!success) {
            // Logged at ERROR: the logger level is ERROR, so a debug message would be dropped.
            logger.error("Xception: Directory creation failed");
            System.out.println("Xception: Directory creation failed");
            return;
        }

        ppdfs.createOutputFile(nowdate);

        for (int i = 0; i < cr.size(); i++) {
            String username = cr.GetUserName(i);
            String password = cr.GetPassword(i);
            try {
                gb.Login(username, password, nowdate);
            } catch (Exception e) {
                logger.error("Xception: 1550872136 ", e);
                System.out.println("error: 1550872136 " + e);
                // Non-zero status so schedulers and wrapper scripts can see the run failed.
                System.exit(1);
            }
            try {
                Thread.sleep(PAUSE_BETWEEN_LOGINS_MS);
            } catch (InterruptedException e) {
                // Restore the interrupt flag so code further up the stack can observe it.
                Thread.currentThread().interrupt();
                logger.error("Xception: 1550872151 ", e);
                System.out.println("error: 1550872151 " + e);
                break;
            }
        }

        File[] list = outputDir.listFiles();
        if (list == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            logger.error("Xception: Directory listing failed");
            System.out.println("Xception: Directory listing failed");
            list = new File[0];
        }

        for (File entry : list) {
            String path = entry.toString();
            System.out.println(path);
            // Only files whose name ends in ".pdf" (any letter case) are bills.
            if (!path.regionMatches(true, path.length() - 4, ".pdf", 0, 4)) {
                continue;
            }
            try {
                if (ppdfs.openfile(path)) {
                    ppdfs.processFile(path);
                    ppdfs.closeFile();
                }
            } catch (Exception e) {
                logger.error("Xception: 1550872157 ", e);
                System.out.println("error 1550872157 : ___ " + path + " ___ " + e);
            }
        }

        try {
            System.out.println("Done");
            Email.sendEmail("", "", "", "", "", "Eversource Obtention Done", "Eversource Obtention Done");
        } catch (Exception e) {
            logger.error("Xception: 1550872167 ", e);
            System.out.println("error: 1550872167 " + e);
        }
    }
}

GetCredentials Class

This creates an object containing a list of credentials. Pretty straightforward.

package Bills;

import org.apache.log4j.Logger;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Loads login records from a tab-separated text file and exposes their fields by record index.
 *
 * <p>Each non-blank line is one record with three tab-separated columns, in the order shown
 * for {@code Credentials.txt}: e-mail address, user name, password.
 */
public class GetCredentialsFromFile {

    static Logger logger = Logger.getLogger(Main.class.getName());
    protected String email = "";
    protected String userName = "";
    protected String password = "";
    protected List<String> records = new ArrayList<String>();

    /** Column positions inside one tab-separated record. */
    private static final int EMAIL_COLUMN = 0;
    private static final int USER_NAME_COLUMN = 1;
    private static final int PASSWORD_COLUMN = 2;

    /**
     * Reads every non-blank line of {@code filename} and appends it to the record list.
     * A read failure is reported on standard error; records read before the failure are kept.
     *
     * @param filename path of the credentials file
     */
    public void LoadFileList(String filename) {
        // try-with-resources closes the reader even when readLine() fails part-way through.
        try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // A blank (e.g. trailing) line is not a record; keeping it would make the
                // getters fail on a missing column.
                if (!line.trim().isEmpty()) {
                    records.add(line);
                }
            }
        } catch (Exception e) {
            System.err.format("Exception occurred trying to read '%s'.", filename);
            e.printStackTrace();
        }
    }

    /**
     * @param index zero-based record index
     * @return the e-mail address (first column) of the record
     */
    public String GetEmail(int index) {
        return field(index, EMAIL_COLUMN);
    }

    /**
     * @param index zero-based record index
     * @return the user name (second column) of the record
     */
    public String GetUserName(int index) {
        // The user name is the second column; this previously returned the third (password) column.
        return field(index, USER_NAME_COLUMN);
    }

    /**
     * @param index zero-based record index
     * @return the password (third column) of the record
     */
    public String GetPassword(int index) {
        // The password is the third column; this previously returned the second (user name) column.
        return field(index, PASSWORD_COLUMN);
    }

    /** @return the number of records loaded so far */
    public int size() {
        return records.size();
    }

    /**
     * Returns one column of one record.
     *
     * @throws IndexOutOfBoundsException if {@code index} is not a valid record index
     * @throws IllegalArgumentException  if the record has too few tab-separated columns
     */
    private String field(int index, int column) {
        String[] parts = records.get(index).split("\t");
        if (column >= parts.length) {
            // The record content is deliberately left out of the message: it holds a password.
            throw new IllegalArgumentException("Credentials record " + (index + 1) + " has "
                    + parts.length + " tab-separated field(s); expected 3");
        }
        return parts[column];
    }
}

The Credential file is called Credentials.txt and is formatted like so:

email1@somedomain.com	email1username	email1pw
email2@somedomain.com	email2username	email2pw

GetBills Class

The GetBills class is the heavy lifter. This is where we log into the site, navigate, and download the bills as pdfs. The production version is filled with old code commented out as every time the site is updated changes must be made. Sometimes those changes require me to simply uncomment some old code, sometimes I must rewrite navigation routines.

I will not go into the logic of this process too deeply, as it is wholly dependent on the site being browsed, but suffice it to say that it uses a combination of id tags, name attributes, class names, etc. to find and click through. The important things to take from this are the use of waitForBackgroundJavaScriptStartingBefore and waitForBackgroundJavaScript after every page request, and the use of cleanUp so you don’t run into memory errors. HtmlUnit is slow!

This site has an expand button to show all accounts so there is a loop to keep expanding until all accounts are shown. Also, the final PDF is actually an iframe displaying the PDF so there was no link to click, just load the iframe into an unexpected page type and there it is.

package Bills;

import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.UnexpectedPage;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.*;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;

import java.io.File;
import java.io.FileOutputStream;
import java.util.Arrays;
import java.util.List;

/**
 * Logs into the billing site with HtmlUnit and saves each account's bill as a PDF in the
 * directory named after the run date.
 *
 * <p>The navigation is tied to the site's current markup (element ids, class names and
 * character offsets), so it has to be revisited whenever the site changes.
 */
public class GetBills {

    static Logger logger = Logger.getLogger(Main.class.getName());

    /**
     * Logs in, expands the account list and downloads the bill PDF of every account that is
     * not already present in the run directory.
     *
     * <p>Any failure inside the browsing session is printed and terminates the JVM.
     *
     * @param id      user name typed into the {@code WebId} field
     * @param pw      password typed into the {@code Password} field
     * @param nowdate run date; its first eight characters name the output directory
     * @throws Exception if listing the already downloaded bills fails before the session starts
     */
    public void Login(String id, String pw, String nowdate) throws Exception {

        java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF);
        String[] alreadydownloaded = getAccountAlreadyDownloaded(nowdate);
        List<String> alreadydownloadedlist = Arrays.asList(alreadydownloaded);

        try (final WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
            webClient.setJavaScriptTimeout(20000);
            webClient.getOptions().setJavaScriptEnabled(true);
            // NOTE(review): certificate validation is disabled here; confirm this is still required.
            webClient.getOptions().setUseInsecureSSL(true);
            webClient.getOptions().setDownloadImages(true);
            webClient.getOptions().setCssEnabled(true);
            webClient.getOptions().setRedirectEnabled(true);
            webClient.setAjaxController(new NicelyResynchronizingAjaxController());
            webClient.getCookieManager().setCookiesEnabled(true);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
            webClient.getOptions().setThrowExceptionOnScriptError(false);

            //added 10-11-19
            webClient.getCache().setMaxSize(5);

            java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF);
            HtmlPage page1 = webClient.getPage("https://www.somesite.com/accountlogin");
            webClient.waitForBackgroundJavaScriptStartingBefore(2000);
            webClient.waitForBackgroundJavaScript(4000);

            // Fill in the first form on the login page and submit it.
            final HtmlForm form = page1.getForms().get(0);
            final HtmlTextInput nameInput = form.getInputByName("WebId");
            final HtmlPasswordInput passwordInput = form.getInputByName("Password");
            nameInput.type(id);
            passwordInput.type(pw);

            final HtmlInput button = form.getFirstByXPath("//*[@id=\"submit\"]");

            // Clicking the button yields the account overview page.
            final HtmlPage page2 = button.click();
            webClient.waitForBackgroundJavaScriptStartingBefore(1000);
            webClient.waitForBackgroundJavaScript(2000);
            HtmlPage page3 = page2;

            HtmlPage page4 = expandAccountList(webClient, page3);

            page1.cleanUp();
            page2.cleanUp();
            page3.cleanUp();

            //get account links
            String[] accountnumbers = getAccountNumbers(page4);

            List<HtmlAnchor> accountlinks = page4.getByXPath("//a[@class=\"o-link view-bill\"]");
            HtmlPage accountpages = null;
            int accountnumbertracker = 0;

            for (int i = 0; i < accountlinks.size(); i++) {
                try {
                    if (!alreadydownloadedlist.contains(accountnumbers[accountnumbertracker].trim())) {
                        System.out.println("1 | " + accountlinks.size() + " | " +  accountlinks.get(i) + " | " + accountnumbers[accountnumbertracker] );

                        try {
                            accountpages = (HtmlPage) accountlinks.get(i).click();
                            webClient.waitForBackgroundJavaScriptStartingBefore(1000);
                            webClient.waitForBackgroundJavaScript(2000);
                        } catch (Exception e) {
                            System.out.println("error: 1569602618 " +  e.toString());
                        }

                        try {
                            accountpages = (HtmlPage) accountpages.getEnclosingWindow().getEnclosedPage();
                            webClient.waitForBackgroundJavaScriptStartingBefore(1000);
                            webClient.waitForBackgroundJavaScript(2000);
                        } catch (Exception e) {
                            System.out.println("error: 1569602581 " +  e.toString());
                        }

                        // A zero amount due marks the bill as paid in the file name.
                        String ispaid = "";
                        final String formAsXml = accountpages.asXml();
                        if (formAsXml.indexOf("id=\"main_hidAmountDue\" value=\"0.00\"") > 0) {
                            ispaid = "_paid";
                        }

                        // NOTE(review): the page is cleaned up before its frames are read below;
                        // the order is kept as it was - confirm it is intentional.
                        accountpages.cleanUp();
                        page4.cleanUp();

                        try {
                            List<FrameWindow> frames = accountpages.getFrames();
                            for (FrameWindow frame : frames) {
                                // Frames holding anything other than a raw (non-HTML) response are
                                // skipped; previously the first such frame aborted the whole scan
                                // with a ClassCastException.
                                if (!(frame.getEnclosedPage() instanceof UnexpectedPage)) {
                                    continue;
                                }
                                UnexpectedPage pdfpage1 = (UnexpectedPage) frame.getEnclosedPage();
                                if ("application/pdf".equals(pdfpage1.getWebResponse().getContentType())) {
                                    String pdfPath = "./" + nowdate.substring(0, 8) + "/" + id + "_" + (accountnumbertracker + 1) + "_of_" + accountlinks.size() + "_" + accountnumbers[accountnumbertracker]  + ispaid + ".pdf";
                                    // Both streams are closed even if the copy fails.
                                    try (java.io.InputStream pdfContent = pdfpage1.getWebResponse().getContentAsStream();
                                         FileOutputStream pdfFile = new FileOutputStream(pdfPath, false)) {
                                        IOUtils.copy(pdfContent, pdfFile);
                                    }
                                    System.out.print("3 ./" + nowdate.substring(0, 8) + "/" + id + "_" + accountnumbers[accountnumbertracker] + ".pdf\n");
                                    pdfpage1.cleanUp();
                                }
                            }
                        } catch (Exception e) {
                            System.out.println("1569603484 " + e.toString());
                        }

                        // Reload the overview and expand it again before moving to the next account.
                        page3.cleanUp();
                        page3 = webClient.getPage("https://www.eversource.com/cg/customer/accountoverview");
                        webClient.waitForBackgroundJavaScriptStartingBefore(2000);
                        webClient.waitForBackgroundJavaScript(4000);

                        page4 = expandAccountList(webClient, page3);

                        accountnumbers = getAccountNumbers(page3);
                        accountlinks = page3.getByXPath("//a[@class=\"o-link view-bill\"]");
                        accountnumbertracker++;
                        System.out.println("4 " + accountnumbers.length + "\n");

                    } else {
                        System.out.println("already exists: " + accountnumbers[accountnumbertracker]);
                        accountnumbertracker++;
                    }

                } catch (Exception e) {
                    System.out.println("error: 1553626667 " +  e.toString());
                    accountnumbertracker++;
                }
                webClient.getCache().clear();
            }
        } catch (Exception e) {
            System.out.println("error: 1582041714 " + e.toString() + " " + e.getLocalizedMessage() + " " + e.getMessage());
            // Non-zero status so the caller's environment can tell the run failed.
            System.exit(1);
        }
    }

    /**
     * Clicks the "show_more_account" button until its markup contains "display: none;".
     *
     * @param webClient client used to wait for background JavaScript after each click
     * @param overview  page that may contain the button
     * @return the page returned by the last click, or {@code overview} if there is no button
     * @throws java.io.IOException if a click fails
     */
    private HtmlPage expandAccountList(WebClient webClient, HtmlPage overview) throws java.io.IOException {
        HtmlPage expanded = overview;
        HtmlButton showMore = (HtmlButton) overview.getElementById("show_more_account");
        // The loop also ends when the button disappears from the page altogether, which
        // previously caused a NullPointerException.
        while (showMore != null) {
            expanded = showMore.click();
            webClient.waitForBackgroundJavaScriptStartingBefore(1000);
            webClient.waitForBackgroundJavaScript(2000);
            showMore = (HtmlButton) expanded.getElementById("show_more_account");
            if (showMore != null && showMore.asXml().indexOf("display: none;") >= 0) {
                break;
            }
        }
        return expanded;
    }

    /**
     * Collects the text found at a fixed offset after every "u-acct-card-header__number"
     * marker in the page markup.
     *
     * <p>Markers located before character offset 10000 are skipped (the first occurrence is a
     * CSS reference). NOTE(review): the offsets 49 and 60 depend on the current page markup.
     * Every remaining occurrence is returned, so an account may appear more than once.
     *
     * @param page account overview page
     * @return the extracted strings in document order
     */
    private String[] getAccountNumbers(HtmlPage page) {

        final String marker = "u-acct-card-header__number";
        String pageAsXml = page.asXml().replace("\n", "").replace("\r", "");
        List<String> results = new java.util.ArrayList<String>();
        try {
            int index = pageAsXml.indexOf(marker);
            while (index >= 0) {
                if (index >= 10000) {
                    results.add(pageAsXml.substring(index + 49, index + 60).replaceAll("\"", ""));
                }
                //jump to the end of this found string and find the next instance
                index = pageAsXml.indexOf(marker, index + marker.length());
            }
        } catch (Exception e) {
            System.out.println("error: 1582041726 " + e.toString());
            System.exit(1);
        }
        return results.toArray(new String[0]);
    }

    /**
     * Builds the list used to decide which accounts can be skipped.
     *
     * <p>For every file in the run directory whose path contains "_", the part between the
     * first "_" and the following "." is kept only when it equals the account number read
     * from that file (spaces removed); otherwise the entry becomes an empty string. Paths
     * without "_" are returned unchanged.
     *
     * @param nowdate name of the run directory
     * @return one entry per file; empty if the directory cannot be listed
     */
    private String[] getAccountAlreadyDownloaded(String nowdate) {
        File[] list = new File(".//" + nowdate).listFiles();
        if (list == null) {
            // Not a directory, or an I/O error: nothing has been downloaded yet.
            return new String[0];
        }
        String[] filelist = new String[list.length];
        ProcessPdfs checkfile = new ProcessPdfs();
        for (int i = 0; i < list.length; i++) {
            String entry = list[i].toString().trim();
            if (entry.indexOf("_") >= 0) {
                boolean opened = false;
                try {
                    opened = checkfile.openfile(entry);
                    checkfile.extractAccountNumber();
                } catch (Exception e) {
                    System.out.println(entry + "     " + e.toString());
                } finally {
                    // Release the PDF; it used to stay open for the rest of the run.
                    if (opened) {
                        checkfile.closeFile();
                    }
                }
                String tempstring = entry.substring(entry.indexOf("_") + 1);
                int dot = tempstring.indexOf(".");
                if (dot >= 0) {
                    tempstring = tempstring.substring(0, dot);
                }
                entry = tempstring.trim();
                String pdfAccount = checkfile.accountNumber == null ? "" : checkfile.accountNumber.replaceAll(" ", "");
                if (!entry.equals(pdfAccount)) {
                    entry = "";
                }
            }
            filelist[i] = entry;
        }
        return filelist;
    }

}

ProcessPdfs Class

This class reads through each PDF, extracts certain information, and inputs that information into a csv file.

package Bills;

import org.apache.log4j.Logger;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.encryption.AccessPermission;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.PDFTextStripperByArea;

import java.awt.geom.Rectangle2D;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;

public class ProcessPdfs {

    static Logger logger = Logger.getLogger(Main.class.getName());
    public PDDocument document = null;
    public String documentText = null;
    public String accountNumber = null;
    public String serviceReferenceNumber = null;
    public String electricSupplyService = null;
    public String deliverServices = null;
    public String balanceForward = null;
    public String lateFees = null;
    public String totalAmountDue = null;
    public String outputfileName = null;
    public String eversourceId = null;
    public String clmLoanMonthlyFee = null;
    public String statementDate = null;
    public String dueDate = null;
    public String otherChargesorCredits = null;

    /**
     * Reads the account number that follows the first "ACCOUNT NUMBER:" label
     * (matched case-insensitively) in the document text and stores it in {@code accountNumber}.
     *
     * <p>Up to 13 characters after the label are taken. A value without spaces and at least
     * 11 characters long is regrouped as 4-3-4. {@code accountNumber} is "-" when the document
     * has no pages, the label is missing, nothing follows it, or an error occurs.
     */
    public void extractAccountNumber() {
        try {
            accountNumber = "-";
            // The text covers the whole document, so it is searched once rather than per page.
            if (document.getNumberOfPages() < 1) {
                return;
            }
            String[] splits = documentText.toUpperCase().split("ACCOUNT NUMBER:");
            if (splits.length < 2) {
                return;
            }
            String afterLabel = splits[1].trim();
            if (afterLabel.isEmpty()) {
                return;
            }
            // Bounded so a number at the very end of the text no longer throws.
            accountNumber = afterLabel.substring(0, Math.min(13, afterLabel.length())).trim();
            if (accountNumber.indexOf(" ") < 0 && accountNumber.length() >= 11) {
                accountNumber = accountNumber.substring(0, 4) + " " + accountNumber.substring(4, 7) + " " + accountNumber.substring(7, 11);
            }
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871683 " + e);
        }
    }

    /**
     * Reads the value that follows the first "Service reference:" label in the document text
     * and stores it in {@code serviceReferenceNumber}.
     *
     * <p>Up to nine characters after the label are taken; a shorter value gets a single "0"
     * prepended. The field is "-" when the document has no pages, the label is missing,
     * nothing follows it, or an error occurs.
     */
    public void extractServiceReferenceNumber() {
        try {
            serviceReferenceNumber = "-";
            // The text covers the whole document, so it is searched once rather than per page.
            if (document.getNumberOfPages() < 1) {
                return;
            }
            String[] splits = documentText.split("Service reference:");
            if (splits.length < 2) {
                return;
            }
            String afterLabel = splits[1].trim();
            if (afterLabel.isEmpty()) {
                return;
            }
            // Bounded so a value at the very end of the text no longer throws.
            serviceReferenceNumber = afterLabel.substring(0, Math.min(9, afterLabel.length())).trim();
            if (serviceReferenceNumber.length() < 9) {
                serviceReferenceNumber = "0" + serviceReferenceNumber;
            }
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871702 " + e);
        }
    }

    /**
     * Reads the amount printed after the first "Supply Services" label and stores it in
     * {@code electricSupplyService}.
     *
     * <p>The value is the rest of the label's line with letters, spaces and line breaks
     * removed. The field is "-" when the document has no pages, the label is missing, or an
     * error occurs.
     */
    public void extractElectricSupplyServices() {
        try {
            electricSupplyService = "-";
            // The text covers the whole document, so it is searched once rather than per page.
            if (document.getNumberOfPages() < 1) {
                return;
            }
            String[] splits = documentText.split("Supply Services");
            if (splits.length < 2) {
                return;
            }
            String afterLabel = splits[1];
            // Platform line separator, as in the sibling extractors, instead of a fixed "\r\n".
            int lineEnd = afterLabel.indexOf(System.lineSeparator());
            if (lineEnd < 0) {
                lineEnd = afterLabel.length();
            }
            // The index is applied to the same string it was computed on, so leading
            // whitespace can no longer push the cut into the next line.
            electricSupplyService = afterLabel.substring(0, lineEnd).trim().replaceAll("[A-Za-z \r\n]", "");
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871711 " + e);
        }
    }

    /**
     * Reads the amount printed after the first "Delivery Services" label and stores it in
     * {@code deliverServices}.
     *
     * <p>The value is the rest of the label's line with letters, spaces and line breaks
     * removed. The field is "-" when the document has no pages, the label is missing, or an
     * error occurs.
     */
    public void extractDeliveryServices() {
        try {
            deliverServices = "-";
            // The text covers the whole document, so it is searched once rather than per page.
            if (document.getNumberOfPages() < 1) {
                return;
            }
            String[] splits = documentText.split("Delivery Services");
            if (splits.length < 2) {
                return;
            }
            String afterLabel = splits[1];
            int lineEnd = afterLabel.indexOf(System.lineSeparator());
            if (lineEnd < 0) {
                lineEnd = afterLabel.length();
            }
            // The index is applied to the same string it was computed on, so leading
            // whitespace can no longer push the cut into the next line.
            deliverServices = afterLabel.substring(0, lineEnd).trim().replaceAll("[A-Za-z \r\n]", "");
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871723 " + e);
        }
    }

    /**
     * Reads the amount printed after the first "LM Loan monthly fee" label and stores it in
     * {@code clmLoanMonthlyFee}.
     *
     * <p>The value is the rest of the label's line with letters, spaces and line breaks
     * removed. The field is "-" when the document has no pages, the label is missing, or an
     * error occurs.
     */
    public void extractClmLoanMonthlyFee(){
        try {
            clmLoanMonthlyFee = "-";
            // The text covers the whole document, so it is searched once rather than per page.
            if (document.getNumberOfPages() < 1) {
                return;
            }
            String[] splits = documentText.split("LM Loan monthly fee");
            if (splits.length < 2) {
                return;
            }
            String afterLabel = splits[1];
            int lineEnd = afterLabel.indexOf(System.lineSeparator());
            if (lineEnd < 0) {
                lineEnd = afterLabel.length();
            }
            // The index is applied to the same string it was computed on, so leading
            // whitespace can no longer push the cut into the next line.
            clmLoanMonthlyFee = afterLabel.substring(0, lineEnd).trim().replaceAll("[A-Za-z \r\n]", "");
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871730 " + e);
        }
    }

    /**
     * Reads the amount printed after the first "Other Charges or Credits" label and stores it
     * in {@code otherChargesorCredits}.
     *
     * <p>The value is the rest of the label's line with letters, spaces and line breaks
     * removed. The field is "-" when the document has no pages, the label is missing, or an
     * error occurs.
     */
    public void extractOtherChargesorCredits(){
        try {
            otherChargesorCredits = "-";
            // The text covers the whole document, so it is searched once rather than per page.
            if (document.getNumberOfPages() < 1) {
                return;
            }
            String[] splits = documentText.split("Other Charges or Credits");
            if (splits.length < 2) {
                return;
            }
            String afterLabel = splits[1];
            int lineEnd = afterLabel.indexOf(System.lineSeparator());
            if (lineEnd < 0) {
                lineEnd = afterLabel.length();
            }
            // The index is applied to the same string it was computed on, so leading
            // whitespace can no longer push the cut into the next line.
            otherChargesorCredits = afterLabel.substring(0, lineEnd).trim().replaceAll("[A-Za-z \r\n]", "");
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871745 " + e);
        }
    }

    /**
     * Reads the amount printed after the first "Balance Forward" label and stores it in
     * {@code balanceForward}.
     *
     * <p>The value is the rest of the label's line with letters, spaces and line breaks
     * removed. The field is "-" when the document has no pages, the label is missing, or an
     * error occurs.
     */
    public void extractBalanceForward() {
        try {
            balanceForward = "-";
            // The text covers the whole document, so it is searched once rather than per page.
            if (document.getNumberOfPages() < 1) {
                return;
            }
            String[] splits = documentText.split("Balance Forward");
            if (splits.length < 2) {
                return;
            }
            String afterLabel = splits[1];
            // Platform line separator, as in the sibling extractors, instead of a fixed "\r\n".
            int lineEnd = afterLabel.indexOf(System.lineSeparator());
            if (lineEnd < 0) {
                lineEnd = afterLabel.length();
            }
            // The index is applied to the same string it was computed on, so leading
            // whitespace can no longer push the cut into the next line.
            balanceForward = afterLabel.substring(0, lineEnd).trim().replaceAll("[A-Za-z \r\n]", "");
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871753 " + e);
        }
    }

    /**
     * Looks for a late payment charge in the document text and stores it in {@code lateFees}.
     *
     * <p>Each occurrence of "Late Payment Charge" is examined in order. An occurrence is
     * skipped when the ten characters before it contain "Cancel" or when characters 19-24
     * after it contain "if" (presumably a cancelled charge or an explanatory sentence rather
     * than an actual charge - confirm against a sample bill). The first occurrence that is not
     * skipped supplies the value: the text from one character before the next "$" up to the
     * end of that line. {@code lateFees} is "-" when no occurrence qualifies or an error occurs.
     */
    public void extractLateFees() {
        try {
            lateFees = "-";
            // Platform line separator, as in the sibling extractors, instead of a fixed "\r\n".
            String lineBreak = System.lineSeparator();
            String[] splits = documentText.split("Late Payment Charge");
            for (int p = 1; p < splits.length; ++p) {
                String before = splits[p - 1];
                String after = splits[p];
                boolean precededByCancel = before.substring(before.length() - 10).indexOf("Cancel") >= 0;
                if (precededByCancel || after.substring(19, 25).indexOf("if") >= 0) {
                    lateFees = "-";
                } else {
                    lateFees = after.substring(after.indexOf("$") - 1, after.indexOf(lineBreak)).trim();
                    break;
                }
            }
        } catch (Exception e) {
            logger.error("Xception: 1550871759", e);
            System.out.println("error: 1550871759 " + e);
        }
    }

    /**
     * Reads the due date from a fixed rectangle on the first page and stores it, formatted by
     * {@code formatDate}, in {@code dueDate}.
     *
     * <p>If the primary rectangle yields no text, a second rectangle is read instead. Text
     * containing "Balance Forward" is rejected, leaving {@code dueDate} at "-". Letters,
     * spaces and line breaks are stripped only from text taken from the primary rectangle.
     * On any error {@code dueDate} stays "-".
     */
    public void extractDueDate() {
        try {
            dueDate = "-";
            final String regionName = "region";
            PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
            areaStripper.addRegion(regionName, new Rectangle2D.Float(300, 40, 50, 25));
            PDPage firstPage = document.getPage(0);
            areaStripper.extractRegions(firstPage);
            String primaryText = areaStripper.getTextForRegion(regionName).trim();

            String candidate;
            boolean fromPrimary = !primaryText.isEmpty();
            if (fromPrimary) {
                candidate = primaryText;
            } else {
                // Nothing in the primary rectangle: fall back to the alternate position.
                areaStripper.addRegion(regionName, new Rectangle2D.Float(260, 80, 100, 25));
                areaStripper.extractRegions(firstPage);
                candidate = areaStripper.getTextForRegion(regionName).trim();
            }

            if (candidate.contains("Balance Forward")) {
                // The rectangle landed on a different field; dueDate keeps its "-" placeholder.
                return;
            }
            dueDate = fromPrimary
                    ? formatDate(candidate.replaceAll("[A-Za-z \r\n]", ""))
                    : formatDate(candidate);
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871767 " + e);
        }
    }

    /**
     * Reads the statement date from a fixed rectangle on the first page and stores it,
     * formatted by {@code formatDate}, in {@code statementDate}.
     *
     * <p>Text from the primary rectangle has letters, spaces and line breaks stripped before
     * formatting. If that rectangle is empty, a second rectangle is read and its trimmed text
     * is formatted as is. On any error {@code statementDate} stays "-".
     */
    public void extractStatementDate() {
        try {
            statementDate = "-";
            final String regionName = "region";
            PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
            areaStripper.addRegion(regionName, new Rectangle2D.Float(105, 60, 50, 10));
            PDPage firstPage = document.getPage(0);
            areaStripper.extractRegions(firstPage);
            String primaryText = areaStripper.getTextForRegion(regionName).trim();

            if (primaryText.isEmpty()) {
                // Nothing in the primary rectangle: fall back to the alternate position.
                areaStripper.addRegion(regionName, new Rectangle2D.Float(530, 60, 75, 25));
                areaStripper.extractRegions(firstPage);
                String fallbackText = areaStripper.getTextForRegion(regionName).trim();
                statementDate = formatDate(fallbackText);
                return;
            }
            statementDate = formatDate(primaryText.replaceAll("[A-Za-z \r\n]", ""));
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871775 " + e);
        }
    }

    /**
     * Reads the total amount due from a fixed rectangle on the first page and stores it, with
     * letters, spaces and line breaks removed, in {@code totalAmountDue}.
     *
     * <p>If the primary rectangle contains "CITY OF SPRINGFIELD", a second rectangle is read
     * instead. On any error {@code totalAmountDue} stays "-".
     */
    public void extractTotalAmountDue() {
        try {
            totalAmountDue = "-";
            final String regionName = "region";
            PDFTextStripperByArea areaStripper = new PDFTextStripperByArea();
            areaStripper.addRegion(regionName, new Rectangle2D.Float(475, 25, 100, 35));
            PDPage firstPage = document.getPage(0);
            areaStripper.extractRegions(firstPage);
            String amountText = areaStripper.getTextForRegion(regionName).trim();

            if (amountText.contains("CITY OF SPRINGFIELD")) {
                // The primary rectangle holds this text instead of an amount: use the alternate position.
                areaStripper.addRegion(regionName, new Rectangle2D.Float(340, 80, 100, 20));
                areaStripper.extractRegions(firstPage);
                amountText = areaStripper.getTextForRegion(regionName).trim();
            }
            totalAmountDue = amountText.replaceAll("[A-Za-z \r\n]", "");
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871781 " + e);
        }
    }

    /**
     * Opens a PDF, checks that text extraction is permitted, and loads its full text into
     * {@code documentText}.
     *
     * <p>Any document still held from an earlier call is closed first. When opening fails,
     * the partially opened document is closed, {@code document} is set to {@code null} and
     * {@code documentText} stays "-".
     *
     * @param fname path of the PDF; characters 8 up to the first "_" become {@code eversourceId}
     * @return {@code true} if the document was opened and its text extracted, {@code false}
     *         otherwise (the cause is logged)
     * @throws IOException declared for callers; failures are reported through the return value
     */
    public boolean openfile(String fname) throws IOException {
        documentText = "-";
        // Do not overwrite a still-open document: that would leak its file handle.
        releaseDocumentQuietly();
        try {
            document = PDDocument.load(new File(fname));

            AccessPermission ap = document.getCurrentAccessPermission();
            if (!ap.canExtractContent()) {
                throw new IOException("You do not have permission to extract text");
            }
            // NOTE(review): the fixed start offset 8 assumes a particular path prefix length;
            // confirm it still matches the paths passed in.
            eversourceId = fname.substring(8, fname.indexOf("_"));
            PDFTextStripper stripper = new PDFTextStripper();
            stripper.setSortByPosition(true);
            documentText = stripper.getText(document);
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871788 " + e);
            // Callers skip closeFile() when this returns false, so release the document here.
            releaseDocumentQuietly();
            return false;
        }
        return true;
    }

    /** Closes the current document, if any, and clears the reference; close errors are only logged. */
    private void releaseDocumentQuietly() {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException e) {
            logger.error("Xception: ", e);
        } finally {
            document = null;
        }
    }

    /**
     * Closes the currently held {@code document}.
     *
     * <p>Any exception raised while closing is logged and printed rather than
     * propagated to the caller.
     */
    public void closeFile() {
        try {
            document.close();
        } catch (Exception closeFailure) {
            logger.error("Xception: ", closeFailure);
            System.out.println("error: 1550871795 " + closeFailure);
        }
    }

    /**
     * Prepares the CSV output file {@code ./<filename>/<filename>.csv} and
     * writes the column header row to it.
     *
     * <p>The file is created when it does not exist yet. The header is written
     * with a non-appending writer, so any previous content of an existing file
     * is replaced. On success {@code outputfileName} holds the CSV path.
     *
     * @param filename name used both for the directory and for the CSV file
     *                 (without the {@code .csv} extension)
     * @return {@code true} when the file exists (or was created) and the header
     *         was written; {@code false} when the file could not be created or
     *         any step threw (the failure is logged)
     */
    public boolean createOutputFile(String filename) {
        String csvPath = "./" + filename + "/" + filename + ".csv";
        try {
            File csvFile = new File(csvPath);
            boolean success = csvFile.exists() || csvFile.createNewFile();
            outputfileName = csvPath;
            // try-with-resources closes the writer even when the write fails.
            try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputfileName))) {
                writer.write("Eversource ID" + "," + "Account Number" + "," + "Service Reference Number" + "," + "Electric Supply Service" + "," + "Delivery Services" + "," + "Balance Forward" + "," + "Late Fees" + "," + "C&LM Loan Monthly Fee" + "," + "Other Charges or Credits" + "," + "Total Amount Due" + "," + "Statement Date" + "," + "Due Date" + "," + System.lineSeparator());
            }
            return success;
        } catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871806 " + e);
            // A failed header write must not be reported as success.
            return false;
        }
    }

    /**
     * Extracts every bill field from the currently loaded document and appends
     * them as one quoted CSV row to the file named by {@code outputfileName}.
     *
     * <p>When {@code filename} contains {@code "paid"} at any position after
     * the first character, the total amount due is forced to {@code "0"}
     * instead of being read from the PDF. Account number and service reference
     * number are additionally wrapped in single quotes inside their CSV cells.
     *
     * @param filename name of the PDF being processed; only inspected for the
     *                 {@code "paid"} marker
     * @return {@code true} when the row was written; {@code false} when opening
     *         the output file or writing the row failed (the failure is logged)
     */
    public boolean processFile(String filename){
        // try-with-resources guarantees the writer is closed on every path.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputfileName,true))) {
            this.extractAccountNumber();
            this.extractServiceReferenceNumber();
            this.extractElectricSupplyServices();
            this.extractDeliveryServices();
            this.extractBalanceForward();
            this.extractLateFees();
            if(filename.indexOf("paid") > 0){
                this.totalAmountDue = "0";
            }else {
                this.extractTotalAmountDue();
            }
            this.extractClmLoanMonthlyFee();
            this.extractOtherChargesorCredits();
            this.extractDueDate();
            this.extractStatementDate();

            writer.write("\"" + eversourceId + "\",\"\'" + accountNumber + "\'\",\"\'" + serviceReferenceNumber + "\'\",\"" + electricSupplyService + "\",\"" + deliverServices + "\",\"" + balanceForward + "\",\"" + lateFees + "\",\"" + clmLoanMonthlyFee + "\",\"" + otherChargesorCredits + "\",\"" + totalAmountDue + "\",\"" + statementDate + "\",\"" + dueDate + "\"" + System.lineSeparator());
        }catch (Exception e) {
            logger.error("Xception: ", e);
            System.out.println("error: 1550871815 " + e);
            // The row was not (fully) written, so do not report success.
            return false;
        }
        return true;
    }

    /**
     * Converts a date written as {@code "Mon D, YYYY"} (for example
     * {@code "Feb 12, 2019"}) into {@code "M/D/YYYY"}.
     *
     * <p>The month token is translated with {@link #getnumericmonth(String)},
     * so an unrecognised month yields {@code "0"} in the month position. Text
     * that does not have the expected shape is returned unchanged instead of
     * raising an index error. That covers text with no comma, and text with no
     * space before the comma, such as {@code "12,2019"}.
     *
     * @param rawdate date text as extracted from the bill
     * @return the reformatted date, or {@code rawdate} itself when it cannot be
     *         split into month, day and year
     */
    public String formatDate(String rawdate){
        // Parse a trimmed copy so stray leading whitespace cannot be mistaken
        // for the month/day separator.
        String text = rawdate.trim();
        int commaAt = text.indexOf(",");
        int spaceAt = text.indexOf(" ");
        if (commaAt < 0 || spaceAt < 0 || spaceAt > commaAt) {
            return rawdate;
        }
        String month = text.substring(0, spaceAt).trim();
        String day = text.substring(spaceAt, commaAt).trim();
        String year = text.substring(commaAt + 1).trim();
        return getnumericmonth(month) + "/" + day + "/" + year;
    }

    /**
     * Maps a three-letter English month abbreviation to its month number.
     *
     * <p>Matching ignores case but is otherwise exact: only {@code JAN},
     * {@code FEB}, ... {@code DEC} are recognised.
     *
     * @param month month abbreviation such as {@code "Jan"} or {@code "DEC"}
     * @return {@code "1"} through {@code "12"} for a recognised abbreviation,
     *         otherwise {@code "0"}
     */
    public String getnumericmonth(String month){
        // Calendar order: position in the table + 1 is the month number.
        String[] abbreviations = {
            "JAN", "FEB", "MAR", "APR", "MAY", "JUN",
            "JUL", "AUG", "SEP", "OCT", "NOV", "DEC"
        };
        String wanted = month.toUpperCase();
        for (int index = 0; index < abbreviations.length; index++) {
            if (abbreviations[index].equals(wanted)) {
                return String.valueOf(index + 1);
            }
        }
        return "0";
    }
}

 60 total views,  1 view today