clothing_search_engine.crawler
Class PageExtractor

java.lang.Object
  extended by clothing_search_engine.crawler.PageExtractor

public class PageExtractor
extends java.lang.Object

Author:
Sara

Constructor Summary
PageExtractor(java.lang.String m_range)
          Constructor
 
Method Summary
 java.lang.String cleanPage(java.lang.String webPage)
          Clean the page to make it useful for search engine analysis
 java.lang.String getPage(java.net.URL url)
          Return the entire webpage as a single string
static void main(java.lang.String[] args)
           
 
Methods inherited from class java.lang.Object
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Constructor Detail

PageExtractor

public PageExtractor(java.lang.String m_range)
Constructs a PageExtractor using the given range regular expression.

Parameters:
m_range - regular expression specifying the range of the page
Method Detail

getPage

public java.lang.String getPage(java.net.URL url)
Return the entire webpage as a single string

Parameters:
url - url of webpage to get
Returns:
the unprocessed webpage as a String

cleanPage

public java.lang.String cleanPage(java.lang.String webPage)
Clean the page to make it useful for search engine analysis

Parameters:
webPage - unprocessed webpage
Returns:
the processed (cleaned) webpage as a String

main

public static void main(java.lang.String[] args)
Parameters:
args - command-line arguments