public class UpdateHostDbMapper extends Mapper<Text,Writable,Text,NutchWritable>
Nested classes/interfaces inherited from class Mapper: Mapper.Context
Modifier and Type | Field and Description |
---|---|
protected String[] | args |
protected String | buffer |
protected CrawlDatum | crawlDatum |
protected boolean | filter |
protected URLFilters | filters |
protected Text | host |
protected HostDatum | hostDatum |
protected boolean | normalize |
protected URLNormalizers | normalizers |
protected boolean | readingCrawlDb |
protected String | reprUrl |
Constructor and Description |
---|
UpdateHostDbMapper() |
Modifier and Type | Method and Description |
---|---|
protected String | filterNormalize(String url) — Filters and/or normalizes the input URL. |
void | map(Text key, Writable value, Mapper.Context context) — Mapper ingesting records from the HostDB, CrawlDB and plaintext host scores file. |
void | setup(Mapper.Context context) |
protected Text host
protected HostDatum hostDatum
protected CrawlDatum crawlDatum
protected String reprUrl
protected String buffer
protected String[] args
protected boolean filter
protected boolean normalize
protected boolean readingCrawlDb
protected URLFilters filters
protected URLNormalizers normalizers
public void setup(Mapper.Context context)
protected String filterNormalize(String url)
Parameters: url - the input URL
public void map(Text key, Writable value, Mapper.Context context) throws IOException, InterruptedException
Overrides: map in class Mapper<Text,Writable,Text,NutchWritable>
Parameters: key, value, context
Throws: IOException, InterruptedException
Copyright © 2021 The Apache Software Foundation