public class CrawlDb extends NutchTool implements Tool
Modifier and Type | Field and Description |
---|---|
static String | CRAWLDB_ADDITIONS_ALLOWED |
static String | CRAWLDB_PURGE_404 |
static String | CRAWLDB_PURGE_ORPHANS |
static String | CURRENT_NAME |
static String | LOCK_NAME |
Fields inherited from class NutchTool: currentJob, currentJobNum, numJobs, results, status
Constructor and Description |
---|
CrawlDb() |
CrawlDb(Configuration conf) |
Modifier and Type | Method and Description |
---|---|
static Job | createJob(Configuration config, Path crawlDb) |
static void | install(Job job, Path crawlDb) |
static Path | lock(Configuration job, Path crawlDb, boolean force) |
static void | main(String[] args) |
Map<String,Object> | run(Map<String,Object> args, String crawlId) — Runs the tool, using a map of arguments. |
int | run(String[] args) |
void | update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) |
void | update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) |
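Methods inherited from class NutchTool: getProgress, getStatus, killJob, stopJob
Methods inherited from class Configured: getConf, setConf
Methods inherited from class Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface Configurable: getConf, setConf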
public static final String CRAWLDB_ADDITIONS_ALLOWED
public static final String CRAWLDB_PURGE_404
public static final String CRAWLDB_PURGE_ORPHANS
public static final String CURRENT_NAME
public static final String LOCK_NAME
public CrawlDb()
public CrawlDb(Configuration conf)
public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter) throws IOException, InterruptedException, ClassNotFoundException
public void update(Path crawlDb, Path[] segments, boolean normalize, boolean filter, boolean additionsAllowed, boolean force) throws IOException, InterruptedException, ClassNotFoundException
public static Job createJob(Configuration config, Path crawlDb) throws IOException
Throws: IOException
public static Path lock(Configuration job, Path crawlDb, boolean force) throws IOException
Throws: IOException
public static void install(Job job, Path crawlDb) throws IOException
Throws: IOException
Copyright © 2021 The Apache Software Foundation