public class CrawlDbReader extends AbstractChecker implements Closeable
Modifier and Type | Class and Description |
---|---|
static class | CrawlDbReader.CrawlDatumCsvOutputFormat |
static class | CrawlDbReader.CrawlDatumJsonOutputFormat |
static class | CrawlDbReader.CrawlDbDumpMapper |
static class | CrawlDbReader.CrawlDbStatMapper |
static class | CrawlDbReader.CrawlDbStatReducer |
static class | CrawlDbReader.CrawlDbTopNMapper |
static class | CrawlDbReader.CrawlDbTopNReducer |
static class | CrawlDbReader.JsonIndenter |
Modifier and Type | Field and Description |
---|---|
protected String | crawlDb |

Fields inherited from class AbstractChecker: keepClientCnxOpen, stdin, tcpPort, usage
Constructor and Description |
---|
CrawlDbReader() |
Modifier and Type | Method and Description |
---|---|
void | close() |
CrawlDatum | get(String crawlDb, String url, Configuration config) |
static void | main(String[] args) |
protected int | process(String line, StringBuilder output) |
void | processDumpJob(String crawlDb, String output, Configuration config, String format, String regex, String status, Integer retry, String expr, Float sample) |
void | processStatJob(String crawlDb, Configuration config, boolean sort) |
void | processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) |
Object | query(Map<String,String> args, Configuration conf, String type, String crawlId) |
void | readUrl(String crawlDb, String url, Configuration config, StringBuilder output) |
int | run(String[] args) |
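Methods inherited from class AbstractChecker: getProtocolOutput, parseArgs, processSingle, processStdin, processTCP, run
Methods inherited from class org.apache.hadoop.conf.Configured: getConf, setConf
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.hadoop.conf.Configurable: getConf, setConf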
protected String crawlDb
public void close()
Specified by: close in interface Closeable
Specified by: close in interface AutoCloseable
public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException, InterruptedException, ClassNotFoundException
public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException
Throws: IOException
protected int process(String line, StringBuilder output) throws Exception
process in class AbstractChecker
Throws: Exception
public void readUrl(String crawlDb, String url, Configuration config, StringBuilder output) throws IOException
Throws: IOException
public void processDumpJob(String crawlDb, String output, Configuration config, String format, String regex, String status, Integer retry, String expr, Float sample) throws IOException, ClassNotFoundException, InterruptedException
public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException, ClassNotFoundException, InterruptedException
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, Exception
Specified by: run in interface Tool
Throws: IOException, InterruptedException, ClassNotFoundException, Exception
Copyright © 2021 The Apache Software Foundation