List of Nutch Properties
This list of Nutch configuration properties is intended for development. It includes deprecated properties and properties used only "internally". The list is generated from nutch-default.xml and Java sources.
Legend:
*Def.*
*Used*
*Temp.*
*Depr.*
*(owr.)*
*(test)*
|
|
Trunk |
|
|
|
|
2.x |
|
Property |
Def. |
Used |
Temp. |
Depr. |
Def. |
Used |
Temp. |
Depr. |
CrawlDBScanner.regex |
X |
X |
|
|
|
|
|
|
CrawlDBScanner.status |
X |
X |
|
|
|
|
|
|
anchorIndexingFilter.deduplicate |
X |
X |
(test) |
|
X |
X |
(test) |
|
arc.url.version |
X |
|
|
|
|
|
|
|
content.server.port |
|
X |
|
|
|
X |
|
|
crawl.gen.delay |
X |
X |
|
|
X |
X |
|
|
crawldb.url.filters |
X |
X |
|
|
|
X |
|
|
crawldb.url.normalizers |
X |
|
|
|
|
|
|
|
crawldb.url.normalizers.scope |
X |
|
|
|
|
|
|
|
creativecommons.exclude.unlicensed |
|
X |
|
|
|
X |
|
|
db.default.fetch.interval |
X |
X |
|
NUTCH-1409 |
X |
X |
|
NUTCH-1409 |
db.fetch.interval.default |
X |
X |
(test) |
|
X |
X |
|
|
db.fetch.interval.max |
X |
X |
|
|
X |
X |
|
|
db.fetch.retry.max |
X |
X |
|
|
X |
X |
|
|
db.fetch.schedule.adaptive.dec_rate |
X |
X |
|
|
X |
X |
|
|
db.fetch.schedule.adaptive.inc_rate |
X |
X |
|
|
X |
X |
|
|
db.fetch.schedule.adaptive.max_interval |
X |
X |
|
|
X |
X |
|
|
db.fetch.schedule.adaptive.min_interval |
X |
X |
|
|
X |
X |
|
|
db.fetch.schedule.adaptive.sync_delta |
X |
X |
|
|
X |
X |
|
|
db.fetch.schedule.adaptive.sync_delta_rate |
X |
X |
|
|
X |
X |
|
|
db.fetch.schedule.class |
X |
X |
(test) |
|
X |
X |
|
|
db.fetch.schedule.mime.file |
X |
X |
|
|
|
|
|
|
db.ignore.external.links |
X |
X |
|
|
X |
X |
|
|
db.ignore.internal.links |
X |
X |
|
|
X |
|
|
|
db.injector.overwrite |
X |
X |
|
|
|
|
|
|
db.injector.update |
X |
X |
(test) |
|
|
|
|
|
db.max.anchor.length |
X |
X |
|
|
X |
|
|
|
db.max.fetch.interval |
|
X |
|
NUTCH-1409 |
|
X |
|
NUTCH-1409 |
db.max.inlinks |
X |
X |
|
|
X |
|
|
|
db.max.outlinks.per.page |
X |
X |
|
|
X |
X |
|
|
db.parsemeta.to.crawldb |
X |
X |
|
|
X |
|
|
|
db.preserve.backup |
X |
X |
|
|
|
|
|
|
db.reader.stats.sort |
|
X |
X |
|
|
X |
X |
|
db.reader.topn |
X |
X |
|
|
|
|
|
|
db.reader.topn.min |
X |
X |
|
|
|
|
|
|
db.score.count.filtered |
X |
X |
|
|
X |
X |
|
|
db.score.injected |
X |
X |
|
|
X |
X |
|
|
db.score.link.external |
X |
X |
|
|
X |
X |
|
|
db.score.link.internal |
X |
X |
|
|
X |
X |
|
|
db.signature.class |
X |
X |
|
|
X |
X |
|
|
db.signature.text_profile.min_token_len |
X |
X |
|
|
X |
X |
|
|
db.signature.text_profile.quant_rate |
X |
X |
|
|
X |
X |
|
|
db.update.additions.allowed |
X |
X |
|
|
X |
X |
|
|
db.update.max.inlinks |
X |
X |
|
|
X |
X |
|
|
db.update.purge.404 |
X |
X |
|
|
|
|
|
|
dc.language |
X |
|
|
|
|
|
|
|
domain.statistics.mode |
|
X |
X |
|
|
X |
X |
|
elastic.index |
|
|
|
|
X |
|
|
|
elastic.max.bulk.docs |
|
|
|
|
X |
|
|
|
elastic.max.bulk.size |
|
|
|
|
X |
|
|
|
encodingdetector.charset.min.confidence |
|
X |
|
|
|
X |
|
|
fail.on.job.failure |
|
|
|
|
X |
|
|
|
fetcher.exit |
X |
|
|
|
|
|
|
|
fetcher.follow.outlinks.depth |
X |
X |
|
|
|
|
|
|
fetcher.follow.outlinks.depth.divisor |
X |
X |
|
|
|
|
|
|
fetcher.follow.outlinks.ignore.external |
X |
X |
|
|
|
|
|
|
fetcher.follow.outlinks.num.links |
X |
X |
|
|
|
|
|
|
fetcher.job.resume |
|
|
|
|
X |
|
|
|
fetcher.max.crawl.delay |
X |
X |
|
|
X |
X |
|
|
fetcher.max.exceptions.per.queue |
X |
X |
|
|
X |
|
|
|
fetcher.parse |
X |
X |
(test) |
|
X |
X |
|
|
fetcher.queue.depth.multiplier |
X |
X |
|
|
X |
X |
|
|
fetcher.queue.mode |
X |
X |
|
|
X |
X |
|
|
fetcher.queue.use.host.settings |
|
|
|
|
X |
X |
|
|
fetcher.server.delay |
X |
X |
|
|
X |
X |
|
|
fetcher.server.min.delay |
X |
X |
|
|
X |
X |
|
|
fetcher.store.content |
X |
X |
|
|
X |
X |
|
|
fetcher.threads.fetch |
X |
X |
(owr.) |
|
X |
X |
|
|
fetcher.threads.per.host |
|
|
|
NUTCH-1409 |
|
|
|
NUTCH-1409 |
fetcher.threads.per.host.by.ip |
|
|
|
|
X |
|
|
|
fetcher.threads.per.queue |
X |
X |
|
|
X |
X |
|
|
fetcher.threads.timeout.divisor |
X |
X |
|
|
|
|
|
|
fetcher.throughput.threshold.check.after |
X |
X |
(owr.) |
|
X |
X |
|
|
fetcher.throughput.threshold.pages |
X |
X |
|
|
X |
X |
|
|
fetcher.throughput.threshold.retries |
X |
X |
|
|
|
|
|
|
fetcher.throughput.threshold.sequence |
|
|
|
|
X |
X |
|
|
fetcher.timelimit |
|
X |
X |
|
|
X |
X |
|
fetcher.timelimit.mins |
X |
X |
|
|
X |
X |
|
|
fetcher.verbose |
X |
X |
|
|
X |
|
|
|
file.content.ignored |
|
X |
|
|
|
X |
|
|
file.content.limit |
X |
X |
(test) |
|
X |
X |
(test) |
|
file.crawl.parent |
X |
X |
|
|
X |
X |
|
|
free.generator.filter |
X |
|
|
|
|
|
|
|
free.generator.normalize |
X |
|
|
|
|
|
|
|
ftp.content.limit |
X |
X |
|
|
X |
X |
|
|
ftp.follow.talk |
X |
X |
|
|
X |
X |
|
|
ftp.keep.connection |
X |
X |
|
|
X |
X |
|
|
ftp.password |
X |
X |
|
|
X |
X |
|
|
ftp.server.timeout |
X |
X |
|
|
X |
X |
|
|
ftp.timeout |
X |
X |
|
|
X |
X |
|
|
ftp.username |
X |
X |
|
|
X |
X |
|
|
generate.batch.id |
|
|
|
|
X |
|
|
|
generate.count.mode |
X |
X |
|
|
X |
X |
|
|
generate.curTime |
|
X |
|
|
|
X |
|
|
generate.filter |
|
X |
|
|
|
X |
|
|
generate.max.count |
X |
X |
|
|
X |
X |
|
|
generate.max.distance |
|
|
|
|
X |
X |
|
|
generate.max.num.segments |
X |
|
|
|
|
|
|
|
generate.max.per.host |
X |
X |
|
NUTCH-1409 |
|
|
|
NUTCH-1409 |
generate.max.per.host.by.ip |
|
X |
|
NUTCH-1409 |
|
|
|
NUTCH-1409 |
generate.min.interval |
X |
X |
|
|
|
|
|
|
generate.min.score |
X |
X |
|
|
X |
|
|
|
generate.normalise |
|
X |
|
|
|
X |
|
|
generate.partition.seed |
|
|
|
|
X |
|
|
|
generate.restrict.status |
X |
|
|
|
|
|
|
|
generate.topN |
|
X |
|
|
|
X |
|
|
generate.update.crawldb |
X |
X |
|
|
X |
X |
|
|
hostdb.concurrency.level |
|
|
|
|
X |
|
|
|
hostdb.lru.size |
|
|
|
|
X |
|
|
|
htmlparsefilter.order |
X |
X |
|
|
X |
X |
|
|
http.accept |
X |
X |
|
|
X |
X |
|
|
http.accept.language |
X |
X |
|
|
X |
X |
|
|
http.agent.description |
X |
X |
|
|
X |
X |
|
|
http.agent.email |
X |
X |
|
|
X |
X |
|
|
http.agent.host |
X |
X |
|
|
X |
X |
|
|
http.agent.name |
X |
X |
(test) |
|
X |
X |
(test) |
|
http.agent.url |
X |
X |
|
|
X |
X |
|
|
http.agent.version |
X |
X |
|
|
X |
X |
|
|
http.auth.file |
X |
X |
|
|
X |
X |
|
|
http.auth.verbose |
|
X |
|
|
|
X |
|
|
http.content.limit |
X |
X |
|
|
X |
X |
|
|
http.max.delays |
|
X |
|
|
|
X |
|
|
http.proxy.host |
X |
X |
(test) |
|
X |
X |
(test) |
|
http.proxy.password |
X |
X |
|
|
X |
X |
|
|
http.proxy.port |
X |
X |
(test) |
|
X |
X |
(test) |
|
http.proxy.realm |
X |
X |
|
|
X |
X |
|
|
http.proxy.username |
X |
X |
|
|
X |
X |
|
|
http.redirect.max |
X |
X |
|
|
|
|
|
|
http.robots.403.allow |
X |
X |
|
|
X |
X |
|
|
http.robots.agents |
X |
X |
(test) |
|
X |
X |
(test) |
|
http.timeout |
X |
X |
|
|
X |
X |
|
|
http.useHttp11 |
X |
X |
|
|
X |
X |
|
|
http.verbose |
X |
X |
|
|
X |
X |
|
|
index.content.md |
X |
X |
|
|
|
|
|
|
index.db.md |
X |
X |
|
|
|
|
|
|
index.parse.md |
X |
X |
(test) |
|
|
|
|
|
index.replace.regexp |
X |
X |
|
|
|
|
|
|
index.static |
X |
X |
|
|
|
|
|
|
indexer.add.domain |
X |
X |
|
|
|
|
|
|
indexer.delete |
X |
|
|
|
|
|
|
|
indexer.delete.robots.noindex |
X |
|
|
|
|
|
|
|
indexer.max.content.length |
X |
X |
|
|
|
|
|
|
indexer.max.title.length |
X |
X |
|
|
X |
X |
(test) |
|
indexer.score.power |
X |
X |
|
|
X |
X |
|
|
indexer.skip.notmodified |
X |
X |
|
|
|
|
|
|
indexer.url.filters |
X |
X |
|
|
|
X |
|
|
indexer.url.normalizers |
X |
|
|
|
|
|
|
|
indexer.writer.classes |
|
X |
X |
|
|
X |
X |
|
indexingfilter.order |
X |
X |
|
|
X |
X |
|
|
injector.current.time |
|
X |
X |
|
|
X |
X |
|
lang.analyze.max.length |
X |
X |
|
|
X |
|
|
|
lang.extraction.policy |
X |
X |
|
|
X |
X |
|
|
lang.identification.only.certain |
X |
X |
|
|
X |
X |
|
|
lang.ngram.max.length |
|
|
|
|
X |
|
|
|
lang.ngram.min.length |
|
|
|
|
X |
|
|
|
link.analyze.damping.factor |
X |
X |
|
|
|
|
|
|
link.analyze.initial.score |
X |
X |
|
|
|
|
|
|
link.analyze.iteration |
X |
X |
|
|
|
|
|
|
link.analyze.normalize.score |
|
X |
|
|
|
X |
|
|
link.analyze.num.iterations |
X |
X |
|
|
|
|
|
|
link.analyze.rank.one |
X |
X |
|
|
|
|
|
|
link.delete.gone |
X |
X |
|
|
|
|
|
|
link.ignore.internal.domain |
X |
X |
|
|
|
|
|
|
link.ignore.internal.host |
X |
X |
|
|
|
|
|
|
link.ignore.limit.domain |
X |
X |
|
|
|
|
|
|
link.ignore.limit.page |
X |
X |
|
|
|
|
|
|
link.loops.depth |
X |
X |
|
|
|
|
|
|
link.score.updater.clear.score |
X |
X |
|
|
|
|
|
|
linkdb.url.filters |
X |
X |
|
|
|
X |
|
|
linkdb.url.normalizer |
X |
|
|
|
|
|
|
|
linkdb.url.normalizer.scope |
X |
|
|
|
|
|
|
|
metatag.description |
X |
|
|
|
|
|
|
|
metatag.keywords |
X |
|
|
|
|
|
|
|
metatags.names |
X |
X |
(test) |
|
|
|
|
|
mime.type.magic |
X |
X |
|
|
X |
X |
|
|
mime.types.file |
X |
X |
|
|
X |
X |
|
|
moreIndexingFilter.indexMimeTypeParts |
X |
X |
(test) |
|
X |
X |
(test) |
|
moreIndexingFilter.mapMimeTypes |
X |
X |
|
|
|
|
|
|
nutch.conf.uuid |
|
X |
|
|
|
X |
|
|
parse.filter.urls |
X |
X |
(owr.) |
|
|
|
|
|
parse.job.force |
|
|
|
|
X |
|
|
|
parse.job.resume |
|
|
|
|
X |
|
|
|
parse.normalize.urls |
X |
X |
(owr.) |
|
|
|
|
|
parse.plugin.file |
X |
X |
(test) |
|
X |
X |
(test) |
|
parser.caching.forbidden.policy |
X |
X |
|
|
X |
X |
|
|
parser.character.encoding.default |
X |
X |
|
|
X |
X |
|
|
parser.fix.embeddedparams |
X |
|
|
|
|
|
|
|
parser.html.form.use_action |
X |
X |
(test) |
|
X |
X |
(test) |
|
parser.html.impl |
X |
X |
|
|
X |
X |
|
|
parser.html.outlinks.ignore_tags |
X |
X |
|
|
X |
X |
|
|
parser.skip.truncated |
X |
X |
|
|
X |
X |
|
|
parser.timeout |
X |
X |
|
|
X |
X |
|
|
partition.url.mode |
X |
X |
|
|
X |
X |
|
|
partition.url.seed |
|
X |
X |
|
|
X |
|
|
plugin.auto-activation |
X |
X |
|
|
X |
X |
|
|
plugin.excludes |
X |
X |
|
|
X |
X |
|
|
plugin.folders |
X |
X |
|
|
X |
X |
|
|
plugin.includes |
X |
X |
(test) |
|
X |
X |
(test) |
|
schema.prefix |
|
|
|
|
|
|
X |
|
scoring.filter.order |
X |
X |
|
|
X |
X |
|
|
segment.dump.dir |
X |
|
|
|
|
|
|
|
segment.merger.filter |
X |
X |
|
|
|
|
|
|
segment.merger.normalizer |
X |
X |
|
|
|
|
|
|
segment.merger.segmentName |
X |
X |
|
|
|
|
|
|
segment.merger.slice |
X |
X |
|
|
|
|
|
|
segment.proxy.port |
|
X |
|
|
|
X |
|
|
segment.reader.co |
X |
X |
|
|
|
|
|
|
segment.reader.fe |
X |
X |
|
|
|
|
|
|
segment.reader.ge |
X |
X |
|
|
|
|
|
|
segment.reader.pa |
X |
X |
|
|
|
|
|
|
segment.reader.pd |
X |
X |
|
|
|
|
|
|
segment.reader.pt |
X |
X |
|
|
|
|
|
|
sftp.password |
|
|
|
|
X |
|
|
|
sftp.port |
|
|
|
|
X |
|
|
|
sftp.server |
|
|
|
|
X |
|
|
|
sftp.user |
|
|
|
|
X |
|
|
|
solr.auth |
X |
X |
|
|
|
|
|
|
solr.auth.password |
X |
|
|
|
|
|
|
|
solr.auth.username |
X |
|
|
|
|
|
|
|
solr.commit.index |
X |
X |
|
|
X |
X |
|
|
solr.commit.size |
X |
X |
|
|
X |
X |
|
|
solr.mapping.file |
X |
X |
|
|
X |
X |
|
|
solr.params |
X |
|
|
|
|
|
|
|
solr.server.url |
|
X |
|
|
|
X |
|
|
storage.crawl.id |
|
|
|
|
X |
X |
|
|
storage.data.store.class |
|
|
|
|
X |
X |
(test) |
|
storage.schema.host |
|
|
|
|
X |
X |
|
|
storage.schema.webpage |
|
|
|
|
X |
X |
|
|
subcollection.default.field |
X |
|
|
|
|
|
|
|
subcollection.default.fieldname |
X |
|
|
|
|
|
|
|
subcollections.config |
|
X |
|
|
|
X |
|
|
subcollections.xml |
|
X |
|
|
|
X |
|
|
tika.config.file |
X |
|
|
|
|
|
|
|
urlfilter.automaton.file |
X |
X |
|
|
X |
X |
|
|
urlfilter.automaton.rules |
|
X |
|
|
|
X |
|
|
urlfilter.domain.file |
X |
X |
|
|
X |
X |
|
|
urlfilter.domain.rules |
|
X |
|
|
|
X |
|
|
urlfilter.domainblacklist.file |
X |
|
|
|
|
|
|
|
urlfilter.domainblacklist.rules |
X |
|
|
|
|
|
|
|
urlfilter.order |
X |
X |
|
|
X |
X |
|
|
urlfilter.prefix.file |
X |
X |
|
|
X |
X |
|
|
urlfilter.prefix.rules |
|
X |
|
|
|
X |
|
|
urlfilter.regex.file |
X |
X |
|
|
X |
X |
|
|
urlfilter.regex.rules |
|
X |
|
|
|
X |
|
|
urlfilter.suffix.file |
X |
X |
(test) |
|
X |
X |
(test) |
|
urlfilter.suffix.rules |
|
X |
|
|
|
X |
|
|
urlmeta.tags |
X |
X |
|
|
|
|
|
|
urlnormalizer.hosts.file |
X |
|
|
|
|
|
|
|
urlnormalizer.hosts.rules |
X |
|
|
|
|
|
|
|
urlnormalizer.loop.count |
X |
X |
|
|
X |
X |
|
|
urlnormalizer.order |
X |
X |
(test) |
|
X |
X |
(test) |
|
urlnormalizer.regex.file |
X |
X |
|
|
X |
X |
|
|
urlnormalizer.regex.rules |
|
X |
|
|
|
X |
|
|
webgraph.url.filters |
X |
X |
|
|
|
X |
|
|
webgraph.url.normalizers |
X |
|
|
|
|
|
|
|
webtable.dump.content |
|
|
|
|
X |
|
|
|
webtable.dump.headers |
|
|
|
|
X |
|
|
|
webtable.dump.links |
|
|
|
|
X |
|
|
|
webtable.dump.text |
|
|
|
|
X |
|
|
|
webtable.url.regex |
|
|
|
|
X |
|
|
back to FrontPage