fix: 1. duplicate verbose flag 2. inconsistency in argument name --profile-name 3. duplicate initialisation of env_defaults
This commit is contained in:
@@ -272,7 +272,7 @@ def build_arg_parser() -> argparse.ArgumentParser:
|
|||||||
parser.add_argument("--title-filters", default="Product,Engineering", help="comma list of job keywords")
|
parser.add_argument("--title-filters", default="Product,Engineering", help="comma list of job keywords")
|
||||||
parser.add_argument("--max-companies", type=int, default=1000)
|
parser.add_argument("--max-companies", type=int, default=1000)
|
||||||
parser.add_argument("--max-people", type=int, default=500)
|
parser.add_argument("--max-people", type=int, default=500)
|
||||||
parser.add_argument("--profile-path", default=str(pathlib.Path.home() / ".crawl4ai/profiles/profile_linkedin_uc"))
|
parser.add_argument("--profile-name", default=str(pathlib.Path.home() / ".crawl4ai/profiles/profile_linkedin_uc"))
|
||||||
parser.add_argument("--outdir", default="./output")
|
parser.add_argument("--outdir", default="./output")
|
||||||
parser.add_argument("--concurrency", type=int, default=4)
|
parser.add_argument("--concurrency", type=int, default=4)
|
||||||
parser.add_argument("--log-level", default="info", choices=["debug", "info", "warn", "error"])
|
parser.add_argument("--log-level", default="info", choices=["debug", "info", "warn", "error"])
|
||||||
@@ -355,8 +355,7 @@ async def async_main(opts):
|
|||||||
user_agent_generator_config= {
|
user_agent_generator_config= {
|
||||||
"platforms": "mobile",
|
"platforms": "mobile",
|
||||||
"os": "Android"
|
"os": "Android"
|
||||||
},
|
}
|
||||||
verbose=False,
|
|
||||||
)
|
)
|
||||||
crawler = AsyncWebCrawler(config=bc)
|
crawler = AsyncWebCrawler(config=bc)
|
||||||
|
|
||||||
@@ -366,7 +365,7 @@ async def async_main(opts):
|
|||||||
# crawler = await next_crawler().start()
|
# crawler = await next_crawler().start()
|
||||||
try:
|
try:
|
||||||
# Build LinkedIn search URL
|
# Build LinkedIn search URL
|
||||||
search_url = f"https://www.linkedin.com/search/results/companies/?keywords={quote(opts.query)}&geoUrn={opts.geo}"
|
search_url = f'https://www.linkedin.com/search/results/companies/?keywords={quote(opts.query)}&companyHqGeo="{opts.geo}"'
|
||||||
logging.info("Seed URL => %s", search_url)
|
logging.info("Seed URL => %s", search_url)
|
||||||
|
|
||||||
companies: List[Dict] = []
|
companies: List[Dict] = []
|
||||||
@@ -425,14 +424,13 @@ def main():
|
|||||||
if cli_opts.debug:
|
if cli_opts.debug:
|
||||||
opts = detect_debug_defaults(force=True)
|
opts = detect_debug_defaults(force=True)
|
||||||
else:
|
else:
|
||||||
env_defaults = detect_debug_defaults()
|
|
||||||
env_defaults = detect_debug_defaults()
|
env_defaults = detect_debug_defaults()
|
||||||
opts = env_defaults if env_defaults else cli_opts
|
opts = env_defaults if env_defaults else cli_opts
|
||||||
|
|
||||||
if not getattr(opts, "cmd", None):
|
if not getattr(opts, "cmd", None):
|
||||||
opts.cmd = "full"
|
opts.cmd = "full"
|
||||||
|
|
||||||
exit_code = asyncio.run(async_main(opts))
|
exit_code = asyncio.run(async_main(cli_opts))
|
||||||
sys.exit(exit_code)
|
sys.exit(exit_code)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user