|
@@ -0,0 +1,394 @@
|
|
|
+
|
|
|
+<!DOCTYPE HTML>
|
|
|
+<html lang="zh-Hans">
|
|
|
+ <head>
|
|
|
+ <meta charset="UTF-8">
|
|
|
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type">
|
|
|
+ <title>目录 · GitBook</title>
|
|
|
+ <meta http-equiv="X-UA-Compatible" content="IE=edge" />
|
|
|
+ <meta name="description" content="">
|
|
|
+ <meta name="generator" content="GitBook 3.2.3">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <link rel="stylesheet" href="../gitbook/style.css">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <link rel="stylesheet" href="../gitbook/gitbook-plugin-highlight/website.css">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <link rel="stylesheet" href="../gitbook/gitbook-plugin-search/search.css">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <link rel="stylesheet" href="../gitbook/gitbook-plugin-fontsettings/website.css">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <meta name="HandheldFriendly" content="true"/>
|
|
|
+ <meta name="viewport" content="width=device-width, initial-scale=1">
|
|
|
+ <meta name="apple-mobile-web-app-capable" content="yes">
|
|
|
+ <meta name="apple-mobile-web-app-status-bar-style" content="black">
|
|
|
+ <link rel="apple-touch-icon-precomposed" sizes="152x152" href="../gitbook/images/apple-touch-icon-precomposed-152.png">
|
|
|
+ <link rel="shortcut icon" href="../gitbook/images/favicon.ico" type="image/x-icon">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </head>
|
|
|
+ <body>
|
|
|
+
|
|
|
+<div class="book">
|
|
|
+ <div class="book-summary">
|
|
|
+
|
|
|
+
|
|
|
+<div id="book-search-input" role="search">
|
|
|
+ <input type="text" placeholder="Type to search" aria-label="Type to search" />
|
|
|
+</div>
|
|
|
+
|
|
|
+
|
|
|
+ <nav role="navigation">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+<ul class="summary">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.1" data-path="../">
|
|
|
+
|
|
|
+ <a href="../">
|
|
|
+
|
|
|
+
|
|
|
+ 简介
|
|
|
+
|
|
|
+ </a>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.2" >
|
|
|
+
|
|
|
+ <span>
|
|
|
+
|
|
|
+
|
|
|
+ 安装以及配置
|
|
|
+
|
|
|
+ </span>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <ul class="articles">
|
|
|
+
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.2.1" data-path="../install/">
|
|
|
+
|
|
|
+ <a href="../install/#install">
|
|
|
+
|
|
|
+
|
|
|
+ 安装
|
|
|
+
|
|
|
+ </a>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.2.2" data-path="../install/">
|
|
|
+
|
|
|
+ <a href="../install/#config">
|
|
|
+
|
|
|
+
|
|
|
+ 配置
|
|
|
+
|
|
|
+ </a>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+
|
|
|
+ </ul>
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.3" >
|
|
|
+
|
|
|
+ <span>
|
|
|
+
|
|
|
+
|
|
|
+ 目录结构
|
|
|
+
|
|
|
+ </span>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <ul class="articles">
|
|
|
+
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.3.1" data-path="./">
|
|
|
+
|
|
|
+ <a href="./#code">
|
|
|
+
|
|
|
+
|
|
|
+ 目录
|
|
|
+
|
|
|
+ </a>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+
|
|
|
+ </ul>
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.4" >
|
|
|
+
|
|
|
+ <span>
|
|
|
+
|
|
|
+
|
|
|
+ 部署
|
|
|
+
|
|
|
+ </span>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <ul class="articles">
|
|
|
+
|
|
|
+
|
|
|
+ <li class="chapter " data-level="1.4.1" data-path="../run/">
|
|
|
+
|
|
|
+ <a href="../run/#run">
|
|
|
+
|
|
|
+
|
|
|
+ 部署
|
|
|
+
|
|
|
+ </a>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+
|
|
|
+ </ul>
|
|
|
+
|
|
|
+ </li>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <li class="divider"></li>
|
|
|
+
|
|
|
+ <li>
|
|
|
+ <a href="https://www.gitbook.com" target="_blank" rel="noopener noreferrer" class="gitbook-link">
|
|
|
+ Published with GitBook
|
|
|
+ </a>
|
|
|
+ </li>
|
|
|
+</ul>
|
|
|
+
|
|
|
+
|
|
|
+ </nav>
|
|
|
+
|
|
|
+
|
|
|
+ </div>
|
|
|
+
|
|
|
+ <div class="book-body">
|
|
|
+
|
|
|
+ <div class="body-inner">
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+<div class="book-header" role="navigation">
|
|
|
+
|
|
|
+
|
|
|
+ <!-- Title -->
|
|
|
+ <h1>
|
|
|
+ <i class="fa fa-circle-o-notch fa-spin"></i>
|
|
|
+ <a href=".." >目录</a>
|
|
|
+ </h1>
|
|
|
+</div>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <div class="page-wrapper" tabindex="-1" role="main">
|
|
|
+ <div class="page-inner">
|
|
|
+
|
|
|
+<div id="book-search-results">
|
|
|
+ <div class="search-noresults">
|
|
|
+
|
|
|
+ <section class="normal markdown-section">
|
|
|
+
|
|
|
+ <h1 id="code">code</h1>
|
|
|
+<p>目录结构</p>
|
|
|
+<pre><code>│ app.js //处理路由和异常捕捉
|
|
|
+│ common.js //配置环境config
|
|
|
+│ index.js //多进程启动
|
|
|
+│
|
|
|
+├─bin
|
|
|
+│ │ checkProxyPool.js //从redis代理池中拿出ip直接请求某个网址,如果失效则删除
|
|
|
+│ │ checkZombieChrome.js //使用headless浏览器模式会出现很多僵尸chrome进程,用此脚本杀死
|
|
|
+│ │ checkZombieSpider.js //检查所有crawlworker并记录到表proc_log,并且杀死proc_log中运行太久的僵尸程序
|
|
|
+│ │ crawl.js //获取浏览器模式和普通模式的规则然后进行任务
|
|
|
+│ │ crawlMaster.js //抓取任务的master分配入redis队列
|
|
|
+│ │ crawlWorker.js //从redis获取抓取任务
|
|
|
+│ │ fetchPage.js
|
|
|
+│ │ test.js
|
|
|
+│ │
|
|
|
+│ ├─linux_bash
|
|
|
+│ │ crontab.sh
|
|
|
+│ │ supervisor.ini
|
|
|
+│ │
|
|
|
+│ └─NameClient
|
|
|
+│ subNsEvent.js
|
|
|
+│
|
|
|
+├─conf //配置目录
|
|
|
+│ │ code.inc.js
|
|
|
+│ │ config.dev.inc.js
|
|
|
+│ │ config.form.inc.js
|
|
|
+│ │ config.inc.js
|
|
|
+│ │ r2m_config.inc.js
|
|
|
+│ │
|
|
|
+│ └─conf_ns //名字服务器配置
|
|
|
+│ config.code.inc.js
|
|
|
+│ config.globals.inc.js
|
|
|
+│ config.r2m.inc.js
|
|
|
+│ config.shop.inc.js
|
|
|
+│
|
|
|
+├─controllers //爬虫开放的api,用于预览爬取获取页面和网页上执行任务查看任务执行情况
|
|
|
+│ DefaultController.js
|
|
|
+│
|
|
|
+├─extensions
|
|
|
+│ function_extend.js
|
|
|
+│
|
|
|
+├─models
|
|
|
+│ AmcMsg.js //爬虫爬取报警上报
|
|
|
+│ Browser.js //headless浏览器模式下的浏览器类
|
|
|
+│ JTool.js //选择器使用的类工具,例如格式化时间等
|
|
|
+│ MapData.js //名字服务中配置的数据库表内字段的操作类
|
|
|
+│ ProxyPool.js //代理池类
|
|
|
+│ Spider.js //爬虫类,非常重要,包含了爬取过程中的一系列函数
|
|
|
+│
|
|
|
+└─views
|
|
|
+ │ doc.ejs
|
|
|
+ │ error.ejs
|
|
|
+ │ index.ejs
|
|
|
+ │
|
|
|
+ └─name_server
|
|
|
+ js.ejs
|
|
|
+</code></pre><hr>
|
|
|
+<h1 id="系统示意图">系统示意图</h1>
|
|
|
+<p><img src="../img/图1-2.jpg" alt="系统示意图"></p>
|
|
|
+<h1 id="请求代理示意图">请求代理示意图</h1>
|
|
|
+<p><img src="../img/图1-1.jpg" alt="请求代理示意图"></p>
|
|
|
+<h1 id="可视化流程示意图">可视化流程示意图</h1>
|
|
|
+<p><img src="../img/图1-3.jpg" alt="可视化流程示意图"></p>
|
|
|
+
|
|
|
+
|
|
|
+ </section>
|
|
|
+
|
|
|
+ </div>
|
|
|
+ <div class="search-results">
|
|
|
+ <div class="has-results">
|
|
|
+
|
|
|
+ <h1 class="search-results-title"><span class='search-results-count'></span> results matching "<span class='search-query'></span>"</h1>
|
|
|
+ <ul class="search-results-list"></ul>
|
|
|
+
|
|
|
+ </div>
|
|
|
+ <div class="no-results">
|
|
|
+
|
|
|
+ <h1 class="search-results-title">No results matching "<span class='search-query'></span>"</h1>
|
|
|
+
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+</div>
|
|
|
+
|
|
|
+ </div>
|
|
|
+ </div>
|
|
|
+
|
|
|
+ </div>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </div>
|
|
|
+
|
|
|
+ <script>
|
|
|
+ var gitbook = gitbook || [];
|
|
|
+ gitbook.push(function() {
|
|
|
+ gitbook.page.hasChanged({"page":{"title":"目录","level":"1.3.1","depth":2,"next":{"title":"部署","level":"1.4","depth":1,"ref":"","articles":[{"title":"部署","level":"1.4.1","depth":2,"anchor":"#run","path":"run/README.md","ref":"run/README.md#run","articles":[]}]},"previous":{"title":"目录结构","level":"1.3","depth":1,"ref":"","articles":[{"title":"目录","level":"1.3.1","depth":2,"anchor":"#code","path":"code/README.md","ref":"code/README.md#code","articles":[]}]},"dir":"ltr"},"config":{"gitbook":"*","theme":"default","variables":{},"plugins":["livereload"],"pluginsConfig":{"livereload":{},"highlight":{},"search":{},"lunr":{"maxIndexSize":1000000,"ignoreSpecialCharacters":false},"sharing":{"facebook":true,"twitter":true,"google":false,"weibo":false,"instapaper":false,"vk":false,"all":["facebook","google","twitter","weibo","instapaper"]},"fontsettings":{"theme":"white","family":"sans","size":2},"theme-default":{"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"},"showLevel":false}},"structure":{"langs":"LANGS.md","readme":"README.md","glossary":"GLOSSARY.md","summary":"SUMMARY.md"},"pdf":{"pageNumbers":true,"fontSize":12,"fontFamily":"Arial","paperSize":"a4","chapterMark":"pagebreak","pageBreaksBefore":"/","margin":{"right":62,"left":62,"top":56,"bottom":56}},"styles":{"website":"styles/website.css","pdf":"styles/pdf.css","epub":"styles/epub.css","mobi":"styles/mobi.css","ebook":"styles/ebook.css","print":"styles/print.css"}},"file":{"path":"code/README.md","mtime":"2018-10-10T06:52:47.360Z","type":"markdown"},"gitbook":{"version":"3.2.3","time":"2018-10-10T07:06:25.421Z"},"basePath":"..","book":{"language":""}});
|
|
|
+ });
|
|
|
+ </script>
|
|
|
+</div>
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook.js"></script>
|
|
|
+ <script src="../gitbook/theme.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook-plugin-livereload/plugin.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook-plugin-search/search-engine.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook-plugin-search/search.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook-plugin-lunr/lunr.min.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook-plugin-lunr/search-lunr.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook-plugin-sharing/buttons.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ <script src="../gitbook/gitbook-plugin-fontsettings/fontsettings.js"></script>
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ </body>
|
|
|
+</html>
|
|
|
+
|