larbin源碼分析---NamedSite

此類是用來存儲已經訪問過的網站,每個NamedSite都對應着相應的IPSite

 

class NamedSite { fetch

 private: 網站

  /* string used for following CNAME chains (just one jump) */ ui

  char *cname; this

  /** we've got a good dns answer url

   * get the robots.txt */ spa

  void dnsOK (); orm

  /** Cannot get the inet addr server

   * dnsState must have been set properly before the call */ dns

  void dnsErr ();

  /** Delete the old identity of the site */

  void newId ();     //爲網站刪除舊的標識符

  /** put this url in its IPSite */

  void transfer (url *u);  //url放入他的IPSite

  /** forget this url for this reason */

  void forgetUrl (url *u, FetchError reason);

 public:

  /** Constructor */

  NamedSite ();

  /** Destructor : never used */

  ~NamedSite ();

  /* name of the site */

  char name[maxSiteSize];       //網站名

  /* port of the site */

  uint16_t port;

  /* numbers of urls in ram for this site */

  uint16_t nburls;

  /* fifo of urls waiting to be fetched */

  url *fifo[maxUrlsBySite];         //此網站中存在的url

  uint8_t inFifo;

  uint8_t outFifo;

  void putInFifo(url *u);               //入隊

  url *getInFifo();                    //出隊

  short fifoLength();                  //長度

  /** Is this Site in a dnsSites */

  bool isInFifo;

  /** internet addr of this server */

  char dnsState;

  struct in_addr addr;

  uint ipHash;     //此變量用以肯定此NamedSite所對應的IPSite

  /* Date of expiration of dns call and robots.txt fetch */

  time_t dnsTimeout;

  /** test if a file can be fetched thanks to the robots.txt */

  bool testRobots(char *file);

  /* forbidden paths : given by robots.txt */

  Vector<char> forbidden;           //獲取robots中的禁止抓取的url

  /** Put an url in the fifo

   * If there are too much, put it back in UrlsInternal

   * Never fill totally the fifo => call at least with 1 */

  void putGenericUrl(url *u, int limit, bool prio);

  inline void putUrl (url *u) { putGenericUrl(u, 15, false); }

  inline void putUrlWait (url *u) { putGenericUrl(u, 10, false); }

  inline void putPriorityUrl (url *u) { putGenericUrl(u, 5, true); }

  inline void putPriorityUrlWait (url *u) { putGenericUrl(u, 1, true); }

  /** Init a new dns query */

  void newQuery ();

  /** The dns query ended with success */

  void dnsAns (adns_answer *ans);

  /** we got the robots.txt, transfer what must be in IPSites */

  void robotsResult (FetchError res);

};

相關文章
相關標籤/搜索