larbin源碼分析--IPSite

class IPSite { html

 private: api

  /* date of last access : avoid rapid fire */ cookie

  time_t lastAccess; socket

  /** Is this Site in a okSites (eg have something to fetch) */ ide

  bool isInFifo; fetch

  /** Get an url from the fifo this

   * resize tab if too big url

   */ spa

  url *getUrl (); orm

 public:

  /** Constructor */

  IPSite ();

  /** Destructor : never used */

  ~IPSite ();

  /** Urls waiting for being fetched */

  Fifo<url> tab;

  /** Put an url in the fifo */

  void putUrl (url *u);

  /** fetch the fist page in the fifo okSites

   * expects at least one element in freeConns

   * return expected time for next call (0 means now)

   */

  int fetch ();             //taburl的抓取。

};

 

 

疑問:若是每個 NamedSite 都對應一個 IPSite,爲何 IPSite 和 NamedSite 中都各有一個 url 的隊列?

 

 

int IPSite::fetch () {

  if (tab.isEmpty()) {        抓取tab中的url

         // no more url to read

         // This is possible because this function can be called recursively

         isInFifo = false;

    return 0;

  } else {

    int next_call = lastAccess + global::waitDuration;

    if (next_call > global::now) {

      global::okSites->rePut(this);

      return next_call;

    } else {

      Connexion *conn = global::freeConns->get();         //創建連接

      url *u = getUrl();          //獲取IPSite中的url

      // We're allowed to fetch this one

      // open the socket and write the request

      char res = getFds(conn, &(u->addr), u->getPort());

      if (res != emptyC) {

        lastAccess = global::now;

        conn->timeout = timeoutPage;

 

// http報文的組建

 

        conn->request.addString("GET ");

        if (global::proxyAddr != NULL) {

          char *tmp = u->getUrl();

          conn->request.addString(tmp);

        } else {

          conn->request.addString(u->getFile());

        }

        conn->request.addString(" HTTP/1.0\r\nHost: ");

        conn->request.addString(u->getHost());

#ifdef COOKIES

        if (u->cookie != NULL) {

          conn->request.addString("\r\nCookie: ");

          conn->request.addString(u->cookie);

        }

#endif // COOKIES

        conn->request.addString(global::headers);

        conn->parser = new html (u, conn);

        conn->pos = 0;

        conn->err = success;

        conn->state = res;

        if (tab.isEmpty()) {

          isInFifo = false;

        } else {

          global::okSites->put(this);

        }

        return 0;

      } else {

        // Unable to connect

        fetchFail(u, noConnection);      //抓取失敗則記錄緣由

        answers(noConnection);

        delete u;

        global::freeConns->put(conn);     //有待進一步研究

        return fetch();    //遞歸抓取

      }   

    }

  }

}

相關文章
相關標籤/搜索