Loading ...
Sorry, an error occurred while loading the content.

Patch -- OmitPage directive to ignore URL in page count

Expand Messages
  • iamadammorton
    Hello webalizer group! I use webalizer on a web application that uses URLs that do not end in a file extension . I wanted to not count a couple of them in
    Message 1 of 1 , Feb 15, 2005
    • 0 Attachment
      Hello webalizer group!

      I use webalizer on a web application that uses URLs that do not end
      in a "file extension". I wanted to not count a couple of them in
      the page count, but the existing PageType directive only allows
      configuration by extension.

      To work around this, I wrote a patch that adds a new
      parameter, "OmitPage" (or "-O" on the command line), that will not
      count any provided URL as a page, even if it has no extension or an
      extension listed in a "PageType" parameter. Wildcard matching with
      "*" is allowed in OmitPage URLs.

      The patch is included below (line wrapping may give you some
      headaches), or can be downloaded from
      http://www.adammorton.com/code/code.html#OmitPage

      diff -Naur tarballs/webalizer-2.01-10/README webalizer-2.01-10-
      OmitPage/README
      --- webalizer-2.01-10/README Wed Oct 24 00:05:07 2001
      +++ webalizer-2.01-10-OmitPage/README Mon Feb 14 19:30:50 2005
      @@ -232,10 +232,10 @@
      on 'Grouped' records, and thought of as the "Minimum number of
      visits"
      that came from that grouping instead. Note: Visits only occur on
      PageType requests, that is, for any request whose URL is one of the
      -'page' types defined with the PageType option. Due to the
      limitation
      -of the HTTP protocol, log rotations and other factors, this number
      -should not be taken as absolutely accurate, rather, it should be
      -considered a pretty close "guess".
      +'page' types defined with the PageType option, and not excluded by
      the
      +OmitPage option. Due to the limitation of the HTTP protocol, log
      +rotations and other factors, this number should not be taken as
      absolutely
      +accurate, rather, it should be considered a pretty close "guess".

      KBytes

      @@ -379,6 +379,11 @@
      'php3' or 'pl' in order to have them counted as well. The
      default is 'htm*' and 'cgi' for web logs and 'txt' for
      ftp.
      Config file keyword: PageType
      +
      +-O name Specify URLs which are not counted as 'pages'. Requests
      + matching one of these URLs will not count as a page even
      if they
      + have an extension matching one of the PageTypes defined
      above
      + Config file keyword: OmitPage

      -t name This option specifies the title string for all reports.
      This
      string is used, in conjunction with the hostname (if not
      blank)
      diff -Naur tarballs/webalizer-2.01-10/linklist.c webalizer-2.01-10-
      OmitPage/linklist.c
      --- webalizer-2.01-10/linklist.c Fri Jun 15 04:34:24 2001
      +++ webalizer-2.01-10-OmitPage/linklist.c Mon Feb 14 19:09:45
      2005
      @@ -102,6 +102,7 @@
      NLISTPTR html_tail = NULL; /* tail HTML
      code */
      NLISTPTR html_end = NULL; /* after everything
      else */
      NLISTPTR page_type = NULL; /* page view
      types */
      +NLISTPTR omit_page = NULL; /* pages not
      counted */
      GLISTPTR search_list = NULL; /* Search engine
      list */

      /*********************************************/
      diff -Naur tarballs/webalizer-2.01-10/linklist.h webalizer-2.01-10-
      OmitPage/linklist.h
      --- webalizer-2.01-10/linklist.h Thu Sep 28 23:50:30 2000
      +++ webalizer-2.01-10-OmitPage/linklist.h Mon Feb 14 19:09:24
      2005
      @@ -38,6 +38,7 @@
      extern NLISTPTR html_tail ; /* tail HTML
      code */
      extern NLISTPTR html_end ; /* after everything
      else */
      extern NLISTPTR page_type ; /* page view
      types */
      +extern NLISTPTR omit_page ; /* pages not
      counted */
      extern GLISTPTR search_list ; /* Search engine
      list */

      extern char *isinlist(NLISTPTR, char *); /* scan list
      for str */
      diff -Naur tarballs/webalizer-2.01-10/sample.conf webalizer-2.01-10-
      OmitPage/sample.conf
      --- webalizer-2.01-10/sample.conf Thu Sep 28 23:51:42 2000
      +++ webalizer-2.01-10-OmitPage/sample.conf Mon Feb 14 19:13:24
      2005
      @@ -111,6 +111,13 @@
      #PageType php3
      #PageType pl

      +# OmitPage lets you tell the Webalizer that certain URLs do not
      contain any
      +# 'page's. No url matching an OmitPage value will be counted as a
      page, even
      +# if it matches a PageType above or has no extension (e.g., a
      directory)
      +# They will still count as a hit-- use IgnoreURL to ignore
      completely
      +
      +OmitPage /render
      +
      # UseHTTPS should be used if the analysis is being run on a
      # secure server, and links to urls should use 'https://' instead
      # of the default 'http://'. If you need this, set it to 'yes'.
      diff -Naur tarballs/webalizer-2.01-10/webalizer.c webalizer-2.01-10-
      OmitPage/webalizer.c
      --- webalizer-2.01-10/webalizer.c Tue Apr 16 18:11:31 2002
      +++ webalizer-2.01-10-OmitPage/webalizer.c Mon Feb 14 19:56:36
      2005
      @@ -272,7 +272,7 @@

      /* get command line options */
      opterr = 0; /* disable parser errors */
      - while ((i=getopt
      (argc,argv,"a:A:c:C:dD:e:E:fF:g:GhHiI:l:Lm:M:n:N:o:pP:qQr:R:s:S:t:Tu:
      U:vVx:XY"))!=EOF)
      + while ((i=getopt
      (argc,argv,"a:A:c:C:dD:e:E:fF:g:GhHiI:l:Lm:M:n:N:o:O:pP:qQr:R:s:S:t:T
      u:U:vVx:XY"))!=EOF)
      {
      switch (i)
      {
      @@ -301,6 +301,7 @@
      case 'n': hname=optarg; break; /*
      Hostname */
      case 'N': dns_children=atoi(optarg); break; /* # of DNS
      children */
      case 'o': out_dir=optarg; break; /* Output
      directory */
      + case 'O': add_nlist(optarg,&omit_page); break; /* pages not
      counted */
      case 'p': incremental=1; break; /* Incremental
      run */
      case 'P': add_nlist(optarg,&page_type); break; /* page view
      types */
      case 'q': verbose=1; break; /* Quiet
      (verbose=1) */
      @@ -1449,7 +1450,8 @@
      "DNSCache", /* DNS Cache file
      name 84 */
      "DNSChildren", /* DNS Children (0=no
      DNS) 85 */
      "DailyGraph", /* Daily Graph
      (0=no) 86 */
      - "DailyStats" /* Daily Stats
      (0=no) 87 */
      + "DailyStats", /* Daily Stats
      (0=no) 87 */
      + "OmitPage" /* URLS not counted as a
      page 88 */
      };

      FILE *fp;
      @@ -1593,6 +1595,7 @@
      #endif /* USE_DNS */
      case 86: daily_graph=(value[0]=='n')?0:1; break; /*
      HourlyGraph */
      case 87: daily_stats=(value[0]=='n')?0:1; break; /*
      HourlyStats */
      + case 88: add_nlist(value,&omit_page); break; /*
      OmitPage */
      }
      }
      fclose(fp);
      @@ -1719,6 +1722,8 @@
      int ispage(char *str)
      {
      char *cp1, *cp2;
      +
      + if (isinlist(omit_page,str)!=NULL) return 0;

      cp1=cp2=str;
      while (*cp1!='\0') { if (*cp1=='.') cp2=cp1; cp1++; }
    Your message has been successfully submitted and would be delivered to recipients shortly.