;;;; This file contains the feature list for the class
;;;; along with a function for testing features in a
;;;; feature list against a series of datapoints.

;;; Feature/datapoint tester

(defun test-feature-list (features data-points)
  "Return true when FEATURES and DATA-POINTS are well formed and every
feature's extractor maps every datapoint to a member of that feature's domain.

The checks run in this order and stop at the first failure:
  1. FEATURES is a list whose elements all satisfy FEATURE-P.
  2. DATA-POINTS is a list whose elements are each a list of conses.
     (This tester is general rather than image specific, so
     IMG-DATA-POINT-LIST-P is deliberately not used.)
  3. For each datapoint, and for each feature, calling the feature's
     extractor on the datapoint yields a value that is MEMBER of the
     feature's domain."
  (flet ((datapoint-shape-ok-p (datapoint)
           ;; A datapoint is a list in which every element is a cons.
           (and (listp datapoint)
                (every #'consp datapoint)))
         (extracted-value-in-domain-p (feature datapoint)
           ;; One feature against one datapoint.  The extractor is called
           ;; before the domain is read (left-to-right argument evaluation).
           (member (funcall (feature-extractor feature) datapoint)
                   (feature-domain feature))))
    (cond
      ;; FEATURES must be a list of valid features.
      ((not (listp features)) nil)
      ((notevery #'feature-p features) nil)
      ;; DATA-POINTS must be a list of valid datapoints.
      ((not (listp data-points)) nil)
      ((notevery #'datapoint-shape-ok-p data-points) nil)
      ;; Structure is fine: apply every feature to every datapoint,
      ;; datapoint by datapoint.
      (t
       (every #'(lambda (datapoint)
                  (every #'(lambda (feature)
                             (extracted-value-in-domain-p feature datapoint))
                         features))
              data-points)))))

;;; The feature list

;; Each entry is a four-element list:
;;   (name question domain extractor)
;; where NAME is a keyword, QUESTION is a human-readable string, DOMAIN is
;; the list of keywords the extractor may return, and EXTRACTOR is a
;; function object.  The extractor functions must already be defined when
;; this form is evaluated, because #' is resolved at that point.
;;
;; NOTE(review): the three TSUNAMI-* domains spell the keyword :UNKOWN.
;; Presumably the corresponding extractors return that exact keyword, so
;; the spelling is kept as is -- confirm against the extractors before
;; "correcting" it.
(defparameter *features*
  (list
   (list :CHADLI-ISMAP
         "Is the image a server-side image map?"
         '(:YES :NO)
         #'ismap-extractor)
   (list :CHADLI-SHAPE
         "What shape is the image?"
         '(:TALL :SQUARELY :STOUT :UNKNOWN)
         #'shape-extractor)
   (list :DNCRAWF-QUESTIONMARK
         "Is there a question mark in the image URL?"
         '(:T :F)
         #'questionMarkExtractor)
   (list :qian-type-of-image
         "What type of image is it?"
         '(:GIF :JPG :BMP :PNG :OTHER)
         #'type-of-image-extractor)
   (list :NTMOORE-BANNER
         "Does \"banner\" exist in the tag?"
         '(:TRUE :FALSE)
         #'Banner)
   (list :HORN-ADS-S
         "Does the URL contain /AD/ or /ADS/?"
         '(:TRUE :FALSE)
         #'ads-s-extractor)
   (list :FSCHWIET-SLASHES
         "How many slashes are there in the image URL?"
         '(:FEW :SEVERAL :LOTS)
         #'slash-extractor)
   (list :MEGRAW-IMAGES
         "Does the image URL have the word \"images\" in it?"
         '(:IMAGES :NOT-IMAGES)
         #'images-extractor)
   (list :DHPHAN-GEO
         "Is the site a geocities site?"
         '(:GEO :NOT-GEO)
         #'geo-site)
   (list :JILANO-BANNER
         "Is the image 'banner shaped'?"
         '(:BANNER :NOT-BANNER)
         #'banner-extractor)
   (list :KFORBES-EXTENSION
         "Does the page have a .gov or .edu extension?"
         '(:TRUE :FALSE)
         #'safe-extension)
   (list :TIM-INSTANCE
         "Does the image occur more than once on the page?"
         '(:TRUE :FALSE)
         #'multi-instance)
   (list :MKK-COUNTER-IMAGE
         "Is the image a counter image?"
         '(:COUNTER :NOT-COUNTER)
         #'is-counter)
   (list :WESC-IMGSRCLEN
         "How long is the image URL?"
         '(:SHORT :LONG :VERYLONG)
         #'imglen-extractor)
   (list :TROLS-HAS-LINKEX
         "Did somebody mention Link Exchange?"
         '(:LINKEX :NO-LINKEX)
         #'has-linkex)
   (list :KEVINZ-EXTERNAL-SOURCE
         "Was this image stored on the same computer as the web page?"
         '(:TRUE :FALSE)
         #'feature-external-source)
   (list :PALMER-SMALL
         "Is this a \"small\" image?"
         '(:TRUE :FALSE)
         #'feature-small)
   (list :CLICK-IN-ALT
         "Does it say 'click' in the alt tag?"
         '(:TRUE :FALSE)
         #'CLICK-EXTRACTOR)
   (list :TILDE-STRING
         "Is there a '~' around?"
         '(:TRUE :FALSE)
         #'TILDE-EXTRACTOR)
   (list :ALIGNMENT-TAG
         "How is the image aligned?"
         '(:NO-SPEC :BOTTOM :TOP :LEFT :RIGHT :MIDDLE :CENTER :OTHER)
         #'PROCRAS-ALIGN-EXTRACTOR)
   (list :GFISH-ABSOLUTE
         "Is the IMG SRC an absolute URL?"
         '(:YES :NO)
         #'absolute-func)
   (list :RYANWONG-URLTYPE
         "What is the URL extension?"
         '(:COM :EDU :ORG :OTHER)
         #'url-extractor)
   (list :TSUNAMI-SIZE
         "How big is the image (area)?"
         '(:UNKOWN :TINY :SMALL :MEDIUM :LARGE)
         #'tsu-size-extractor)
   (list :TSUNAMI-WIDTH
         "How wide is the image?"
         '(:UNKOWN :SMALL :MEDIUM :LARGE)
         #'width-extractor)
   (list :TSUNAMI-HEIGHT
         "How tall is the image?"
         '(:UNKOWN :SMALL :MEDIUM :LARGE)
         #'height-extractor))
  "The class feature list: one (name question domain extractor) entry per feature.")