;;; This file defines the following features which were ;;; pulled from the class's submissions to part I: ;;; ;:CHADLI-ISMAP ;:CHADLI-SHAPE ;:DNCRAWF-QUESTIONMARK ;:QIAN-TYPE-OF-IMAGE ;:NTMOORE-BANNER ;:HORN-ADS-S ;:FSCHWIET-SLASHES ;:MEGRAW-IMAGES ;:DHPHAN-GEO ;:JILANO-BANNER ;:KFORBES-EXTENSION ;:TIM-INSTANCE ;:MKK-COUNTER-IMAGE ;:WESC-IMGSRCLEN ;:TROLS-HAS-LINKEX ;:KEVINZ-EXTERNAL-SOURCE ;:PALMER-SMALL ;:CLICK-IN-ALT ;:TILDE-STRING ;:ALIGNMENT-TAG ;:GFISH-ABSOLUTE ;:RYANWONG-URLTYPE ;:TSUNAMI-HEIGHT ;:TSUNAMI-WIDTH ;:TSUNAMI-SIZE ;;; The feature list itself appears at the end of the file. ;;; What follows is helper code. (defvar stout-thres) (defvar tall-thres) (defvar BEGINurl) (defvar TAG) (defvar domain) (defvar img-tag) (defvar instring) (defvar stindex) ;; Return one of the elements of the domain based on the ;; matching border check which succeeds (assumes domain is ;; one longer than borders so that there is a default domain ;; value). ;; (defun extract-from-list (value borders domain comparator) (if (null borders) (car domain) (if (funcall comparator value (car borders)) (car domain) (extract-from-list value (cdr borders) (cdr domain) comparator)))) ;;;; From chadli_rsturg ; ------------------------------------------------------------------- ; This is our little parsing package. It gives back an assoc list with all ; the attributes of an img tag. (defun parse-img-tag (tag) (mapcar #'(lambda (x) (cons (car x) (cleanup-val (cdr x)))) (make-pairs (split-img-tag tag)))) (defun get-val-and-attrib (str) (let* ((spos (position #\Space str :from-end T)) (pathetic "\"") (qpos (position (char pathetic 0) str :from-end T))) (cond ((null str) :ERROR) ((null spos) (list str NIL)) ((null qpos) (list (subseq str 0 spos) (subseq str (+ 1 spos)))) ((or (= (- (length str) 1) qpos) (= (- (length str) 1) spos)) (list str NIL)) (T (list (subseq str 0 (max spos qpos)) (subseq str (+ 1 (max spos qpos)))))))) (defun split-img-tag (tag) (mapcar #'get-val-and-attrib (mapcar #'(lambda (x) (string-trim '(#\Space #\Tab #\Newline) x)) (split-sequence #\= (string-downcase (string-right-trim " >" tag)))))) (defun cleanup-val (val) (let ((pos (position #\" val :from-end T))) (if (null pos) (first (split-sequence #\Space val)) (string-trim "\"" (subseq val 0 (+ pos 1)))))) (defun split-sequence (item seq) (let ((pos (position item seq))) (if (null pos) (list seq) (cons (subseq seq 0 pos) (split-sequence item (subseq seq (+ 1 pos))))))) (defun make-pairs (seq) (if (or (null seq) (null (cdr seq))) nil (cons (cons (second (first seq)) (first (second seq))) (make-pairs (cdr seq))))) ; ------------------------------------------------------------------- ; handy little helper that gives the value of an attribute when given ; a datapoint. It uses the img tag parser above. (defun find-attribute (datapoint attrib) (let ((imgtag (assoc ':IMG datapoint)) (pair NIL)) (if (null imgtag) NIL (progn (setf pair (assoc attrib (parse-img-tag (cdr imgtag)) :test #'equal)) (if (null pair) NIL (cdr pair)))))) ; ------------------------------------------------------------------- ; Here are the feature extractors. They have comments as to what they do in ; the feature set definition below. (defun ismap-extractor (datapoint) (if (or (find-attribute datapoint "ismap") (find-attribute datapoint "usemap")) :YES :NO)) (setf stout-thres 1.3) (setf tall-thres 0.7) (defun shape-extractor (datapoint) (let* ((heighttag (find-attribute datapoint "height")) (widthtag (find-attribute datapoint "width"))) (cond ((or (null heighttag) (null widthtag)) :UNKNOWN) ((> (/ (parse-integer widthtag :junk-allowed T) (parse-integer heighttag :junk-allowed T)) stout-thres) :STOUT) ((< (/ (parse-integer widthtag :junk-allowed T) (parse-integer heighttag :junk-allowed T)) tall-thres) :TALL) (T :SQUARELY)))) ;;;; From dvc ;;does it have a "?" in the image url? ;;the img tag must have the image URL specified like this: "src=\"anyURL\"" ;;feature extraction utilities (defun extract (i) (cond ((null i) :F) (T :T) ) ) (defun imageURL (dp) (setf tag (imageTag dp)) (let ((srcLoc (search "src=\"" (string-downcase tag)))) (if (null srcLoc) "" (progn (setf beginURL (subseq tag (+ (search "src=\"" (string-downcase tag)) 5))) ;;cut off everything before the URL (subseq beginURL 0 (search "\"" beginURL)))))) ;;cut off everything after the URL (defun imageTag (dp) (cdr (assoc :img dp)) ) (defun found (i) ;;digit -> T nil -> nil (not (not i)) ) (defun questionMarkExtractor (dp) (extract (questionMark dp)) ) (defun questionMark (dp) (found (search "?" (imageURL dp))) ) ;;;; From Echidna (defun type-of-image-extractor (data-point) (setf domain '(:GIF :JPG :BMP :PNG :OTHER)) (find-type data-point domain)) (defun find-type (data-point domain) (setf img-tag (cdr (assoc ':IMG data-point))) (setf img-tag (string-upcase img-tag)) (if (search ".GIF\"" img-tag) (first domain) (if (search ".JPG\"" img-tag) (second domain) (if (search ".BMP\"" img-tag) (third domain) (if (search ".PNG\"" img-tag) (fourth domain) (fifth domain)))))) ;;this looks for banner. the word banner is a pretty good tip off ;;that the image is an ad. (defun Banner (data-point) (setf domain '(:TRUE :FALSE)) (find-Banner data-point domain)) (defun find-Banner (data-point domain) (setf img-tag (cdr (assoc ':IMG data-point))) (setf img-tag (string-upcase img-tag)) (if (search "BANNER" img-tag) (first domain) (second domain))) ;; ;; ads-s-extractor is just a wrapper for the extract-from-list function. ;; It simply tells extract-from-list how to classify the information stored ;; in the image tag. ;; (defun ads-s-extractor (data-point) (let ((domain '(t nil)) (range '(:TRUE :FALSE))) (extract-from-list (ads-s (cdr (assoc :IMG data-point))) domain range #'(lambda (x y) (string-equal x y))))) ;; ;; ads-s simply searches the image tag for the string "/ADS/" or "/AD/". ;; It returns either true if /ADS/ or /AD/ appears in the tag or false ;; if not found. ;; (defun ads-s (img-tag) (let ((pos (search "/ADS/" img-tag :test #'char-equal))) (if (null pos) (let ((pos1 (search "/AD/" img-tag :test #'char-equal))) (if (null pos1) nil t)) t))) ;;;; From Go Team ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; SLASH-EXTRACTOR ;; Seperates the number of slashes in the IMG URL into three distinct categories: ;; :FEW - Less than 3 slashes ;; :SEVERAL - Between 3 and 6 slashes ;; :LOTS - Greater than 6 slashes ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defun slash-extractor (data-point) (let* ((imgTag (cdr (assoc :IMG data-point))) (numSlash (count-if #'(lambda (c) (equalp c #\/)) imgTag))) (cond ((< numSlash 3) :FEW) ((<= numSlash 6) :SEVERAL) (T :LOTS)))) ;;;; From imp ;; Identify the presence of the word 'images' in the SRC attribute of the image (defun images-extractor (data-point) (if (search "images" (string-downcase (cdr (assoc :IMG data-point)))) :IMAGES :NOT-IMAGES)) ;; Classify image as :GEO or :NOT-GEO (defun geo-site (data-point) (cond ((search "geocities" (cdr (assoc :IMG data-point)) :test #'char-equal) :GEO) ((search "geocities" (cdr (assoc :URL data-point)) :test #'char-equal) :GEO) (T :NOT-GEO))) ;; helper function: return the width given an IMG tag WITH A WIDTH VALUE (defun get-val (search-str len str) (let* ((new-str1 (remove #\" str)) (new-str2 (remove #\= new-str1)) (pos (search search-str new-str2 :test #'char-equal))) (parse-integer new-str2 :start (+ pos len) :junk-allowed T))) ;; helper function: given a datapoint, return width if there is one, NIL otherwise (defun get-width (data-point) (let ((str (cdr (assoc :IMG data-point)))) (if (search "width" str :test #'char-equal) (funcall #'get-val "width" 5 str) NIL))) ;; helper function: given a datapoint, return height if there is one, NIL otherwise (defun get-height (data-point) (let ((str (cdr (assoc :IMG data-point)))) (if (search "height" str :test #'char-equal) (funcall #'get-val "height" 6 str) NIL))) ;; if width/height is between 7 and 8 (typical banner size), classify the image as a possible ;; banner (defun banner-extractor (data-point) (let ((wdt (get-width data-point)) (ht (get-height data-point))) (cond ((null wdt) :NOT-BANNER) ((null ht) :NOT-BANNER) ((and (<= 7 (/ wdt ht)) (>= 8 (/ wdt ht))) :BANNER) (t :NOT-BANNER)))) ;;;; From the Mighty Endorphins ;;Checks to see if the site is an edu or gov site. If so, ;;it probably won't have ads. (defun safe-extension (data-point) (let ((url (string-upcase (cdr (assoc :URL data-point))))) (if (or (search ".GOV" url) (search "EDU" url)) :TRUE :FALSE))) ;;Check to see if the image occurs more than once in the same page. If so it is ;;probably a bullet. (defun multi-instance (data-point) (let ((instances (cdr (assoc :NUM data-point)) )) (if (> instances 1) :TRUE :FALSE))) ;;;; From mk-ws-to ;;this function looks for the string 'count' with no letters following it ;; in the image tag to try and guess whether the image is a counter or not. (defun is-counter (data-point) (let* ((img-tag (cdr (assoc :IMG data-point))) (index-of (search "count" img-tag :test #'equalp )) ) (if (and index-of (not (alpha-char-p (char img-tag (+ index-of 5)))) ) :COUNTER :NOT-COUNTER))) ;;gets the length of the img tag (defun getimgsrclen (ImgTag) (length ImgTag) ) ;;If the path to the image is really long, it's likely to be an ad, since a lot of ads have generated paths and/or are rotated daily. ;;human defined paths are generally easier to read than machine generated paths (defun imglen-extractor (data-point) (if (< 80 (getimgsrclen (cdr (assoc :IMG data-point)))) :VERYLONG (if (< 40 (getimgsrclen (cdr (assoc :IMG data-point)))) :LONG :SHORT ) ) ) ;;checks for "linkexchange" in IMG tag (defun has-linkex (data-point) (if (search "linkexchange" (cdr (assoc :IMG data-point)) :test #'equalp) :LINKEX :NO-LINKEX)) ;;;; From palmer_kevinz ;; ;; Module: image-tag.lsp ;; ;; Purpose: This module contains the definition of functions for ;; manipulating HTML image tags. ;; ;; The following function returns true if and only if the given ;; character is a space, tab, or newline character. (defun whitespacep (c) (or (eql c #\Space) (eql c #\Newline) (eql c #\Tab))) ;; The following function returns the value of the given string when ;; interpretted as an integer. (defun string-to-integer (s) (do ((i 0 (+ i 1)) (n 0 (+ (* n 10) (digit-char-p (char s i))))) ((= i (length s)) n) ) ) ;; The following function returns the new association list which ;; results from adding an association for the name-value pair given by ;; the subsequences (name-start, name-end) and (value-start, ;; value-end) of the image-tag string. If the value consists of all ;; digits, then it will be converted to an integer. (defun add-arg-assoc (assoc-list image-tag name-start name-end value-start value-end ) (let ((name (string-upcase (subseq image-tag name-start (+ name-end 1)))) (value (subseq image-tag value-start (+ value-end 1)))) (if (find-if-not #'digit-char-p value) (acons name value assoc-list) (acons name (string-to-integer value) assoc-list)))) ;; The image-tag-to-assoc-list takes an image tag (a string) and ;; produces an association list out of the arguments in that tag. For ;; example, if given the image tag ;; ;; "" ;; ;; it would return the association list ;; ;; (("SRC" . "foo.html") ("WIDTH" . 15) ("HEIGHT" . 20)) (defun image-tag-to-assoc-list (image-tag) ;; This function will parse the image tag by simulating a simple ;; state machine. This state machine will have 8 states: ;; ;; 0 - before image tag ;; 1 - before argument name ;; 2 - during argument name ;; 3 - after argument name ;; 4 - before argument value ;; 5 - during quoted argument value ;; 6 - during unquoted argument value ;; 7 - after image tag (let ((state 0) ;; the current state (name-start 0) ;; the index of the character which starts ;; the of the most recent argument name ;; found (name-end 0) ;; the index of the character which ends ;; the of the most recent argument name ;; found (value-start 0) ;; the index of the character which starts ;; the of the most recent argument value ;; found (assoc-list nil) ;; the current association list (current-char nil) ;; the character of the image tag which is ;; currently under examination ) ;; Loop through each character i in the image tag. (do ((i 0 (+ i 1))) ((= i (length image-tag)) nil) ;; Set the character which is to be examined. (setq current-char (elt image-tag i)) ;; Find the code for the current state. (case state ;; before image tag ((0) ;; Move to before-argument-name when a '<' character is ;; encountered. (if (eql current-char #\<) (setq state 1) ) ) ;; before argument name ((1) ;; If we discover a '>' character, then we have found ;; the end of the image tag. (if (eql current-char #\>) (setq state 7) ;; Move to during-argument-name once a nonspace ;; character (which is not '>') is encountered. Also, ;; record that this character position is where the ;; name begins. (if (not (whitespacep current-char)) (progn (setq state 2) (setq name-start i) ) ) ) ) ;; during argument name ((2) ;; Move to after-argument-name if a whitespace ;; character is encountered. Also, record that the ;; previous character position is where the name ends. (if (whitespacep current-char) (progn (setq state 3) (setq name-end (- i 1)) ) ;; Move to before-argument-value (skipping ;; after-argument-value) if an '=' character is ;; encountered. Also, record that the previous ;; character position is where the name ends. (if (eql current-char #\=) (progn (setq state 4) (setq name-end (- i 1)) ) ) ) ) ;; after argument name ((3) ;; Move to before-argument-value when an '=' character ;; is encountered. (if (eql current-char #\=) (setq state 4) ;; If we discover a non-whitespace character which not ;; a '=' character, then we will assume that this is ;; the beginning of an argument name (and the ;; previously found argument name was junk). (if (not (whitespacep current-char)) (progn (setq state 2) (setq name-start i) ) ) ) ) ;; before argument value ((4) ;; If a quote character is encountered, move to the ;; during-quoted-argument-value state and record that ;; the next character is where the value begins. (if (eql current-char #\") (progn (setq state 5) (setq value-start (+ i 1)) ) ;; If we discover a non-whitespace character which is ;; not a quote character, then move to the ;; during-unquoted-argument-value state and record that ;; this is where the value begins. (if (not (whitespacep current-char)) (progn (setq state 6) (setq value-start i) ) ) ) ) ;; during quoted argument value ((5) ;; If we are at a quote character, then we have reached ;; the end of the argument value. In that case, we add ;; the name and value to the association list and move ;; back to the before-argument-name state. (if (eql current-char #\") (progn (setq assoc-list (add-arg-assoc assoc-list image-tag name-start name-end value-start (- i 1))) (setq state 1) ) ) ) ;; during unquoted argument value ((6) ;; If we are at a whitespace character or a '>' ;; character, then we have reached the end of the ;; argument value. In that case, we add the name and ;; value to the association list and move back to the ;; before-argument-name state. (if (or (whitespacep current-char) (eql current-char #\>)) (progn (setq assoc-list (add-arg-assoc assoc-list image-tag name-start name-end value-start (- i 1))) (setq state 1) ) ) ) ;; after image tag ((7) ;; We do nothing after an image tag... just walk to the ;; end of the string. ) ) ) ;; Return the association list produced assoc-list ) ) ;; The following function retrieves the value of the argument whose ;; name is given from the association list given or nil if it cannot ;; be found. (defun argument-value (arg-name assoc-list) (let ((val (assoc arg-name assoc-list :test #'string-equal))) (if (null val) nil (cdr val)))) ;; The following function returns true if the given data point has an ;; image tag whose SRC argument is a string which starts with ;; "http://" and false otherwise. (defun feature-external-source (image) (if (= (length "http://") (mismatch (argument-value "SRC" (image-tag-to-assoc-list (cdr (assoc :IMG image)))) "http://")) :TRUE :FALSE)) ;; The following function returns true if the given data point is an ;; image which is no wider than 30 and no taller than 30 and false ;; otherwise. (defun feature-small (image) (let* ((assoc-list (image-tag-to-assoc-list (cdr (assoc :IMG image)))) (width (argument-value "WIDTH" assoc-list)) (height (argument-value "HEIGHT" assoc-list))) (when (and (not (numberp width)) (stringp width)) (setf width (parse-integer width :junk-allowed T))) (when (and (not (numberp height)) (stringp height)) (setf height (parse-integer height :junk-allowed T))) (if (and (not (null width)) (not (null height)) (numberp width) (numberp height) (<= width 30) (<= height 30)) :TRUE :FALSE))) ;;;; From Procrastination ;;; ;;; Name: tilde-extractor ;;; ;;; Desc: Do any of these URLs contain "~"? ;;; ;;; ;;; In: data-point ;;; Out: {TRUE, FALSE} ;;; (defun tilde-extractor (data-point) (cond ((search "~" (cdr (assoc :IMG data-point)) :test #'char-equal) :TRUE) ((search "~" (cdr (assoc :URL data-point)) :test #'char-equal) :TRUE) (t :FALSE))) ;;; ;;; Name: remove-whitespace ;;; ;;; Desc: Removes spaces and quotes ("). ;;; ;;; ;;; In: string ;;; Out: string with whitespace removed. ;;; (defun remove-whitespace (string1) ;; White spacing meaning " and space (remove #\Space (remove #\" string1))) ;;; ;;; Name: procras-align-extractor ;;; ;;; Desc: What is the alignment of the image? ;;; ;;; ;;; In: data-point ;;; Out: {:NO-SPEC, :BOTTOM, :TOP, :LEFT, :RIGHT, :MIDDLE, :OTHER} ;;; (defun procras-align-extractor (data-point) (let ((removed-DP (remove-whitespace (cdr (assoc :IMG data-point))))) (cond ((eq (search "ALIGN=" removed-DP :test #'char-equal) NIL) :NO-SPEC) ((search "ALIGN=BOTTOM" removed-DP :test #'char-equal) :BOTTOM) ((search "ALIGN=TOP" removed-DP :test #'char-equal) :TOP) ((search "ALIGN=LEFT" removed-DP :test #'char-equal) :LEFT) ((search "ALIGN=RIGHT" removed-DP :test #'char-equal) :RIGHT) ((search "ALIGN=MIDDLE" removed-DP :test #'char-equal) :MIDDLE) (t :OTHER)))) ;;; ;;; Name: parse-html-value ;;; ;;; Desc: Parses source string as value from HTML tag. If it is enclosed in quotes, then stops at ;;; ending quote. If not, stops at first whitespace. ;;; ;;; In: source HTML value string. ;;; Out: Parsed string. (defun parse-html-value (s) ;; Initialize local variables (let ((stoken (make-string 0)) (i 0) (quote-count 0)) ;; Loop through the source string. (loop ;; Have we reached the end of the source string s? (unless (< i (length s)) (return stoken)) ;; No! Switch on the current character in s. (cond ;; Is this character a space and we have seen no quotes? ((and (char= (char s i) #\ ) (eq 0 quote-count)) ;; Yes! We are done. Return the string token. (return stoken)) ;; Is this character a quote? ((char= (char s i) #\") ;; Yes! Are we at the end of the string? (if (>= (+ i 1) (length s)) ;; Yes! We are done. Return the token. (return stoken) ;; No! Increment quote count. (if (eq quote-count 0) (incf quote-count) (return stoken)) )) ;; Default case. Normal character. Add the character to the token. (t (setf stoken (concatenate-strings stoken (char s i))))) ;; Increment the source string position index. (incf i)))) ;;; ;;; Name: extract-tag-value ;;; ;;; Desc: Extracts a value corresponding ;;; to a specified HTML tag from a source HTML string. ;;; ;;; ;;; In: source - source string of HTML code ;;; sequence - string of HTML tag. ;;; Note: Must specify "=" in the TAG (e.g. "ALT="). ;;; Out: string with parsed value. ;;; (defun extract-tag-value (source sequence) (let ((firstpos (search sequence source :test #'char-equal))) (if (eq firstpos NIL) NIL (parse-html-value (subseq source (+ (length sequence) firstpos)))))) ;;; ;;; Name: click-extractor ;;; ;;; Desc: Is there a string containing "click" in the ALT text for the image? ;;; ;;; ;;; In: data-point ;;; Out: {TRUE, FALSE} ;;; (defun click-extractor (data-point) (cond ((search "click" (extract-tag-value (cdr (assoc :IMG data-point)) "ALT=") :test #'char-equal) :TRUE) (t :FALSE))) ;;;; From Random Goth Generators (defun absolute-func (data-point) (if (search "http" (cdr (assoc :IMG data-point))) :YES :NO) ) ;;;; From ry-dogg ;; This parses the extension from the url and returns the extension ;; as a lower case string. ;; It assumes that the url starts with "http://". Also, that ;; the extension is the characters after "http://", before the next "/", ;; and after the first "." which precedes that "/". (defun parse-ext-from-url (url) (let ((ext nil) (trim-url nil) (index 0) (char-at-index nil)) ;; remove the "http://" and lower case the string (setf trim-url (string-left-trim " http://" (string-downcase url))) (setf index (search "/" trim-url)) (when (null index) (setf index (length trim-url))) (decf index) (setf char-at-index (char trim-url index)) ;; gather the characters before the "/" until you get a "." (while (and (not (char= #\. char-at-index)) (>= index 0)) (setf ext (cons (char trim-url index) ext)) (decf index) (when (>= index 0) (setf char-at-index (char trim-url index)))) (make-array (length ext) :element-type 'character :initial-contents ext))) ;; Categorize the url's as COM, EDU, ORG or OTHER. (defun url-extractor (data-point) (let ((ranges '("com" "edu" "org")) (values '(:COM :EDU :ORG :OTHER))) (extract-from-list (parse-ext-from-url (cdr (assoc :URL data-point))) ranges values #'(lambda (x y) (whole-string= x y))))) ;;;; From Tsunami ;; Helper Function -- get-substring ;; copy the original string to the new string from the start point to ;; the stop character ;; input -- the index of start point, the stop character, the original string, ;; an empty new string ;; output -- a string (defun get-substring (start-index stop-char input-str new-str) (cond ((char= stop-char (aref input-str start-index)) new-str) ((>= (+ 1 start-index)(length input-str)) (concatenate 'string new-str (string (aref input-str start-index)))) (T (get-substring (+ 1 start-index) stop-char input-str (concatenate 'string new-str (string (aref input-str start-index))))))) ;; Helper Function -- extract-string ;; search the key-string in the IMG tag, use get-substring function to get the ;; immediate substring followed the key-string (excluding the equal sign) ;; The function handles the whitespaces around "=" ;; REMEMBER: use uppercase character for key-string ;; input -- datapoint, key-string (uppercase) for search, stop charactor ;; output -- a string (defun extract-string (data-point key-string stop-char) (setf INstring (cdr (assoc :IMG data-point))) (setf STindex (+ (length key-string) (search key-string (string-upcase INstring)))) (loop until (eq #\= (aref INstring STindex)) ;;handle the whitespaces before "=" do (setf STindex (+ 1 STindex))) (setf STindex (+ 1 STindex)) ;;pass "=" (loop while (eq #\space (aref INstring STindex)) ;;handle the whitespaces after "=" do (setf STindex (+ 1 STindex))) (get-substring STindex stop-char INstring "")) ;; Helper Function -- extract-number ;; use extract-string function to get width and height ;; and converse a string to a number ;; default stop character is whitespace ;; REMEMBER: use uppercase character for key-string ;; input -- data-point, key-string (uppercase) for search ;; output -- a number (defun extract-number (data-point key-string) (parse-integer (string-trim '(#\") (extract-string data-point key-string #\space)) :junk-allowed T)) ;; Helper Function -- img-width ;; Extract image width. ;; If no width data, return 0 ;; input -- data-point ;; output -- a number (defun img-width (data-point) (if (null (search "WIDTH" (string-upcase (cdr (assoc :IMG data-point))))) 0 (let ((num (extract-number data-point "WIDTH"))) (if (numberp num) num 0)))) ;; Helper Function -- img-height ;; Extract image height. ;; If no width data, return 0 ;; input -- data-point ;; output -- a number (defun img-height (data-point) (if (null (search "HEIGHT" (string-upcase (cdr (assoc :IMG data-point))))) 0 (let ((num (extract-number data-point "HEIGHT"))) (if (numberp num) num 0)))) ;; Helper Function -- img-size ;; Calculate the size by multipling width and height. ;; If no width or height data, return 0 ;; input -- data-point ;; output -- a number (defun img-size (data-point) (* (img-width data-point) (img-height data-point))) ;; Feature function -- size-extractor ;; Categorize the size of image as unknow, small, medium and large (defun tsu-size-extractor (data-point) (let ((ranges '(0 500 5000 10000)) (values '(:UNKOWN :TINY :SMALL :MEDIUM :LARGE))) (extract-from-list (img-size data-point) ranges values #'(lambda (x y) (<= x y))))) ;; Feature function -- width-extractor ;; Categorize the width of image as unknow, small, medium and large (defun width-extractor (data-point) (let ((ranges '(0 100 500)) (values '(:UNKOWN :SMALL :MEDIUM :LARGE))) (extract-from-list (img-width data-point) ranges values #'(lambda (x y) (<= x y))))) ;; Feature function -- height-extractor ;; Categorize the height of image as unknow, small, medium and large (defun height-extractor (data-point) (let ((ranges '(0 100 500)) (values '(:UNKOWN :SMALL :MEDIUM :LARGE))) (extract-from-list (img-height data-point) ranges values #'(lambda (x y) (<= x y)))))