AAWS class definition

Jump to: navigation, search

This is the code for a PHP class that queries the Amazon Product Advertising API. To use it, you must have an Amazon Web Services account and plug in your own public and private keys. This code is not guaranteed to work, to work well, or to comply with your Amazon Web Services contract. Use it at your own risk as a starting point.

See AAWS signed URL function for an example focused on query signing.

This PHP class is not meant to be instantiated. Use the class variables and methods to query AAWS. The simplest usage is only two commands:

$query = array( 'Operation'     =>'ItemLookup', 
                'ResponseGroup' =>'Small,Images',
                'IdType'        =>'ASIN',
                'ItemId'        =>'0060558121' );
$xml_obj = AAWS::get_xml_obj($query);

The main method "get_xml_obj" returns a SimpleXML object, but can easily be modified to return raw XML. Query parameters that are needed for every query are added automatically. To comply with the AAWS contract, the method sleeps when necessary so that consecutive queries are at least 1 second apart.

  • Query-related methods:
    • get_xml_obj($query, $cacheperiod, $keyprefix)
    • query_aws($query)
    • urlString($query)
    • hostnameForLocale($locale)
  • Cache-related methods
    • retrieve_cache_key($key)
    • replace_cache_key($key, $data, $cacheperiod)
    • these methods use DBClass. If you don't need to cache the data, just delete the code that calls these methods.
  • Browsenode methods as an example
    • lookup_browsenode($browsenodeID)
    • lookup_browsenode_batch($browsenodeID_array)

Code

<?php
class AAWS {
    /* --------- CLASS CONSTANTS --------- */
    const PUBLIC_KEY = 'my_public_key';   //sent as AWSAccessKeyId with every query
    const PRIVATE_KEY = 'my_private_key'; //HMAC key used to sign every query
    const AWS_VERSION = 'Version=2009-10-01'; //note that this is ready to insert into a URL
    const MAX_ATTEMPTS = 3; //maximum number of HTTP attempts per query

    /* --------- CLASS VARIABLES --------- */
    public static $associate_id = 'kennethlucius-20';

    public static $callcount = 0; //an int incremented with every AAWS query
    public static $queryinterval = 1; //number of seconds to sleep between queries
    public static $nextquerytime = 0; //unix time when another query is allowed

    public static $retrieved; /*    array containing info about the most recent retrieval
                                    if retrieved from cache:
                                    array(  'cache'     =>'the cache tablename'
                                            'cachekey'  =>'the cachekey that was found'
                                    if retrieved from AAWS:
                                    array(  'error'     =>'an error string (only set on failure)',
                                            'contents'  =>'string retrieved from AAWS. should be in XML format'
                                            'curl_info' =>'curl_getinfo results'
                                            'cache'     =>'not cached' or 'the cache tablename'
                                            'cachekey'  =>'the cachekey the result was stored under'
                            */

    public static $dbtable = 'aq_xx'; //table name of query cache. different for each locale

    public static $found_keys = array(); //an array of keys that exist in cache. Used to determine whether to INSERT or UPDATE


    /* --------- QUERY FUNCTIONS --------- */

    /* function get_xml_obj()
        this is the main function. retrieves data from AWS (or from the cache)
        and returns a simplexml object
        arguments: $query
                        an array keyed by fieldname like this:
                        (   'Operation'     =>'ItemLookup',
                            'ResponseGroup' =>'Small,Images',
                            'IdType'        =>'ASIN',
                            'ItemId'        =>'123456789'
                        );
                    $cacheperiod
                        integer. number of seconds to cache the xmlobject
                        defaults to 15 minutes. zero disables caching
                    $keyprefix
                        a string prefix for the cache key
                        defaults to the query's Operation

        returns: a simplexml object, or false on failure
                 (details of a failure are left in self::$retrieved['error'])
    */

    public static function get_xml_obj($query, $cacheperiod=900, $keyprefix='') {
        //sort the query array by key so the parameter order
        //cannot change the cache key
        ksort($query);

        $cacheperiod = intval($cacheperiod);
        $key = '';

        //look in the cache if $cacheperiod is nonzero
        if($cacheperiod) {
            //if prefix not supplied, set it to the operation
            if(! $keyprefix) {
                $keyprefix = empty($query['Operation']) ? 'NoOperation' : $query['Operation'];
            }
            //generate a unique key based on $query. parameter names are hashed
            //along with the values so that two queries carrying the same values
            //under different names do not share a cache entry
            $key = $keyprefix .':'. md5(http_build_query($query));
            //look for it in the cache. if found and parseable, return simplexml object
            $data = self::retrieve_cache_key($key);
            if($data) {
                $cached = simplexml_load_string($data);
                if($cached !== false) {
                    self::$retrieved = array('cache'=>self::$dbtable, 'cachekey'=>$key);
                    return $cached;
                }
                //the cached string is not valid XML: fall through and re-query,
                //which also overwrites the bad cache entry
            }
        }

        //not cached, so do the query
        $str = self::query_aws($query);
        if(! $str) {
            //the query function returned false or nothing
            //set an error in $retrieved and return false
            self::$retrieved['error'] = 'query_aws returned false';
            return false;
        }

        //convert the received xml to a simplexml object
        //return error if not successful. compare against false explicitly because
        //a valid but empty element also evaluates as false in boolean context
        $obj = simplexml_load_string($str);
        if($obj === false) {
            self::$retrieved['error'] = 'XML parse error';
            return false;
        }

        //cache the xml if $cacheperiod is nonzero
        if($cacheperiod) {
            self::$retrieved['cache'] = self::$dbtable;
            self::$retrieved['cachekey'] = $key;
            self::replace_cache_key($key, $obj->asXML(), $cacheperiod);
        }
        else {
            self::$retrieved['cache'] = 'not cached';
        }

        return $obj;
    }



    /* function query_aws()
        this private function retrieves and returns data from AWS.
        it makes up to MAX_ATTEMPTS HTTP requests and stops at the first
        response with HTTP code 200
        arguments: $query
                        an array keyed by fieldname. see notes on get_xml_obj()
        returns: a string containing the response, or false on failure
    */

    private static function query_aws($query) {
        //get a properly formatted and signed URL string
        $url = self::urlString($query);
        if(! $url) return false;

        //prepare a CURL object
        $ch = curl_init($url);
        if($ch === false) return false;
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 7);

        $contents = false;
        $info = array('http_code'=>0);

        for($attempt=1; $attempt<=self::MAX_ATTEMPTS; $attempt++) {
            //sleep for $queryinterval seconds if $nextquerytime has not been reached yet
            if(self::$nextquerytime > time() ) {
                sleep(self::$queryinterval);
            }
            //do the call and increment $callcount
            $contents = curl_exec($ch);
            self::$callcount++;
            //get curl info and check response code
            $info = curl_getinfo($ch);
            //response 200 is all good
            if($info['http_code'] == 200) {
                break;
            }
            //response 503 probably means too many queries too fast,
            //so force a pause before the next attempt
            if($info['http_code'] == 503) {
                self::$nextquerytime = time()+self::$queryinterval;
            }
        }
        //set $retrieved elements
        self::$retrieved = array('contents'=>$contents, 'curl_info'=>$info );
        curl_close($ch);
        //return false if there was a retrieval error
        if($info['http_code'] != 200) {
            return false;
        }
        //set $nextquerytime per AAWS agreement
        self::$nextquerytime = time()+self::$queryinterval;
        //return the raw XML
        return $contents;
    }




    /* function urlString()
        this public function generates a valid, signed URL based on supplied query
        arguments: $query
                        an array keyed by fieldname. see notes on get_xml_obj()
        returns: a string containing the URL, or an empty string if $query is empty
    */

    public static function urlString($query) {
        //sanity check
        if(! $query) return '';
        //get the host name for the locale we are using
        $server = self::hostnameForLocale();

        //a few other params
        $uri = '/onca/xml'; //used in $sig and $url
        $method = 'GET'; //used in $sig

        //convert query to an array of URL parameter=value
        $qa = array();
        foreach($query as $key=>$val) {
            $qa[$key] = rawurlencode($key) . '=' . rawurlencode($val);
        }

        //add the parameters that are needed for every query
        //(these overwrite same-named parameters supplied by the caller)
        $qa['AssociateTag'] = 'AssociateTag='.rawurlencode(self::$associate_id);
        $qa['AWSAccessKeyId'] = 'AWSAccessKeyId='.rawurlencode(self::PUBLIC_KEY);
        $qa['Service'] = 'Service=AWSECommerceService';
        $qa['Timestamp'] = 'Timestamp='.rawurlencode(gmdate('Y-m-d\TH:i:s\Z'));
        $qa['Version'] = self::AWS_VERSION;

        //sort the parameters by name, comparing as plain strings,
        //before generating the signature
        ksort($qa, SORT_STRING);

        //implode and generate signature over method, host, path and query string
        $qs = implode('&', $qa);
        $sig = base64_encode(hash_hmac('sha256', "{$method}\n{$server}\n{$uri}\n{$qs}", self::PRIVATE_KEY, true));

        //generate the URL with signature added on
        return "http://{$server}{$uri}?{$qs}&Signature=" . rawurlencode($sig);
    }


    /* function hostnameForLocale()
        return server name for specified locale
        arguments: $locale
                        a valid locale constant. defaults to CURRENT_LOCALE
        returns: a string containing the server name.
                 unrecognised locales get the US server

        Note that the constants used in this function are defined elsewhere
    */
    public static function hostnameForLocale($locale=CURRENT_LOCALE) {
        switch ($locale) {
            case AAWS_LOCALE_US:
                return 'ecs.amazonaws.com';
            case AAWS_LOCALE_UK:
                return 'ecs.amazonaws.co.uk';
            case AAWS_LOCALE_CANADA:
                return 'ecs.amazonaws.ca';
            case AAWS_LOCALE_FRANCE:
                return 'ecs.amazonaws.fr';
            case AAWS_LOCALE_GERMANY:
                return 'ecs.amazonaws.de';
            case AAWS_LOCALE_JAPAN:
                return 'ecs.amazonaws.jp';
            default:
                return 'ecs.amazonaws.com';
        }
    }





    /* --------- CACHE FUNCTIONS --------- */

    /* function cache_key_for_text()
        return a lookup key for supplied text. The key is a 16-byte binary string
        (the raw MD5 digest of the text)
    */
    public static function cache_key_for_text($text) {
        return md5($text, true);
    }

    /* function retrieve_cache_key()
        search cache for specified key. also records in $found_keys whether
        a row exists for the key, so replace_cache_key() can pick INSERT or UPDATE
        arguments: $key
                        a string that is unique to the data
        returns: a string containing the data, or an empty string if the key
                 is missing or its data has expired

        Notes:  DBClass (used to access the database) is defined elsewhere
                $dbtable must be set according to locale by the calling script
                NOTE(review): the mysql_* result functions used here were removed
                in PHP 7 - confirm what DBClass::query() returns before upgrading
    */

    public static function retrieve_cache_key($key) {
        $db = DBClass::get_object();
        $res = $db->query( 'SELECT `data`, `expires` FROM `'.self::$dbtable
                .'` WHERE id="'.$db->escape_string($key).'" LIMIT 1');
        $ret = '';
        //examine the result. ensure it is not expired
        if($res && mysql_num_rows($res) > 0) {
            self::$found_keys[$key] = true; //need to UPDATE this key, not INSERT
            $row = mysql_fetch_assoc($res);
            //NOTE(review): assumes `expires` comes back in YmdHis form - confirm against the schema
            if($row['expires'] > gmdate('YmdHis')) {
                $ret = $row['data'];
            }
        }
        else {
            self::$found_keys[$key] = false; //need to INSERT this key, not UPDATE
        }
        $db->free_result();
        return $ret;
    }

    /* function replace_cache_key()
        insert/replace data in the cache
        arguments: $key
                        a string that is unique to the data
                    $data
                        a string of data (XML, hopefully)
                    $cacheperiod
                        an integer. number of seconds until data expires
        returns: nothing

        Notes:  the statement used depends on what retrieve_cache_key()
                recorded in $found_keys for this key
    */
    public static function replace_cache_key($key, $data, $cacheperiod=900) { //15 minutes
        $db = DBClass::get_object();
        $k = $db->escape_set('id', $key);
        $v = $db->escape_set('expires', gmdate('YmdHis', $cacheperiod+time()) ) . ','
                        . $db->escape_set('data', $data );
        if(! isset(self::$found_keys[$key]) ) { //not known whether key exists
            $sql = 'INSERT INTO `'.self::$dbtable.'` SET '.$k.','.$v
                    . ' ON DUPLICATE KEY UPDATE '.$v;
        }
        elseif(self::$found_keys[$key]===true) { //row exists (possibly expired)
            $sql = 'UPDATE `'.self::$dbtable.'` SET '.$v.' WHERE '.$k;
        }
        else { //row is known not to exist
            //NOTE(review): DELAYED is deprecated in newer MySQL versions - confirm it is still wanted
            $sql = 'INSERT DELAYED INTO `'.self::$dbtable.'` SET '.$k.','.$v;
        }
        $db->query($sql);
    }

    /* --------- TO DO AFTER PAGE OUTPUT --------- */

    /* function post_output()
        purge the cache of old data: deletes rows whose `lastupdate` is older
        than the cutoff. The cutoff is computed in seconds before it is formatted,
        applying both offsets of the original code (7 days, then a further 300 days)
        NOTE(review): confirm that 307 days is the intended retention period
    */
    public static function post_output(){
        $cutoff = time() - (86400 * 7) - (86400 * 300);
        $olderthan = gmdate('YmdHis', $cutoff);
        $db = DBClass::get_object();
        $db->query('DELETE FROM `'.self::$dbtable
                        .'` WHERE `lastupdate`<'.$olderthan);
    }




    /* --------- CONVENIENCE FUNCTIONS --------- */

    /* function lookup_browsenode()
        lookup a single browsenode
        arguments: $bn
                        a browsenode ID
        returns: a simplexml object, or false if $bn is less than 1
                 or the lookup fails
    */
    public static function lookup_browsenode($bn=0) {
        if($bn < 1) return false;
        $q = array( 'Operation'     =>'BrowseNodeLookup',
                    'BrowseNodeId'  =>$bn,
                    'ResponseGroup' =>'BrowseNodeInfo');
        //browsenodes don't change very often, so cache it for 24 hours
        return self::get_xml_obj($q, 86400);
    }

    /* function lookup_browsenode_batch()
        lookup a bunch of browsenodes
        arguments: $bnarr
                        an array of browsenode IDs
        returns: an array with one entry per group of up to 10 browsenode IDs.
                 each entry is a simplexml object, or false if that query failed.
                 returns false if $bnarr is not an array
    */
    public static function lookup_browsenode_batch($bnarr) {
        //sanity check
        if(! is_array($bnarr) ) return false;
        //break the array into chunks of 10 browsenode IDs each
        $chunk = array_chunk($bnarr, 10);
        //prepare an empty array to return
        $ret = array();
        /*  for each 10 browsenodes, do a single query using batch query notation:
                BrowseNodeId.1=1234&BrowseNodeId.2=5678
        */
        foreach($chunk as $bns) {
            $q = array( 'Operation'     =>'BrowseNodeLookup',
                        'ResponseGroup' =>'BrowseNodeInfo');
            foreach($bns as $idx=>$b) {
                $q['BrowseNodeId.'.($idx+1)] = $b;
            }
            $ret[] = self::get_xml_obj($q, 86400); //24 hour cache
        }
        return $ret;
    }


} //end of AAWS
 
//whenever this file is included, point the query cache at the table for the current locale
AAWS::$dbtable = sprintf('AAWScache_%s', CURRENT_LOCALE);
 
Personal tools