Contents |
"Sitemaps are an easy way for webmasters to inform search engines about pages on their sites that are available for crawling. In its simplest form, a Sitemap is an XML file that lists URLs for a site along with additional metadata about each URL (when it was last updated, how often it usually changes, and how important it is, relative to other URLs in the site) so that search engines can more intelligently crawl the site."
(source: Sitemaps.org)
If you are using Symfony, you are probably building larger, dynamic projects, so you will also want to automate the generation of your sitemaps.
This tutorial explains how to use a Symfony Helper to generate a sitemap for your application in a few simple steps:
Add Sitemap Helper to lib/helper/SitemapHelper.php:
// lib/helper/SitemapHelper.php

/**
 * SITEMAP HELPER
 *
 * Functions that build the XML fragments of a sitemap (urlset, url, geo,
 * image and news nodes) as plain strings.
 *
 * @license http://www.opensource.org/licenses/lgpl-license.php LGPL
 * @author Jasper Moelker <J.B.Moelker@activeIDs.nl>
 * @version
 */

// format_date() (used for <lastmod>) comes from the Date helper. The guard
// only matters outside symfony: it lets the pure string builders below be
// loaded (e.g. by a unit test) when the helper loader is not available.
if (function_exists('use_helper')) {
    use_helper('Date');
}

/**
 * CREATE XML NODE
 * wrap a value in an opening and closing tag. The value is inserted
 * verbatim (it may itself be XML), so callers are responsible for
 * entity-escaping plain text such as urls containing '&'.
 *
 * @param <string> $name  - tag name, including a namespace prefix if needed
 * @param <string> $value - node content
 * @return <string>
 */
function xmlNode($name, $value)
{
    return '<' . $name . '>' . $value . '</' . $name . '>';
}

/**
 * VALIDATE LOCATION URL
 * check if the given url is a valid absolute url, i.e. a string that
 * starts with http:// or https://.
 *
 * @param <string> $url
 * @return <boolean>
 */
function validLocationUrl($url)
{
    if (!is_string($url)) {
        return false;
    }

    return strncmp($url, 'http://', 7) === 0 || strncmp($url, 'https://', 8) === 0;
}

/**
 * SITEMAP URLSET NODE
 * opening <urlset> tag with the default sitemap namespace plus the
 * namespaces of the requested extensions. The closing </urlset> tag has to
 * be written by the caller.
 *
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=183668#2 Sitemap Extensions
 *
 * @param <array> $options - array of extension types. options: GeneralUrl, Images, Video, Mobile, CodeSearch, Geo, News
 * @return <string> XML Node
 */
function sitemapUrlsetNode($options = array())
{
    // EXTENSIONS:
    // types and related namespaces:
    $extensions = array(
        'GeneralUrl' => 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"',
        'Images'     => 'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"',
        'Video'      => 'xmlns:video="http://www.google.com/schemas/sitemap-video/1.1"',
        'Mobile'     => 'xmlns:mobile="http://www.google.com/schemas/sitemap-mobile/1.0"',
        'CodeSearch' => 'xmlns:codesearch="http://www.google.com/codesearch/schemas/sitemap/1.0"',
        'Geo'        => 'xmlns:geo="http://www.google.com/geo/schemas/sitemap/1.0"',
        'News'       => 'xmlns:news="http://www.google.com/schemas/sitemap-news/0.9"',
    );

    // SET NAMESPACES:
    // the default namespace is always declared; unknown types are ignored
    // and every namespace is added once only (a repeated xmlns attribute
    // would make the document malformed).
    $namespaces = array($extensions['GeneralUrl']);
    foreach ((array) $options as $option) {
        if (!is_string($option) || !isset($extensions[$option])) {
            continue;
        }
        if (!in_array($extensions[$option], $namespaces, true)) {
            $namespaces[] = $extensions[$option];
        }
    }

    // RETURN URLSET NODE:
    // implode() takes the separator first; the reversed order was removed in PHP 8.
    return '<urlset ' . implode(' ', $namespaces) . '>' . "\n";
}

/**
 * SITEMAP URL NODE
 *
 * @link http://www.google.com/support/webmasters/bin/topic.py?topic=8476 Sitemaps on Google Webmaster Tools
 * @link http://www.google.com/support/webmasters/bin/answer.py?hl=en&answer=34648 Mobile Sitemaps on Google Webmaster Tools
 *
 * @param <string> $loc - absolute url for the web page
 * @param <array> $options:
 *      <datetime> datetime   - example: time(), created_at, updated_at
 *      <string>   changefreq - options: always, hourly, daily, weekly, monthly, yearly, never
 *      <number>   priority   - float or integer from 0.0 up to and including 1.0, for internal priority only.
 * @param <array> $childNodes - array of child nodes (eg. image, video) as strings to be inserted within the url node
 * @param <boolean> $mobile - default: false. set to true if mobile sitemap
 *
 * @return <string> XML Node, or an empty string if $loc is not a valid location url
 */
function sitemapUrlNode($loc, $options = array('datetime' => null, 'changefreq' => null, 'priority' => null), $childNodes = array(), $mobile = false)
{
    // LOCATION:
    // should be valid location url:
    if (!validLocationUrl($loc)) {
        return '';
    }
    $xml = xmlNode('loc', $loc);

    // LAST MODIFIED:
    // skipped when missing or empty (null, false, '', 0).
    // NOTE(review): the pattern hard-codes a +01:00 offset, so the value is
    // only right for servers running at UTC+1 - confirm before relying on it.
    if (!empty($options['datetime'])) {
        $lastmod = format_date($options['datetime'], 'yyyy-MM-ddTHH:mm:ss+01:00');
        $xml .= xmlNode('lastmod', $lastmod);
    }

    // CHANGE FREQUENCY:
    // should be always, hourly, daily, weekly, monthly, yearly or never
    $frequencies = array('always', 'hourly', 'daily', 'weekly', 'monthly', 'yearly', 'never');
    if (isset($options['changefreq']) && in_array($options['changefreq'], $frequencies, true)) {
        $xml .= xmlNode('changefreq', $options['changefreq']);
    }

    // PRIORITY:
    // should be a number between 0.0 and 1.0; both bounds are valid values.
    if (isset($options['priority'])) {
        $priority = $options['priority'];
        if ((is_float($priority) || is_int($priority)) && $priority >= 0.0 && $priority <= 1.0) {
            $xml .= xmlNode('priority', $priority);
        }
    }

    // CHILD NODES:
    // add all child nodes, each on its own line
    if (is_array($childNodes) && count($childNodes) > 0) {
        $xml .= "\n";
        foreach ($childNodes as $childNode) {
            $xml .= $childNode . "\n";
        }
    }

    // MOBILE
    if ($mobile) {
        $xml .= '<mobile:mobile/>';
    }

    // RETURN URL NODE:
    return xmlNode('url', $xml) . "\n";
}

/**
 * SIMPLE SITEMAP URL NODE
 * shortcut for sitemapUrlNode() without child nodes or mobile flag.
 *
 * @param <string> $loc - absolute url for the web page
 * @param <datetime> $datetime - example: time(), created_at, updated_at
 * @param <string> $changefreq - options: always, hourly, daily, weekly, monthly, yearly, never
 * @param <number> $priority - value between 0.0 and 1.0, for internal priority only.
 * @return <string> XML Node
 */
function simpleSitemapUrlNode($loc, $datetime, $changefreq, $priority)
{
    return sitemapUrlNode($loc, array(
        'datetime'   => $datetime,
        'changefreq' => $changefreq,
        'priority'   => $priority,
    ));
}

/**
 * SITEMAP GEO URL NODE
 *
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=94555 Geo Sitemap on Google Webmaster Tools
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=94556 Support Geo Formats
 *
 * @param <string> $loc - absolute url of the geo file / feed
 * @param <string> $format - shortname of geo format. Options: kml, kmz, georss
 * @return <string> XML Node, or an empty string if $loc or $format is invalid
 */
function sitemapGeoUrlNode($loc, $format)
{
    // LOCATION:
    // should be valid location url:
    if (!validLocationUrl($loc)) {
        return '';
    }

    // FORMAT:
    // shortname should be kml, kmz or georss
    if (!in_array($format, array('kml', 'kmz', 'georss'), true)) {
        return '';
    }

    $xml = xmlNode('loc', $loc);
    $xml .= xmlNode('geo:geo', xmlNode('geo:format', $format));

    // RETURN GEO URL NODE:
    return xmlNode('url', $xml) . "\n";
}

/**
 * SITEMAP IMAGE NODE
 * meant to be passed as a child node to sitemapUrlNode().
 *
 * @link http://www.google.com/support/webmasters/bin/answer.py?answer=178636 Sitemap Image on Google Webmaster Tools
 *
 * @param <string> $loc - absolute url of the image
 * @param <array> $options (every key is optional):
 *      <string> caption
 *      <string> geo_location - Example 1: Amsterdam, The Netherlands. Example 2 (latlong): 52.3730556, 4.8922222
 *      <string> title
 *      <string> license - absolute url of a license file
 *
 * @return <string> XML Node, or an empty string if $loc is not a valid location url
 */
function sitemapImageNode($loc, $options = array('caption' => null, 'geo_location' => null, 'title' => null, 'license' => null))
{
    // LOCATION:
    // should be valid location url; inside <image:image> the location tag
    // lives in the image namespace.
    if (!validLocationUrl($loc)) {
        return '';
    }
    $xml = xmlNode('image:loc', $loc);

    // CAPTION, GEO LOCATION, TITLE:
    // non-empty strings only; !empty() also covers keys the caller left out.
    foreach (array('caption', 'geo_location', 'title') as $key) {
        if (!empty($options[$key]) && is_string($options[$key])) {
            $xml .= xmlNode('image:' . $key, $options[$key]);
        }
    }

    // LICENSE:
    // should be an absolute url
    if (isset($options['license']) && validLocationUrl($options['license'])) {
        $xml .= xmlNode('image:license', $options['license']);
    }

    // RETURN IMAGE NODE:
    return xmlNode('image:image', $xml);
}

/**
 * SITEMAP NEWS NODE
 * meant to be passed as a child node to sitemapUrlNode().
 *
 * @link http://www.google.com/support/news_pub/bin/answer.py?hl=en&answer=74288 News Sitemaps on Google Webmaster Tools
 *
 * @param <string> $name - name of the news publication
 * @param <string> $language - examples: en, fr, nl
 * @param <array> $options - options: access, genres, pubdate, title, keywords, stock_tickers
 * @return <string> XML Node, or an empty string if $name or $language is not a string
 */
function sitemapNewsNode($name, $language, $options = array('access' => null, 'genres' => null, 'pubdate' => null, 'title' => null, 'keywords' => null, 'stock_tickers' => null))
{
    // PUBLICATION
    // valid news node needs at least a name & language
    if (!is_string($name) || !is_string($language)) {
        return '';
    }
    $publication = xmlNode('news:name', $name) . xmlNode('news:language', $language);
    $xml = xmlNode('news:publication', $publication);

    // ACCESS:
    // should be Subscription, Registration or empty
    if (isset($options['access']) && in_array($options['access'], array('Subscription', 'Registration'), true)) {
        $xml .= xmlNode('news:access', $options['access']);
    }

    // GENRES:
    // read from $options (not from an undefined local variable)
    if (isset($options['genres']) && is_string($options['genres'])) {
        $xml .= xmlNode('news:genres', $options['genres']);
    }

    // PUBLICATION DATE:
    // @todo: add check
    if (isset($options['pubdate'])) {
        $xml .= xmlNode('news:publication_date', $options['pubdate']);
    }

    // TITLE:
    if (isset($options['title']) && is_string($options['title'])) {
        $xml .= xmlNode('news:title', $options['title']);
    }

    // KEYWORDS:
    if (isset($options['keywords']) && is_string($options['keywords'])) {
        $xml .= xmlNode('news:keywords', $options['keywords']);
    }

    // STOCK TICKERS:
    // @todo: add check
    if (isset($options['stock_tickers'])) {
        $xml .= xmlNode('news:stock_tickers', $options['stock_tickers']);
    }

    // RETURN NEWS NODE:
    return xmlNode('news:news', $xml) . "\n";
}
This SitemapHelper adds functions to create UrlSets and different types of Nodes. Information about each node is provided inline under the @link.
Now we can use the SitemapHelper to generate a sitemap, for example for our Project module:
# /config/doctrine/schema.yml
Project:
  columns:
    id:
      type: integer(4)
      primary: true
      autoincrement: true
    name:
      type: string(255)
      notblank: true
    location_id:
      type: integer(4)
      notblank: true
  relations:
    Location:
      class: Location
      local: location_id
      foreign: id
      onDelete: CASCADE
      foreignAlias: Projects
Location:
  columns:
    id:
      type: integer(4)
      primary: true
      autoincrement: true
    address:
      type: string(255)
      notblank: true
    latitude:
      type: float(20,17)
      notblank: true
    longitude:
      type: float(20,17)
      notblank: true

First we need to get the projects via a query in the actions class:
// app/modules/project/actions/actions.class.php

/**
 * Actions of the project module.
 */
class projectActions extends sfActions
{
    /**
     * Sitemap action: fetches the id of every Project as an array and
     * exposes the result to the template as $projects, then turns the
     * layout off via setLayout(false).
     *
     * @param sfWebRequest $request
     */
    public function executeSitemap(sfWebRequest $request)
    {
        $projectIdQuery = Doctrine_Query::create()
            ->select('p.id')
            ->from('Project p');

        $this->projects = $projectIdQuery->fetchArray();

        $this->setLayout(false);
    }
}
And we need a route for our sitemap:
# /app/config/routing.yml
projectsitemap:
  url: /project/sitemap
  param: { module: project, action: sitemap, sf_format: atom }
  requirements:
    sf_format: atom

Now we can create the sitemap in the sitemap template. First include the SitemapHelper. Then use the sitemapUrlsetNode function to define the namespaces you'll be using for the URL nodes. In this case we use sitemapUrlNode for each project page and, as the projects have a GeoRSS feed, we add a sitemapGeoUrlNode for each of those too. In the urlset I also declared the Images namespace, so adding the URLs for all the images is still on the to-do list. Also note that the sitemap requires absolute URLs and therefore the host is defined at the top of the template.
<?php
// app/modules/project/templates/sitemapSuccess.atom.php
//
// Everything up to the urlset lives in one PHP block so that nothing - not
// even whitespace - is sent before the XML declaration.
use_helper('Sitemap');

// The sitemap needs absolute urls, so every routed url is prefixed with the host.
$host = 'http://' . $sf_request->getHost();

// The XML declaration is echoed from PHP: written literally in the template
// it would be parsed as a PHP open tag when short_open_tag is enabled.
echo '<?xml version="1.0" encoding="UTF-8"?>' . "\n";

// Opening urlset tag with the Images and Geo namespaces declared.
echo sitemapUrlsetNode(array('Images', 'Geo'));

// INDEX:
echo sitemapUrlNode($host . url_for('project/index'), array(
    'datetime' => false,
    'changefreq' => 'monthly',
    'priority' => 1.0,
));

// PROJECTS:
foreach ($projects as $project) {
    // project page (next to map):
    echo sitemapUrlNode($host . url_for('project/show?id=' . $project['id']), array(
        'datetime' => false,
        'changefreq' => 'yearly',
        'priority' => 0.7,
    ));

    // project geo rss feed:
    echo sitemapGeoUrlNode($host . url_for('project/feed?id=' . $project['id']), 'georss');
}
?>
</urlset>
That's it. You can now retrieve your application's sitemap via http://www.yoursite.com/project/sitemap .
Once you're done, you can submit your sitemap to all the large search engines, such as Google, Bing, Yahoo and Ask. For Google, for instance, you can use Google Webmaster Tools, which has its own "how to submit your sitemap" page.
In addition you can of course also put your sitemap feed as link in your website's html head:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="sitemap"
href="<?php echo url_for('@projectsitemap'); ?>"
type="application/atom+xml"
title="My first Sitemap"
/>