Web Images Videos Maps News Shopping Gmail more »
Recently Visited Groups | Help | Sign in
Google Groups Home
Group info
Recent pages and files
how-to-rewrite-urls-in-the-xslt    

Sometimes content has to be crawled through one URL, but at serving time users should be sent to a different URL when they click on a result. Below are two examples of rewriting result URLs in the XSLT stylesheet: one for HTTP URLs and one for SMB URLs (the two cases are handled differently). The code is based on the 5.0 XSLT, so details may change between versions of the stylesheet, but the approach should generally work.

?

Rewriting HTTP

?

1. Find the following section of the default stylesheet:

<!--
A single result (do not customize)
-->

?

2. In that section, find the following line:

<xsl:variable name="display_url_tmp" select="substring-after(UD, '://')"/>

?

3. Replace that line with the following. Make sure to add your own values for $rewrite_from and $rewrite_to.

<!-- Crawl-time URL prefix to replace, and the serve-time prefix that replaces it.
     Substitute your own values. -->
<xsl:variable name="rewrite_from" select="'http://192.168.1.1/'"/>
<xsl:variable name="rewrite_to" select="'http://www.host.com/'"/>
<!-- Result URL (U) with a leading $rewrite_from swapped for $rewrite_to.
     A URL that does not start with $rewrite_from passes through unchanged. -->
<xsl:variable name="rewritten_url">
  <xsl:choose>
    <xsl:when test="starts-with(U, $rewrite_from)">
      <xsl:value-of select="concat($rewrite_to, substring-after(U, $rewrite_from))"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="U"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:variable>
<!-- Rewritten URL without its scheme prefix. Keep the '://' literal on one line:
     a line break inside the attribute value is normalized to a space and would
     become part of the search string, so nothing would ever match. -->
<xsl:variable name="display_url_tmp" select="substring-after($rewritten_url, '://')"/>

?

4. Look for the following line (about 70 lines down from above)

<xsl:otherwise>
<xsl:value-of disable-output-escaping='yes' select="U"/>
</xsl:otherwise>

?

5. Change it to the following.

<xsl:otherwise>
<!-- Default case: emit the rewritten URL ($rewritten_url) in place of the raw result URL (U). -->
<xsl:value-of disable-output-escaping='yes' select="$rewritten_url"/>
</xsl:otherwise>

?

6. Save this new stylesheet and test it.

?

?

?

Rewriting SMB


Replace the entire "A single result" section of the stylesheet with the version below. I did not keep track of each individual change I made, so the whole section is reproduced. :-)



<!--
A single result (do not customize)

-->
<xsl:template match="R">
<xsl:param name="query"/>

<!-- Crawl-time SMB prefix to replace, and the serve-time prefix that replaces it.
     Substitute your own values. -->
<xsl:variable name="rewrite_from" select="'smb://www.google.com/'"/>
<xsl:variable name="rewrite_to" select="'smb://myserver/'"/>
<!-- Result URL (U) with a leading $rewrite_from swapped for $rewrite_to.
     A URL that does not start with $rewrite_from passes through unchanged. -->
<xsl:variable name="rewritten_url">
  <xsl:choose>
    <xsl:when test="starts-with(U, $rewrite_from)">
      <xsl:value-of select="concat($rewrite_to, substring-after(U, $rewrite_from))"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="U"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:variable>
<!-- Rewritten URL without its scheme prefix (everything after '://'). -->
<xsl:variable name="display_url_tmp" select="substring-after($rewritten_url, '://')"/>

<!-- Display form of the result URL: the rewritten URL minus its scheme prefix.
     When $display_url_tmp is empty (the rewritten URL has no scheme separator),
     fall back to the whole rewritten URL. The previous fallback took
     substring-after() of the already-empty $display_url_tmp, which is always ''. -->
<xsl:variable name="display_url">
  <xsl:choose>
    <xsl:when test="$display_url_tmp">
      <xsl:value-of select="$display_url_tmp"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$rewritten_url"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:variable>
<!-- UE (presumably the escaped form of the result URL; confirm) without its scheme prefix. -->
<xsl:variable name="escaped_url" select="substring-after(UE, '://')"/>
<!-- Scheme of the crawled URL (the text before '://'), e.g. 'smb'. It is compared
     against 'nfs' and 'smb' when the result link is built, so the separator literal
     must stay on one line: a line break inside it turns into a space and the
     variable would always be empty. -->
<xsl:variable name="protocol" select="substring-before(U, '://')"/>
<xsl:variable name="full_url" select="UE"/>
<xsl:variable name="crowded_url" select="HN/@U"/>
<xsl:variable name="crowded_display_url" select="HN"/>
<!-- Alphabets passed to translate() to upper-case a string. -->
<xsl:variable name="lower" select="'abcdefghijklmnopqrstuvwxyz'"/>
<xsl:variable name="upper" select="'ABCDEFGHIJKLMNOPQRSTUVWXYZ'"/>

<!-- Rewritten URL without its scheme prefix. The text must start right after '://'
     for the 'noindex!/' prefix test below to be able to match. -->
<xsl:variable name="temp_url" select="substring-after($rewritten_url, '://')"/>
<!-- False when the URL carries the 'noindex!/' marker; such results are labelled
     "Not Indexed:" and get no hyperlink. -->
<xsl:variable name="url_indexed" select="not(starts-with($temp_url, 'noindex!/'))"/>

<!-- URL used for the db and nfs/smb link targets and as the link text when a result has no title:
U passed through the truncate_url template when $truncate_result_urls is not '0', otherwise U unchanged.
NOTE(review): this is built from the crawled URL (U), not from $rewritten_url; confirm that is intended. -->
<xsl:variable name="stripped_url">
<xsl:choose>
<xsl:when test="$truncate_result_urls != '0'">
<xsl:call-template name="truncate_url">
<xsl:with-param name="t_url" select="U"/>
</xsl:call-template>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="U"/>
</xsl:otherwise>
</xsl:choose>
</xsl:variable>

<!-- * Indent as required (only supports 2 levels) * -->
<!-- For a second-level result (@L='2'), write an opening blockquote tag as raw text
     with output escaping disabled; no matching closing tag is written inside this xsl:if. -->
<xsl:if test="@L='2'">
<xsl:text disable-output-escaping="yes">&lt;blockquote
class=&quot;g&quot;&gt;</xsl:text>
</xsl:if>

<!-- * Result Header * -->
<p class="g">

<!-- * Result Title (including PDF tag and hyperlink) * -->
<xsl:if test="$show_res_title != '0'">
<!-- File-type label shown before the title: nothing for HTML or a missing/empty MIME
     type, a fixed tag for the known types, otherwise the upper-cased file extension. -->
<font size="-2"><b>
  <xsl:choose>
    <xsl:when test="@MIME='text/html' or @MIME='' or not(@MIME)"/>
    <xsl:when test="@MIME='text/plain'">[TEXT]</xsl:when>
    <xsl:when test="@MIME='application/rtf'">[RTF]</xsl:when>
    <xsl:when test="@MIME='application/pdf'">[PDF]</xsl:when>
    <xsl:when test="@MIME='application/postscript'">[PS]</xsl:when>
    <xsl:when test="@MIME='application/vnd.ms-powerpoint'">[MS POWERPOINT]</xsl:when>
    <xsl:when test="@MIME='application/vnd.ms-excel'">[MS EXCEL]</xsl:when>
    <xsl:when test="@MIME='application/msword'">[MS WORD]</xsl:when>
    <xsl:otherwise>
      <!-- Extension = text after the last '.' in the part of U that follows the host.
           The scheme is stripped with '://' so the first '/' found is the one after
           the host name; otherwise a dot in the host name could be taken for the
           extension. 'UNKNOWN' is passed as the fallback value. -->
      <xsl:variable name="extension">
        <xsl:call-template name="last_substring_after">
          <xsl:with-param name="string" select="substring-after(substring-after(U, '://'), '/')"/>
          <xsl:with-param name="separator" select="'.'"/>
          <xsl:with-param name="fallback" select="'UNKNOWN'"/>
        </xsl:call-template>
      </xsl:variable>
      <!-- xsl:text keeps the brackets tight around the value, like the fixed labels above. -->
      <xsl:text>[</xsl:text>
      <xsl:value-of select="translate($extension,$lower,$upper)"/>
      <xsl:text>]</xsl:text>
    </xsl:otherwise>
  </xsl:choose>
</b></font>
<xsl:text> </xsl:text>
<xsl:text> </xsl:text>

<!-- Opening anchor tag for an indexed result. It is written as raw text because the
     closing tag is emitted by a separate xsl:if after the link text. -->
<xsl:if test="$url_indexed">
  <xsl:text disable-output-escaping='yes'>&lt;a href="</xsl:text>
  <xsl:choose>
    <!-- Database results: 'db/' followed by the stripped URL without its scheme prefix. -->
    <xsl:when test="starts-with(U, $db_url_protocol)">
      <xsl:value-of disable-output-escaping='yes'
                    select="concat('db/', substring-after($stripped_url, '://'))"/>
    </xsl:when>
    <!-- * URI for smb or NFS must be escaped because it appears in the URI query * -->
    <!-- Link target is '<protocol>/' followed by the stripped URL without its scheme prefix. -->
    <xsl:when test="$protocol='nfs' or $protocol='smb'">
      <xsl:value-of disable-output-escaping='yes'
                    select="concat($protocol, '/', substring-after($stripped_url, '://'))"/>
    </xsl:when>
    <!-- Everything else links straight to the rewritten URL. -->
    <xsl:otherwise>
      <xsl:value-of disable-output-escaping='yes' select="$rewritten_url"/>
    </xsl:otherwise>
  </xsl:choose>
  <xsl:text disable-output-escaping='yes'>"&gt;</xsl:text>
</xsl:if>
<!-- Link text: the title (T) passed through the reformat_keyword template when the
     result has a title, otherwise the stripped URL. -->
<span class="l">
  <xsl:if test="T">
    <xsl:call-template name="reformat_keyword">
      <xsl:with-param name="orig_string" select="T"/>
    </xsl:call-template>
  </xsl:if>
  <xsl:if test="not(T)">
    <xsl:value-of select="$stripped_url"/>
  </xsl:if>
</span>
<!-- Close the anchor as raw text, under the same $url_indexed test that guards the opening tag. -->
<xsl:if test="$url_indexed">
<xsl:text disable-output-escaping='yes'>&lt;/a&gt;</xsl:text>
</xsl:if>
</xsl:if>

<!-- * Snippet Box * -->
<table cellpadding="0" cellspacing="0" border="0">
<tr>
<td class="s">
<!-- Snippet (S), passed through the reformat_keyword template, unless $show_res_snippet is '0'. -->
<xsl:if test="$show_res_snippet != '0'">
<xsl:call-template name="reformat_keyword">
<xsl:with-param name="orig_string" select="S"/>
</xsl:call-template>
</xsl:if>

<!-- * Meta tags * -->
<!-- MT children are handed to their own templates unless $show_meta_tags is '0'. -->
<xsl:if test="$show_meta_tags != '0'">
<xsl:apply-templates select="MT"/>
</xsl:if>

<!-- * URL * -->
<br/>

<!-- URL line under the snippet, showing the rewritten URL without its scheme prefix.
     Results that are not indexed get a "Not Indexed:" label, shown only when at least
     one of the size/date/cache options is enabled; indexed results show the URL when
     $show_res_url is not '0'. -->
<font color="{$res_url_color}" size="{$res_url_size}">
  <xsl:choose>
    <xsl:when test="not($url_indexed)">
      <xsl:if test="($show_res_size!='0') or ($show_res_date!='0') or ($show_res_cache!='0')">
        <xsl:text>Not Indexed:</xsl:text>
        <xsl:value-of select="substring-after($rewritten_url, '://')"/>
      </xsl:if>
    </xsl:when>
    <xsl:otherwise>
      <xsl:if test="$show_res_url != '0'">
        <!-- The '://' literal must stay on one line; a line break inside it is
             normalized to a space and the displayed URL would come out empty. -->
        <xsl:value-of select="substring-after($rewritten_url, '://')"/>
      </xsl:if>
    </xsl:otherwise>
  </xsl:choose>
</font>

<!-- * Miscellaneous (- size - date - cache) * -->

?

Version: 
Create a group - Google Groups - Google Home - Terms of Service - Privacy Policy
©2009 Google