summaryrefslogtreecommitdiff
path: root/kolab.org/www/drupal-7.18/sites/all/modules/linkchecker/linkchecker.module
diff options
context:
space:
mode:
Diffstat (limited to 'kolab.org/www/drupal-7.18/sites/all/modules/linkchecker/linkchecker.module')
-rw-r--r--kolab.org/www/drupal-7.18/sites/all/modules/linkchecker/linkchecker.module1309
1 files changed, 966 insertions, 343 deletions
diff --git a/kolab.org/www/drupal-7.18/sites/all/modules/linkchecker/linkchecker.module b/kolab.org/www/drupal-7.18/sites/all/modules/linkchecker/linkchecker.module
index 9c13058..bc1abf9 100644
--- a/kolab.org/www/drupal-7.18/sites/all/modules/linkchecker/linkchecker.module
+++ b/kolab.org/www/drupal-7.18/sites/all/modules/linkchecker/linkchecker.module
@@ -109,7 +109,7 @@ function linkchecker_menu() {
);
// Add the user menu item after node/edit tab.
$items['user/%user/linkchecker'] = array(
- 'access callback' => '_linkchecker_user_access_own_broken_links_report',
+ 'access callback' => '_linkchecker_user_access_account_broken_links_report',
'access arguments' => array(1),
'description' => 'Shows a list of broken links in content.',
'file' => 'linkchecker.pages.inc',
@@ -133,8 +133,8 @@ function linkchecker_menu() {
}
/**
-* Implements hook_admin_paths().
-*/
+ * Implements hook_admin_paths().
+ */
function linkchecker_admin_paths() {
$paths = array(
// This is marked as an administrative path so that if it is visited from
@@ -149,128 +149,397 @@ function linkchecker_admin_paths() {
/**
* Access callback for user/%user/linkchecker.
*/
-function _linkchecker_user_access_own_broken_links_report($account) {
+function _linkchecker_user_access_account_broken_links_report($account) {
global $user;
- // Access to this path is only granted for authenticated users viewing their
- // own broken links and all administrative users.
- return $account->uid && ($user->uid == $account->uid || (user_access('administer nodes') && user_access('administer linkchecker'))) && user_access('access own broken links report');
+ // Users with 'access own broken links report' permission can only view their
+ // own report. Users with the 'access broken links report' permission can
+ // view the report for any authenticated user.
+ return $account->uid && (($user->uid == $account->uid && user_access('access own broken links report')) || user_access('access broken links report'));
}
/**
* Access callback for linkchecker/%linkchecker_link/edit.
*/
function _linkchecker_user_access_edit_link_settings($link) {
- global $user;
+ return user_access('edit link settings') && _linkchecker_link_access($link);
+}
+
+/**
+ * Determines if the current user has access to view a link.
+ *
+ * Link URLs can contain private information (for example, usernames and
+ * passwords). So this module should only display links to a user if the link
+ * already appears in at least one place on the site where the user would
+ * otherwise have access to see it.
+ */
+function _linkchecker_link_access($link) {
+ $link = (object) $link;
+ return _linkchecker_link_node_ids($link) || _linkchecker_link_comment_ids($link) || _linkchecker_link_block_ids($link);
+}
- if (user_access('administer nodes') && user_access('administer linkchecker')) {
- // Full access to this path is granted to administrative users.
- return TRUE;
+/**
+ * Returns IDs of nodes that contain a link which the current user may be allowed to view.
+ *
+ * Important note: For performance reasons, this function is not always
+ * guaranteed to return the exact list of node IDs that the current user is
+ * allowed to view. It will, however, always return an empty array if the user
+ * does not have access to view *any* such nodes, thereby meeting the security
+ * goals of _linkchecker_link_access() and other places that call it.
+ *
+ * In the case where a user has access to some of the nodes that contain the
+ * link, this function may return some node IDs that the user does not have
+ * access to. Therefore, use caution with its results.
+ *
+ * @param object $link
+ * An object representing the link to check.
+ * @param object $node_author_account
+ * (optional) If a user account object is provided, the returned nodes will
+ * additionally be restricted to only those owned by this account. Otherwise,
+ * nodes owned by any user account may be returned.
+ *
+ * @return array
+ * An array of node IDs that contain the provided link and that the current
+ * user may be allowed to view.
+ */
+function _linkchecker_link_node_ids($link, $node_author_account = NULL) {
+ static $fields_with_node_links = array();
+
+ // Exit if all node types are disabled or if the user cannot access content,
+ // there is no need to check further.
+ $linkchecker_scan_nodetypes = array_filter(variable_get('linkchecker_scan_nodetypes', array()));
+ if (empty($linkchecker_scan_nodetypes) || !user_access('access content')) {
+ return array();
}
- else {
- // Verify that $lid is at least in one of the authors nodes or comments.
- $subquery2 = db_select('node', 'n');
- $subquery2->innerJoin('node_revision', 'r', 'r.vid = n.vid');
- $subquery2->innerJoin('linkchecker_node', 'ln', 'ln.nid = n.nid');
- $subquery2->innerJoin('linkchecker_link', 'll', 'll.lid = ln.lid AND ll.lid = :lid', array(':lid' => $link->lid));
- $subquery2->condition(db_or()
- ->condition('n.uid', $user->uid)
- ->condition('r.uid', $user->uid)
+
+ // Get a list of nodes containing the link, using addTag('node_access') to
+ // allow node access modules to exclude nodes that the current user does not
+ // have access to view.
+ if (!empty($node_author_account)) {
+ $query = db_select('node', 'n');
+ $query->addTag('node_access');
+ $query->innerJoin('linkchecker_node', 'ln', 'ln.nid = n.nid');
+ $query->innerJoin('node_revision', 'r', 'r.vid = n.vid');
+ $query->condition('ln.lid', $link->lid);
+ $query->condition(db_or()
+ ->condition('n.uid', $node_author_account->uid)
+ ->condition('r.uid', $node_author_account->uid)
);
- $subquery2->distinct();
- $subquery2->fields('ll' , array('lid'));
-
- if (variable_get('linkchecker_scan_comments', 0)) {
- // Build query for broken links in nodes and comments of the current user.
- $subquery3 = db_select('comment', 'c');
- $subquery3->innerJoin('linkchecker_comment', 'lc', 'lc.cid = c.cid');
- $subquery3->innerJoin('linkchecker_link', 'll', 'll.lid = lc.lid AND ll.lid = :lid', array(':lid' => $link->lid));
- $subquery3->condition('c.uid', $user->uid);
- $subquery3->distinct();
- $subquery3->fields('ll' , array('lid'));
-
- // UNION the linkchecker_node and linkchecker_comment tables.
- $subquery1 = db_select($subquery2->union($subquery3), 'q1')->fields('q1', array('lid'));
+ $query->fields('n', array('nid'));
+ }
+ else {
+ $query = db_select('node', 'n');
+ $query->addTag('node_access');
+ $query->innerJoin('linkchecker_node', 'ln', 'ln.nid = n.nid');
+ $query->condition('ln.lid', $link->lid);
+ $query->fields('n', array('nid'));
+ }
+ $nodes = $query->execute();
+
+ // Check if the current user has access to view the link in each node.
+ // However, for performance reasons, as soon as we find one node where that
+ // is the case, stop checking and return the remainder of the list.
+ $nids = array();
+ $access_allowed = FALSE;
+ foreach ($nodes as $node) {
+ if ($access_allowed) {
+ $nids[] = $node->nid;
+ continue;
}
- else {
- // Build query for broken links in nodes of the current user.
- $subquery1 = db_select($subquery2, 'q1')->fields('q1', array('lid'));
+ $node = node_load($node->nid);
+
+ // We must check whether the link is currently part of the node; if not, we
+ // do not want to return it (and it is not safe to, since we cannot know if
+ // it contained access restrictions for the current user at the point which
+ // it was originally extracted by the Link checker module).
+ if (!isset($fields_with_node_links[$node->nid])) {
+ $fields_with_node_links[$node->nid] = _linkchecker_extract_node_links($node, TRUE);
+ }
+ if (empty($fields_with_node_links[$node->nid][$link->url])) {
+ continue;
}
+ // If the link appears in fields and a field access module is being used,
+ // we must check that the current user has access to view at least one field
+ // that contains the link; if they don't, we should not return the node.
+ $fields = $fields_with_node_links[$node->nid][$link->url];
+ if (module_implements('field_access')) {
+ $fields_with_access = array();
+
+ $bundle_instances = field_info_instances('node', $node->type);
+ foreach ($bundle_instances as $field_name => $field_instance) {
+ $field = field_info_field($field_name);
+
+ // Field types supported by linkchecker.
+ $fields_supported = array(
+ 'text_with_summary',
+ 'text_long',
+ 'text',
+ 'link_field',
+ );
+
+ // Only check link and text fields, since those are the only types we
+ // extract links from.
+ if (in_array($field['type'], $fields_supported) && field_access('view', $field, 'node', $node)) {
+ $fields_with_access[] = $field['field_name'];
+ }
+ }
+ if (!array_intersect($fields, $fields_with_access)) {
+ continue;
+ }
+ }
+ $nids[] = $node->nid;
+ $access_allowed = TRUE;
+ }
+
+ return $nids;
+}
+
+/**
+ * Returns IDs of comments that contain a link which the current user is allowed to view.
+ *
+ * @param object $link
+ * An object representing the link to check.
+ * @param object $comment_author_account
+ * (optional) If a user account object is provided, the returned comments
+ * will additionally be restricted to only those owned by this account.
+ * Otherwise, comments owned by any user account may be returned.
+ *
+ * @return array
+ * An array of comment IDs that contain the provided link and that the
+ * current user is allowed to view.
+ */
+function _linkchecker_link_comment_ids($link, $comment_author_account = NULL) {
+ // Exit if comments are disabled or if the user cannot access comments, there
+ // is no need to check further.
+ if (!variable_get('linkchecker_scan_comments', 0) || !user_access('access comments')) {
+ return array();
+ }
+
+ // Get a list of comments containing the link, using addTag('node_access') to
+ // allow comment access modules to exclude comments that the current user
+ // does not have access to view.
+ if (!empty($comment_author_account)) {
+ $query = db_select('comment', 'c');
+ $query->addMetaData('base_table', 'comment');
+ $query->addTag('node_access');
+ $query->innerJoin('linkchecker_comment', 'lc', 'lc.cid = c.cid');
+ $query->condition('lc.lid', $link->lid);
+ $query->condition('c.uid', $comment_author_account->uid);
+ $query->fields('c', array('cid'));
+ }
+ else {
+ $query = db_select('comment', 'c');
+ $query->addMetaData('base_table', 'comment');
+ $query->addTag('node_access');
+ $query->innerJoin('linkchecker_comment', 'lc', 'lc.cid = c.cid');
+ $query->condition('lc.lid', $link->lid);
+ $query->fields('c', array('cid'));
+ }
+ $cids = $query->execute()->fetchCol();
- // Build full query.
- $query = db_select('linkchecker_link', 'll');
- $query->innerJoin($subquery1, 'q2', 'q2.lid = ll.lid');
- $query->fields('ll');
- $is_author_of_lid = $query->countQuery()->execute()->fetchField();
+ // Return the array of comment IDs.
+ return $cids;
+}
+
+/**
+ * Returns IDs of blocks that contain a link which the current user is allowed to view.
+ *
+ * @param object $link
+ * An object representing the link to check.
+ *
+ * @return array
+ * An array of custom block IDs that contain the provided link and that the
+ * current user is allowed to view.
+ */
+function _linkchecker_link_block_ids($link) {
+ // Exit if blocks are disabled.
+ if (!variable_get('linkchecker_scan_blocks', 0)) {
+ return array();
+ }
+
+ // Get the initial list of block IDs.
+ $bids = db_query('SELECT bid FROM {linkchecker_block_custom} WHERE lid = :lid', array(':lid' => $link->lid))->fetchCol();
- // This path is only allowed for authenticated users looking at their own
- // links.
- return $is_author_of_lid && user_access('edit link settings');
+ // If the user can administer blocks, they're able to see all block content.
+ if (user_access('administer blocks')) {
+ return $bids;
}
+
+ // Otherwise, only return blocks that this user (or anonymous users) have
+ // access to.
+ global $user;
+ $rids = array_keys($user->roles);
+ $rids[] = DRUPAL_ANONYMOUS_RID;
+
+ $query = db_select('block', 'b');
+ $query->leftJoin('block_role', 'r', 'b.module = r.module AND b.delta = r.delta');
+ $query->condition('b.module', 'block');
+ $query->condition(db_or()
+ ->condition('r.rid', $rids, 'IN')
+ ->isNull('r.rid')
+ );
+ $query->fields('b', array('delta'));
+ $query->distinct();
+ $allowed_bids = $query->execute()->fetchCol();
+
+ return array_intersect($bids, $allowed_bids);
}
/**
* Implements hook_cron().
*/
function linkchecker_cron() {
- // Get max_execution_time from configuration, override 0 with 240 seconds.
- $max_execution_time = ini_get('max_execution_time') == 0 ? 240 : ini_get('max_execution_time');
-
// Remove outdated links no longer in use once per day.
if (REQUEST_TIME - variable_get('linkchecker_cleanup_links_last', 0) >= 86400) {
_linkchecker_cleanup_links();
variable_set('linkchecker_cleanup_links_last', REQUEST_TIME);
}
- // @todo Implement cURL support.
- // $has_curl = function_exists('curl_init');
+ // Run link checker in a new process, independent of cron.
+ if (module_exists('httprl') && variable_get('linkchecker_check_library', 'core') == 'httprl') {
+ // Setup callback options array; call _linkchecker_check_links() in the
+ // background.
+ $callback_options = array(array('function' => '_linkchecker_check_links'));
+ // Queue up the request.
+ httprl_queue_background_callback($callback_options);
+ // Execute request.
+ httprl_send_request();
+
+ // Exit here so we don't call _linkchecker_check_links() in this process.
+ return;
+ }
+ // Run the link checks the normal way.
+ _linkchecker_check_links();
+}
- // @todo: Remove some confusion about the max links that can be checked per
- // cron run and guess that 2 link can be checked per second what is
- // nevertheless uncommon. But we can use the max_execution_time to calculate
- // a value that is higher, but not totally out of scope to keep the query
- // resultset small. For cURL we need to add this setting back or a thread
- // limit per remote server for not overloading them.
- $check_links_max_per_cron_run = $max_execution_time;
- // $check_links_max_per_cron_run = variable_get('linkchecker_check_links_max', 10);
+/**
+ * Run link checks.
+ */
+function _linkchecker_check_links() {
+ // Get max_execution_time from configuration, override 0 with 240 seconds.
+ $max_execution_time = ini_get('max_execution_time') == 0 ? 240 : ini_get('max_execution_time');
+ // Make sure we have enough time to validate all of the links.
+ drupal_set_time_limit($max_execution_time);
- $check_links_interval = variable_get('linkchecker_check_links_interval', 2419200);
- $useragent = variable_get('linkchecker_check_useragent', 'Drupal (+http://drupal.org/)');
+ // Make sure this is the only process trying to run this function.
+ if (!lock_acquire(__FUNCTION__, $max_execution_time)) {
+ watchdog('linkchecker', 'Attempted to re-run link checks while they are already running.', array(), WATCHDOG_WARNING);
+ return FALSE;
+ }
+
+ $has_httprl = (module_exists('httprl') && variable_get('linkchecker_check_library', 'core') == 'httprl');
+
+ // Do not confuse admins with a setting of maximum checkable links per cron
+ // run and guess that 2 links can be checked per second with 1 thread, which
+ // is nevertheless uncommon. The max_execution_time can be used to calculate
+ // a useful value that is higher, but not totally out of scope and limits the
+ // query resultset to a reasonable size.
+ $linkchecker_check_connections_max = variable_get('linkchecker_check_connections_max', 8);
+ $check_links_max_per_cron_run = ($has_httprl) ? ($linkchecker_check_connections_max * $max_execution_time) : $max_execution_time;
+
+ $linkchecker_check_links_interval = variable_get('linkchecker_check_links_interval', 2419200);
+ $linkchecker_check_useragent = variable_get('linkchecker_check_useragent', 'Drupal (+http://drupal.org/)');
+
+ // Connection limit can be overridden via settings.php. Two connections is the
+ // limit defined in RFC http://www.ietf.org/rfc/rfc2616.txt. Modern browsers
+ // are typically using 6-8 connections and no more. Never use more and keep
+ // in mind that you can overload other people's servers.
+ $linkchecker_check_domain_connections = variable_get('linkchecker_check_domain_connections', 2);
// Get URLs for checking.
- $result = db_query_range('SELECT * FROM {linkchecker_link} WHERE last_checked < :last_checked AND status = :status ORDER BY last_checked, lid ASC', 0, $check_links_max_per_cron_run, array(':last_checked' => REQUEST_TIME - $check_links_interval, ':status' => 1));
- foreach ($result as $link) {
- // Make sure we have enough time to validate the link.
- drupal_set_time_limit(240);
+ $links = db_query_range('SELECT * FROM {linkchecker_link} WHERE last_checked < :last_checked AND status = :status ORDER BY last_checked, lid ASC', 0, $check_links_max_per_cron_run, array(':last_checked' => REQUEST_TIME - $linkchecker_check_links_interval, ':status' => 1));
+ $links_remaining = $links->rowCount();
+ foreach ($links as $link) {
$headers = array();
- $headers['User-Agent'] = 'User-Agent: ' . $useragent;
+ $headers['User-Agent'] = $linkchecker_check_useragent;
+
+ $uri = @parse_url($link->url);
+
+ // URL contains a fragment.
+ if (in_array($link->method, array('HEAD', 'GET')) && !empty($uri['fragment'])) {
+ // We need the full content and not only the HEAD.
+ $link->method = 'GET';
+ // Request text content only (like Firefox/Chrome).
+ $headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8';
+ }
+ elseif ($link->method == 'GET') {
+ // Range: Only request the first 1024 bytes from remote server. This is
+ // required to prevent timeouts on URLs that are large downloads.
+ $headers['Range'] = 'bytes=0-1024';
+ }
+
+ // Add in the headers.
+ $options = array(
+ 'headers' => $headers,
+ 'method' => $link->method,
+ 'max_redirects' => 0,
+ );
- // Range: Only request the first 1024 bytes from remote server. This is
- // required to prevent timeouts on URLs that are large downloads.
- if ($link->method == 'GET') { $headers['Range'] = 'bytes=0-1024'; }
+ if ($has_httprl) {
+ // Define the callback and add the $link object to it.
+ // Notes:
+ // - 'global_timeout' does not require a timer_read('page'), as this job
+ // runs in a new process, independent of cron.
+ $options += array(
+ 'global_connections' => $linkchecker_check_connections_max,
+ 'global_timeout' => $max_execution_time - 30,
+ 'domain_connections' => $linkchecker_check_domain_connections,
+ 'callback' => array(
+ array(
+ 'function' => '_linkchecker_status_handling',
+ ),
+ $link, // This need to be passed or it's not send back to _linkchecker_status_handling()
+ ),
+ );
+ // Queue up the requests.
+ httprl_request($link->url, $options);
+ $links_remaining--;
+
+ // After all links are queued, run the url checks.
+ if ($links_remaining == 0) {
+ httprl_send_request();
+ }
+ }
+ else {
+ // Drupal core.
+ $response = drupal_http_request($link->url, $options);
- // Fetch URL.
- $response = drupal_http_request($link->url, array('headers' => $headers, 'method' => $link->method, 'max_redirects' => 0));
- _linkchecker_status_handling($link, $response);
+ // Add 'redirect_code' property to core response object for consistency
+ // with HTTPRL object.
+ if ($response->code == 301 && !isset($response->redirect_code)) {
+ $response->redirect_code = $response->code;
+ }
+ // Add 'uri' property to core response object for 'fragment' check and
+ // consistency with HTTPRL object.
+ $response->uri = $uri;
- if ((timer_read('page') / 1000) > ($max_execution_time / 2)) {
- break; // Stop once we have used over half of the maximum execution time.
+ _linkchecker_status_handling($response, $link);
+
+ if ((timer_read('page') / 1000) > ($max_execution_time / 2)) {
+ // Stop once we have used over half of the maximum execution time.
+ break;
+ }
}
}
+
+ // Release the lock.
+ lock_release(__FUNCTION__);
+ watchdog('linkchecker', 'Link checks completed.', array(), WATCHDOG_INFO);
+ watchdog('linkchecker', 'Memory usage: @memory_get_usage, Peak memory usage: @memory_get_peak_usage.', array('@memory_get_peak_usage' => format_size(memory_get_peak_usage()), '@memory_get_usage' => format_size(memory_get_usage())), WATCHDOG_DEBUG);
+ return TRUE;
}
/**
* Status code handling.
*
- * @param string $link
- * An object containing the url, lid and fail_count.
* @param object $response
* An object containing the HTTP request headers, response code, headers,
* data and redirect status.
+ * @param object $link
+ * An object containing the url, lid and fail_count.
*/
-function _linkchecker_status_handling($link, $response) {
- $useragent = variable_get('linkchecker_check_useragent', 'Drupal (+http://drupal.org/)');
+function _linkchecker_status_handling(&$response, $link) {
$ignore_response_codes = preg_split('/(\r\n?|\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));
// - Prevent E_ALL warnings in DB updates for non-existing $response->error.
@@ -278,17 +547,32 @@ function _linkchecker_status_handling($link, $response) {
// what results in a database UPDATE failure. For more information, see
// http://drupal.org/node/371495.
// Workaround: ISO-8859-1 as source encoding may be wrong, but WFM.
- if (!isset($response->error)) { $response->error = ''; }
- if (!isset($response->status_message)) { $response->status_message = ''; }
+ if (!isset($response->error)) {
+ $response->error = '';
+ }
+ if (!isset($response->status_message)) {
+ $response->status_message = '';
+ }
$response->error = trim(drupal_convert_to_utf8($response->error, 'ISO-8859-1'));
$response->status_message = trim(drupal_convert_to_utf8($response->status_message, 'ISO-8859-1'));
- // Make core response object consistent with HTTPRL.
- if ($response->code == 301 && !isset($response->redirect_code)) {
- $response->redirect_code = $response->code;
+ // Destination anchors in HTML documents may be specified either by the A
+ // element (naming it with the name attribute), or by any other element
+ // (naming with the id attribute).
+ // See http://www.w3.org/TR/html401/struct/links.html
+ if ($response->code == 200 && !empty($response->uri['fragment']) && (!empty($response->data) && !preg_match('/(\s[^>]*(name|id)=["\'])(' . preg_quote($response->uri['fragment'], '/') . ')(["\'][^>]*>)/i', $response->data))) {
+ // Override status code 200 with status code 404 so it can be handled with
+ // default status code 404 logic and custom error text.
+ $response->code = 404;
+ $response->status_message = $response->error = 'URL fragment identifier not found in content';
}
switch ($response->code) {
+ case -4: // HTTPRL: httprl_send_request timed out.
+ // Skip these and try them again next cron run.
+ break;
+
+ case -2: // HTTPRL: maximum allowed redirects exhausted.
case 301:
// Remote site send status code 301 and link needs an update.
db_update('linkchecker_link')
@@ -297,7 +581,7 @@ function _linkchecker_status_handling($link, $response) {
'code' => $response->redirect_code,
'error' => $response->status_message,
'fail_count' => 0,
- 'last_checked' => time()
+ 'last_checked' => time(),
))
->expression('fail_count', 'fail_count + 1')
->execute();
@@ -307,70 +591,114 @@ function _linkchecker_status_handling($link, $response) {
// link and if we trust this change we are able to replace the old link
// with the new one without any hand work.
$auto_repair_301 = variable_get('linkchecker_action_status_code_301', 0);
- if ($auto_repair_301 && $auto_repair_301 <= ($link->fail_count+1) && valid_url($response->redirect_url, TRUE)) {
+ if ($auto_repair_301 && $auto_repair_301 <= ($link->fail_count + 1) && valid_url($response->redirect_url, TRUE)) {
+ // Switch anonymous user to an admin.
+ linkchecker_impersonate_user(user_load_by_name(variable_get('linkchecker_impersonate_user', '')));
+
// NODES: Autorepair all nodes having this outdated link.
- $nids = db_query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', array(':lid' => $link->lid))->fetchCol();
- $nodes = node_load_multiple($nids);
- foreach ($nodes as $node) {
- $node = _linkchecker_replace_fields($node, $node->type, 'node', $link->url, $response->redirect_url);
-
- // Always use the default revision setting. For more information,
- // see node_object_prepare().
- $node_options = variable_get('node_options_' . $node->type, array('status', 'promote'));
- $node->revision = in_array('revision', $node_options);
-
- // Generate a log message for the node_revisions table, visible on
- // the node's revisions tab.
- $node->log = t('Changed permanently moved link in %node from %src to %dst.', array('%node' => url('node/' . $row->nid), '%src' => $link->url, '%dst' => $response->redirect_url));
-
- // Save changed node and update the node link list.
- node_save($node);
- watchdog('linkchecker', 'Changed permanently moved link in %node from %src to %dst.', array('%node' => url('node/' . $row->nid), '%src' => $link->url, '%dst' => $response->redirect_url), WATCHDOG_INFO);
+ $result = db_query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', array(':lid' => $link->lid));
+ foreach ($result as $row) {
+ // Explicitly don't use node_load_multiple() or the module may run
+ // into issues like http://drupal.org/node/1210606. With this logic
+ // nodes can be updated until an out of memory occurs and further
+ // updates will be made on the remaining nodes only.
+ $node = node_load($row->nid);
+
+ // Has the node object loaded successfully?
+ if (is_object($node)) {
+ $node_original = clone $node;
+ $node = _linkchecker_replace_fields('node', $node->type, $node, $link->url, $response->redirect_url);
+
+ if ($node_original != $node) {
+ // Always use the default revision setting. For more information,
+ // see node_object_prepare().
+ $node_options = variable_get('node_options_' . $node->type, array('status', 'promote'));
+ $node->revision = in_array('revision', $node_options);
+
+ // Generate a log message for the node_revisions table, visible on
+ // the node's revisions tab.
+ $node->log = t('Changed permanently moved link in %node from %src to %dst.', array('%node' => url('node/' . $node->nid), '%src' => $link->url, '%dst' => $response->redirect_url));
+
+ // Save changed node and update the node link list.
+ node_save($node);
+ watchdog('linkchecker', 'Changed permanently moved link in %node from %src to %dst.', array('%node' => url('node/' . $node->nid), '%src' => $link->url, '%dst' => $response->redirect_url), WATCHDOG_INFO);
+ }
+ else {
+ watchdog('linkchecker', 'Link update in node failed. Permanently moved link %src not found in node %node. Manual fix required.', array('%node' => url('node/' . $row->nid), '%src' => $link->url), WATCHDOG_WARNING);
+ }
+ }
+ else {
+ watchdog('linkchecker', 'Loading node %node for update failed. Manual fix required.', array('%node' => $row->nid), WATCHDOG_ERROR);
+ }
}
// COMMENTS: Autorepair all comments having this outdated link.
- if (variable_get('linkchecker_scan_comments', 0)) {
- $cids = db_query('SELECT cid FROM {linkchecker_comment} WHERE lid = :lid', array(':lid' => $link->lid))->fetchCol();
- $comments = comment_load_multiple($cids);
- foreach ($comments as $comment) {
+ $result = db_query('SELECT cid FROM {linkchecker_comment} WHERE lid = :lid', array(':lid' => $link->lid));
+ foreach ($result as $row) {
+ // Explicitly don't use comment_load_multiple() or the module may run
+ // into issues like http://drupal.org/node/1210606. With this logic
+ // comments can be updated until an out of memory occurs and further
+ // updates will be made on the remaining comments only.
+ $comment = comment_load($row->cid);
+
+ // Has the comment object loaded successfully?
+ if (is_object($comment)) {
+ $comment_original = clone $comment;
+
// Replace links in subject.
_linkchecker_link_replace($comment->subject, $link->url, $response->redirect_url);
// Replace links in fields.
- $comment = _linkchecker_replace_fields($comment, $comment->node_type, 'comment', $link->url, $response->redirect_url);
+ $comment = _linkchecker_replace_fields('comment', $comment->node_type, $comment, $link->url, $response->redirect_url);
// Save changed comment and update the comment link list.
- comment_save($comment);
- watchdog('linkchecker', 'Changed permanently moved link in comment %comment from %src to %dst.', array('%comment' => $comment['cid'], '%src' => $link->url, '%dst' => $response->redirect_url), WATCHDOG_INFO);
+ if ($comment_original != $comment) {
+ comment_save($comment);
+ watchdog('linkchecker', 'Changed permanently moved link in comment %comment from %src to %dst.', array('%comment' => $comment->cid, '%src' => $link->url, '%dst' => $response->redirect_url), WATCHDOG_INFO);
+ }
+ else {
+ watchdog('linkchecker', 'Link update in comment failed. Permanently moved link %src not found in comment %comment. Manual fix required.', array('%comment' => $comment->cid, '%src' => $link->url), WATCHDOG_WARNING);
+ }
+ }
+ else {
+ watchdog('linkchecker', 'Loading comment %comment for update failed. Manual fix required.', array('%comment' => $comment->cid), WATCHDOG_ERROR);
}
}
// CUSTOM BLOCKS: Autorepair all custom blocks having this outdated
// link.
- if (variable_get('linkchecker_scan_blocks', 0)) {
- $result = db_query('SELECT bid FROM {linkchecker_block_custom} WHERE lid = :lid', array(':lid' => $link->lid));
- foreach ($result as $row) {
- $block_custom = block_custom_block_get($row->bid);
+ $result = db_query('SELECT bid FROM {linkchecker_block_custom} WHERE lid = :lid', array(':lid' => $link->lid));
+ foreach ($result as $row) {
+ $block_custom = linkchecker_block_custom_block_get($row->bid);
- // Create array of custom block fields to scan.
- $text_items = array();
- $text_items[] = 'info';
- $text_items[] = 'body';
+ // Has the custom block object loaded successfully?
+ if (is_object($block_custom)) {
+ $block_custom_original = clone $block_custom;
// Now replace the outdated link with the permanently moved one in
// all custom block fields.
- foreach ($text_items as $text_item) {
- _linkchecker_link_replace($block_custom[$text_item], $link->url, $response->redirect_url);
+ _linkchecker_link_replace($block_custom->info, $link->url, $response->redirect_url);
+ _linkchecker_link_replace($block_custom->body['value'], $link->url, $response->redirect_url);
+
+ if ($block_custom_original != $block_custom) {
+ // Save changed block and update the block link list.
+ block_custom_block_save((array) $block_custom, $block_custom->delta);
+ // There is no hook that fires on block_custom_block_save(),
+ // therefore do link extraction programmatically.
+ _linkchecker_add_block_custom_links($block_custom, $block_custom->delta);
+ watchdog('linkchecker', 'Changed permanently moved link in custom block %bid from %src to %dst.', array('%bid' => $block_custom->delta, '%src' => $link->url, '%dst' => $response->redirect_url), WATCHDOG_INFO);
+ }
+ else {
+ watchdog('linkchecker', 'Link update in block failed. Permanently moved link %src not found in block %bid. Manual fix required.', array('%bid' => $block_custom->delta, '%src' => $link->url), WATCHDOG_WARNING);
}
-
- // Save changed block and update the block link list.
- block_custom_block_save($block_custom, $block_custom['bid']);
- // There is no hook that fires on block_custom_block_save(),
- // therefore do link extraction programmatically.
- _linkchecker_add_block_custom_links($block_custom, $block_custom['bid']);
- watchdog('linkchecker', 'Changed permanently moved link in custom block %bid from %src to %dst.', array('%bid' => $block_custom['bid'], '%src' => $link->url, '%dst' => $response->redirect_url), WATCHDOG_INFO);
+ }
+ else {
+ watchdog('linkchecker', 'Loading block %bid for update failed. Manual fix required.', array('%bid' => $block_custom->delta), WATCHDOG_ERROR);
}
}
+
+ // Revert user back to anonymous.
+ linkchecker_revert_user();
}
else {
watchdog('linkchecker', 'Link %link has changed and needs to be updated.', array('%link' => $link->url), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
@@ -384,7 +712,7 @@ function _linkchecker_status_handling($link, $response) {
'code' => $response->code,
'error' => $response->error,
'fail_count' => 0,
- 'last_checked' => time()
+ 'last_checked' => time(),
))
->expression('fail_count', 'fail_count + 1')
->execute();
@@ -392,25 +720,62 @@ function _linkchecker_status_handling($link, $response) {
// If unpublishing limit is reached, unpublish all nodes having this link.
$linkchecker_action_status_code_404 = variable_get('linkchecker_action_status_code_404', 0);
- if ($linkchecker_action_status_code_404 && $linkchecker_action_status_code_404 <= ($link->fail_count+1)) {
+ if ($linkchecker_action_status_code_404 && $linkchecker_action_status_code_404 <= ($link->fail_count + 1)) {
+ // Switch anonymous user to an admin.
+ linkchecker_impersonate_user(user_load_by_name(variable_get('linkchecker_impersonate_user', '')));
_linkchecker_unpublish_nodes($link->lid);
+ linkchecker_revert_user();
}
break;
case 405:
- case 500:
// - 405: Special error handling if method is not allowed. Switch link
- // checking to GET method and try again.
- // - 500: Like WGET, try with GET on "500 Internal server error".
+ // checking to GET method and try again.
db_update('linkchecker_link')
->condition('lid', $link->lid)
- ->fields(array('method' => 'GET'))
+ ->fields(array(
+ 'method' => 'GET',
+ 'code' => $response->code,
+ 'error' => $response->error,
+ 'fail_count' => 0,
+ 'last_checked' => time(),
+ ))
+ ->expression('fail_count', 'fail_count + 1')
+ ->execute();
+
+ watchdog('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', array('%link' => $link->url), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
+ break;
+
+ case 500:
+ // - 500: Like WGET, try with GET on "500 Internal server error".
+ // - If GET also fails with status code 500, then the link is broken.
+ if ($link->method == 'GET' && $response->code == 500) {
+ db_update('linkchecker_link')
+ ->condition('lid', $link->lid)
+ ->fields(array(
+ 'code' => $response->code,
+ 'error' => $response->error,
+ 'fail_count' => 0,
+ 'last_checked' => time(),
+ ))
+ ->expression('fail_count', 'fail_count + 1')
->execute();
- if ($response->code == 405) {
- watchdog('linkchecker', 'Method HEAD is not allowed for link %link. Method has been changed to GET.', array('%link' => $link->url), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
+ watchdog('linkchecker', 'Broken link %link has been found.', array('%link' => $link->url), WATCHDOG_NOTICE, l(t('Broken links'), 'admin/reports/linkchecker'));
}
- elseif ($response->code == 500) {
+ else {
+ db_update('linkchecker_link')
+ ->condition('lid', $link->lid)
+ ->fields(array(
+ 'method' => 'GET',
+ 'code' => $response->code,
+ 'error' => $response->error,
+ 'fail_count' => 0,
+ 'last_checked' => time(),
+ ))
+ ->expression('fail_count', 'fail_count + 1')
+ ->execute();
+
watchdog('linkchecker', 'Internal server error for link %link. Method has been changed to GET.', array('%link' => $link->url), WATCHDOG_INFO, l(t('Broken links'), 'admin/reports/linkchecker'));
}
break;
@@ -436,71 +801,16 @@ function _linkchecker_status_handling($link, $response) {
'code' => $response->code,
'error' => $response->error,
'fail_count' => 0,
- 'last_checked' => time()
+ 'last_checked' => time(),
))
->expression('fail_count', 'fail_count + 1')
->execute();
// watchdog('linkchecker', 'Unhandled link error %link has been found.', array('%link' => $link->url), WATCHDOG_ERROR, l(t('Broken links'), 'admin/reports/linkchecker'));
}
}
-}
-/**
- * Replace the old url by a new url on 301 status codes.
- *
- * @param object $entity
- * The object we are working on, can be a $node, $comment.
- * @param string $entity_type
- * The type of entity, like $node->type or $comment->node_type.
- * @param string $bundle
- * The type of bundle like 'node' or 'comment'.
- * @param string $old_url
- * The previous url.
- * @param string $new_url
- * The new url to replace the old.
- */
-function _linkchecker_replace_fields($entity, $entity_type, $bundle, $old_url, $new_url) {
- $field_list = field_info_fields();
- foreach ($field_list as $name => $field) {
- if (in_array($entity_type, $field['bundles'][$bundle])) {
- // This is because of a php error.
- $entity_field =& $entity->$name;
-
- switch ($field['type']) {
- // Core fields.
- case 'text_with_summary':
- foreach ($entity_field as $language_name => $language_value) {
- foreach ($language_value as $item_name => $item_value) {
- _linkchecker_link_replace($entity_field[$language_name][$item_name]['value'], $old_url, $new_url);
- _linkchecker_link_replace($entity_field[$language_name][$item_name]['summary'], $old_url, $new_url);
- }
- }
- break;
-
- // Core fields.
- case 'text_long':
- case 'text':
- foreach ($entity_field as $language_name => $language_value) {
- foreach ($language_value as $item_name => $item_value) {
- _linkchecker_link_replace($entity_field[$language_name][$item_name]['value'], $old_url, $new_url);
- }
- }
- break;
-
- // Link module field, http://drupal.org/project/link.
- case 'link_field':
- foreach ($entity_field as $language_name => $language_value) {
- foreach ($language_value as $item_name => $item_value) {
- _linkchecker_link_replace($entity_field[$language_name][$item_name]['url'], $old_url, $new_url);
- _linkchecker_link_replace($entity_field[$language_name][$item_name]['title'], $old_url, $new_url);
- }
- }
- break;
- }
- }
- }
-
- return $entity;
+ // Free Memory.
+ $response = new stdClass();
}
/**
@@ -508,21 +818,20 @@ function _linkchecker_replace_fields($entity, $entity_type, $bundle, $old_url, $
*/
function linkchecker_node_prepare($node) {
// Node edit tab is viewed.
- if (arg(0) == 'node' && is_numeric(arg(1)) && arg(2) == 'edit') {
+ if (arg(0) == 'node' && is_numeric(arg(1)) && arg(2) == 'edit' && isset($node->nid)) {
// Show a message on node edit page if a link check failed once or more.
$ignore_response_codes = preg_split('/(\r\n?|\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));
- $links = db_query('SELECT url, code, fail_count FROM {linkchecker_node} ln INNER JOIN {linkchecker_link} ll ON ln.lid = ll.lid WHERE ln.nid = :nid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes)', array(':nid' => $node->nid, ':fail_count' => 0, ':status' => 1, ':codes' => $ignore_response_codes));
+ $links = db_query('SELECT ll.* FROM {linkchecker_node} ln INNER JOIN {linkchecker_link} ll ON ln.lid = ll.lid WHERE ln.nid = :nid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes)', array(':nid' => $node->nid, ':fail_count' => 0, ':status' => 1, ':codes' => $ignore_response_codes));
foreach ($links as $link) {
- drupal_set_message(format_plural($link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', array('@url' => $link->url, '@code' => $link->code)), 'warning', FALSE);
+ if (_linkchecker_link_access($link)) {
+ drupal_set_message(format_plural($link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', array('@url' => $link->url, '@code' => $link->code)), 'warning', FALSE);
+ }
}
}
}
/**
* Implements hook_node_delete().
- *
- * @param object $node
- * The node object that is being deleted.
*/
function linkchecker_node_delete($node) {
_linkchecker_delete_node_links($node->nid);
@@ -530,26 +839,34 @@ function linkchecker_node_delete($node) {
/**
* Implements hook_node_insert().
- *
- * @param object $node
- * The node object that is being inserted.
*/
function linkchecker_node_insert($node) {
+ // Every moderation module saving a forward revision needs to exit here.
+ // Please see _linkchecker_isdefaultrevision() for more details.
+ // @todo: Refactor this workaround under D8.
+ if (!_linkchecker_isdefaultrevision($node)) {
+ return;
+ }
+
// The node is going to be published.
- if ($node->status == NODE_PUBLISHED && _linkchecker_scan_nodetype($node->type)) {
+ if (_linkchecker_scan_nodetype($node->type) && $node->status == NODE_PUBLISHED) {
_linkchecker_add_node_links($node);
}
}
/**
* Implements hook_node_update().
- *
- * @param object $node
- * The node object that is being updated.
*/
function linkchecker_node_update($node) {
+ // Every moderation module saving a forward revision needs to exit here.
+ // Please see _linkchecker_isdefaultrevision() for more details.
+ // @todo: Refactor this workaround under D8.
+ if (!_linkchecker_isdefaultrevision($node)) {
+ return;
+ }
+
// The node is going to be published.
- if ($node->status == NODE_PUBLISHED && _linkchecker_scan_nodetype($node->type)) {
+ if (_linkchecker_scan_nodetype($node->type) && $node->status == NODE_PUBLISHED) {
_linkchecker_add_node_links($node);
}
else {
@@ -560,9 +877,6 @@ function linkchecker_node_update($node) {
/**
* Implements hook_comment_delete().
- *
- * @param object $comment
- * The comment object that is being deleted.
*/
function linkchecker_comment_delete($comment) {
_linkchecker_delete_comment_links($comment->cid);
@@ -570,28 +884,22 @@ function linkchecker_comment_delete($comment) {
/**
* Implements hook_comment_insert().
- *
- * @param object $comment
- * The comment object that is being inserted.
*/
function linkchecker_comment_insert($comment) {
// The comment is going to be published.
$node_type = db_query('SELECT type FROM {node} WHERE nid = :nid', array(':nid' => $comment->nid))->fetchField();
- if ($comment->status == COMMENT_PUBLISHED && _linkchecker_scan_nodetype($node_type)) {
+ if (_linkchecker_scan_nodetype($node_type) && variable_get('linkchecker_scan_comments', 0) && $comment->status == COMMENT_PUBLISHED) {
_linkchecker_add_comment_links($comment);
}
}
/**
* Implements hook_comment_update().
- *
- * @param object $comment
- * The comment object that is being updated.
*/
function linkchecker_comment_update($comment) {
// The node is going to be published.
$node_type = db_query('SELECT type FROM {node} WHERE nid = :nid', array(':nid' => $comment->nid))->fetchField();
- if ($comment->status == COMMENT_PUBLISHED && _linkchecker_scan_nodetype($node_type)) {
+ if (_linkchecker_scan_nodetype($node_type) && variable_get('linkchecker_scan_comments', 0) && $comment->status == COMMENT_PUBLISHED) {
_linkchecker_add_comment_links($comment);
}
else {
@@ -617,9 +925,11 @@ function linkchecker_form_alter(&$form, &$form_state, $form_id) {
// Show a message on custom block edit page if a link check failed once
// or more often.
$ignore_response_codes = preg_split('/(\r\n?|\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));
- $links = db_query('SELECT url, code, fail_count FROM {linkchecker_block_custom} lb INNER JOIN {linkchecker_link} ll ON lb.lid = ll.lid WHERE lb.bid = :bid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes)', array(':bid' => arg(5), ':fail_count' => 0, ':status' => 1, ':codes' => $ignore_response_codes));
+ $links = db_query('SELECT ll.* FROM {linkchecker_block_custom} lb INNER JOIN {linkchecker_link} ll ON lb.lid = ll.lid WHERE lb.bid = :bid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes)', array(':bid' => arg(5), ':fail_count' => 0, ':status' => 1, ':codes' => $ignore_response_codes));
foreach ($links as $link) {
- drupal_set_message(format_plural($link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', array('@url' => $link->url, '@code' => $link->code)), 'warning', FALSE);
+ if (_linkchecker_link_access($link)) {
+ drupal_set_message(format_plural($link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', array('@url' => $link->url, '@code' => $link->code)), 'warning', FALSE);
+ }
}
}
@@ -631,20 +941,25 @@ function linkchecker_form_alter(&$form, &$form_state, $form_id) {
// Add custom submit handler to custom block delete form.
$form['#submit'][] = 'linkchecker_block_custom_delete_form_submit';
break;
+ }
+}
- case 'comment_form':
- // When displaying the form as 'view' or 'preview', show the broken links
- // warning.
- if ((empty($form_state['input']) || isset($form_state['input']['op']) && $form_state['input']['op'] == t('Preview')) && arg(0) == 'comment' && arg(1) == 'edit' && is_numeric(arg(2))) {
- // Show a message on comment edit page if a link check failed once or
- // more often.
- $ignore_response_codes = preg_split('/(\r\n?|\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));
- $links = db_query('SELECT url, code, fail_count FROM {linkchecker_comment} lc INNER JOIN {linkchecker_link} ll ON lc.lid = ll.lid WHERE lc.cid = :cid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes)', array(':cid' => arg(2), ':fail_count' => 0, ':status' => 1, ':codes' => $ignore_response_codes));
- foreach ($links as $link) {
- drupal_set_message(format_plural($link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', array('@url' => $link->url, '@code' => $link->code)), 'warning', FALSE);
- }
+/**
+ * Implements hook_form_BASE_FORM_ID_alter().
+ */
+function linkchecker_form_comment_form_alter(&$form, &$form_state, $form_id) {
+ // When displaying the form as 'view' or 'preview', show the broken links
+ // warning.
+ if ((empty($form_state['input']) || (isset($form_state['input']['op']) && $form_state['input']['op'] == t('Preview'))) && arg(0) == 'comment' && is_numeric(arg(1)) && arg(2) == 'edit') {
+ // Show a message on comment edit page if a link check failed once or
+ // more often.
+ $ignore_response_codes = preg_split('/(\r\n?|\n)/', variable_get('linkchecker_ignore_response_codes', "200\n206\n302\n304\n401\n403"));
+ $links = db_query('SELECT ll.* FROM {linkchecker_comment} lc INNER JOIN {linkchecker_link} ll ON lc.lid = ll.lid WHERE lc.cid = :cid AND ll.fail_count > :fail_count AND ll.status = :status AND ll.code NOT IN (:codes)', array(':cid' => arg(1), ':fail_count' => 0, ':status' => 1, ':codes' => $ignore_response_codes));
+ foreach ($links as $link) {
+ if (_linkchecker_link_access($link)) {
+ drupal_set_message(format_plural($link->fail_count, 'Link check of <a href="@url">@url</a> failed once (status code: @code).', 'Link check of <a href="@url">@url</a> failed @count times (status code: @code).', array('@url' => $link->url, '@code' => $link->code)), 'warning', FALSE);
}
- break;
+ }
}
}
@@ -652,15 +967,19 @@ function linkchecker_form_alter(&$form, &$form_state, $form_id) {
* Custom submit handler for block add page.
*/
function linkchecker_block_custom_add_form_submit($form, &$form_state) {
- $bid = db_query('SELECT MAX(bid) FROM {block_custom}')->fetchField();
- _linkchecker_add_block_custom_links($form_state['values'], $bid);
+ if (variable_get('linkchecker_scan_blocks', 0)) {
+ $bid = db_query('SELECT MAX(bid) FROM {block_custom}')->fetchField();
+ _linkchecker_add_block_custom_links($form_state['values'], $bid);
+ }
}
/**
* Custom submit handler for block configure page.
*/
function linkchecker_block_custom_configure_form_submit($form, &$form_state) {
- _linkchecker_add_block_custom_links($form_state['values'], $form_state['values']['delta']);
+ if (variable_get('linkchecker_scan_blocks', 0)) {
+ _linkchecker_add_block_custom_links($form_state['values'], $form_state['values']['delta']);
+ }
}
/**
@@ -671,35 +990,125 @@ function linkchecker_block_custom_delete_form_submit($form, &$form_state) {
}
/**
- * Add node links to database.
+ * Returns information from database about a user-created (custom) block.
+ *
+ * @param int $bid
+ * ID of the block to get information for.
+ *
+ * @return object
+ * Associative object of information stored in the database for this block.
+ * Object keys:
+ * - module: 'block' as the source of the custom blocks data.
+ * - delta: Block ID.
+ * - info: Block description.
+ * - body['value']: Block contents.
+ * - body['format']: Filter ID of the filter format for the body.
+ */
+function linkchecker_block_custom_block_get($bid) {
+  // Fetch the custom block record for the given block ID.
+  $record = block_custom_block_get($bid);
+
+  // Nothing usable came back: report that to the caller with FALSE.
+  if (!$record) {
+    return FALSE;
+  }
+
+  // Re-shape the flat record into the object layout described in the
+  // docblock: module/delta/info plus a nested body (value + format).
+  return (object) array(
+    'module' => 'block',
+    'delta' => $record['bid'],
+    'info' => $record['info'],
+    'body' => array(
+      'value' => $record['body'],
+      'format' => $record['format'],
+    ),
+  );
+}
+
+/**
+ * Extracts links from a node.
*
* @param object $node
* The fully populated node object.
- * @param bool $skip_missing_links_detection
- * To prevent endless batch loops the value need to be TRUE. With FALSE
- * the need for content re-scans is detected by the number of missing links.
+ * @param bool $return_field_names
+ * If set to TRUE, the returned array will contain the link URLs as keys, and
+ * each element will be an array containing all field names in which the URL
+ * is found. Otherwise, a simple array of URLs will be returned.
+ *
+ * @return array
+ * An array whose keys are fully qualified and unique URLs found in the node
+ * (as returned by _linkchecker_extract_links()), or a more complex
+ * structured array (see above) if $return_field_names is TRUE.
*/
-function _linkchecker_add_node_links($node, $skip_missing_links_detection = FALSE) {
- // Get current node language options for url() functions.
- $languages = language_list();
- // Note: An "undefined language" (value: 'und') isn't listed in the available
- // languages variable $languages.
- $url_options = (empty($node->language) || empty($languages[$node->language])) ? array('absolute' => TRUE) : array('language' => $languages[$node->language], 'absolute' => TRUE);
+function _linkchecker_extract_node_links($node, $return_field_names = FALSE) {
- $filter = new stdClass;
+ $filter = new stdClass();
$filter->settings['filter_url_length'] = 72;
// Create array of node fields to scan.
$text_items = array();
- $text_items[] = _filter_url($node->title, $filter);
- $text_items = array_merge($text_items, linkchecker_parse_fields('node', $node, $node->type));
+ $text_items_by_field = array();
+
+ // Add fields typically not used for urls to the bottom. This way a link may
+ // be found earlier while looping over $text_items_by_field below.
+ $text_items_by_field = array_merge($text_items_by_field, _linkchecker_parse_fields('node', $node->type, $node, TRUE));
+ $text_items_by_field['title'][] = _filter_url($node->title, $filter);
+ $text_items = _linkchecker_array_values_recursive($text_items_by_field);
// Get the absolute node path for extraction of relative links.
+ $languages = language_list();
+ // Note: An "undefined language" (value: 'und') isn't listed in the available
+ // languages variable $languages.
+ $url_options = (empty($node->language) || empty($languages[$node->language])) ? array('absolute' => TRUE) : array('language' => $languages[$node->language], 'absolute' => TRUE);
$path = url('node/' . $node->nid, $url_options);
// Extract all links in a node.
$links = _linkchecker_extract_links(implode(' ', $text_items), $path);
+ // Return either the array of links, or an array of field names containing
+ // each link, depending on what was requested.
+ if (!$return_field_names) {
+ return $links;
+ }
+ else {
+ $field_names = array();
+ foreach ($text_items_by_field as $field_name => $items) {
+ foreach ($items as $item) {
+ foreach ($links as $uri => $link) {
+ // We only need to do a quick check here to see if the URL appears
+ // anywhere in the text; if so, that means users with access to this
+ // field will be able to see the URL (and any private data such as
+ // passwords contained in it). This is sufficient for the purposes of
+ // _linkchecker_link_node_ids(), where this information is used.
+ foreach ($link as $original_link) {
+ if (strpos($item, $original_link) !== FALSE) {
+ $field_names[$uri][$field_name] = $field_name;
+ }
+ // URLs in $links have been auto-decoded by DOMDocument->loadHTML
+ // and does not provide the RAW url with html special chars.
+ // NOTE: htmlspecialchars() is 30% slower than str_replace().
+ elseif (strpos($item, str_replace('&', '&amp;', $original_link)) !== FALSE) {
+ $field_names[$uri][$field_name] = $field_name;
+ }
+ }
+ }
+ }
+ }
+
+ return $field_names;
+ }
+}
+
+/**
+ * Add node links to database.
+ *
+ * @param object $node
+ * The fully populated node object.
+ * @param bool $skip_missing_links_detection
+ * To prevent endless batch loops the value need to be TRUE. With FALSE
+ * the need for content re-scans is detected by the number of missing links.
+ */
+function _linkchecker_add_node_links($node, $skip_missing_links_detection = FALSE) {
+ $links = array_keys(_linkchecker_extract_node_links($node));
+
// Node have links.
if (!empty($links)) {
// Remove all links from the links array already in the database and only
@@ -775,13 +1184,13 @@ function _linkchecker_add_node_links($node, $skip_missing_links_detection = FALS
*/
function _linkchecker_add_comment_links($comment, $skip_missing_links_detection = FALSE) {
- $filter = new stdClass;
+ $filter = new stdClass();
$filter->settings['filter_url_length'] = 72;
// Create array of comment fields to scan.
$text_items = array();
$text_items[] = _filter_url($comment->subject, $filter);
- $text_items = array_merge($text_items, linkchecker_parse_fields('comment', $comment, $comment->node_type));
+ $text_items = array_merge($text_items, _linkchecker_parse_fields('comment', $comment->node_type, $comment));
// Get the absolute node path for extraction of relative links.
$languages = language_list();
@@ -790,7 +1199,7 @@ function _linkchecker_add_comment_links($comment, $skip_missing_links_detection
$path = url('node/' . $comment->nid, $url_options);
// Extract all links in a comment.
- $links = _linkchecker_extract_links(implode(' ', $text_items), $path);
+ $links = array_keys(_linkchecker_extract_links(implode(' ', $text_items), $path));
// Comment have links.
if (!empty($links)) {
@@ -869,15 +1278,18 @@ function _linkchecker_add_comment_links($comment, $skip_missing_links_detection
*/
function _linkchecker_add_block_custom_links($block_custom, $bid, $skip_missing_links_detection = FALSE) {
// Convert custom block array to object.
+ // @todo: Are we able to remove this global conversion?
$block_custom = (object) $block_custom;
- // Custom blocks really suxxx as it's very inconsistent core logic.
- // Try to hack around this bad logic as good as possible to prevent issues.
- if ($block_custom->module != 'block' && !is_numeric($block_custom->delta) && !is_numeric($bid) && $block_custom->delta != $bid) {
+ // Custom blocks really suxxx as it's very inconsistent core logic (values are
+ // integers or strings) and there are no usable hooks. Try to workaround this
+ // bad logic as good as possible to prevent warnings/errors.
+ // NOTE: Only custom blocks from block.module are supported. Skip all others.
+ if ($block_custom->module != 'block' || !is_numeric($block_custom->delta) || !is_numeric($bid) || $block_custom->delta != $bid) {
return;
}
- $filter = new stdClass;
+ $filter = new stdClass();
$filter->settings['filter_url_length'] = 72;
// Create array of custom block fields to scan. All fields cannot exists.
@@ -885,17 +1297,13 @@ function _linkchecker_add_block_custom_links($block_custom, $bid, $skip_missing_
if (!empty($block_custom->info)) {
$text_items[] = _filter_url($block_custom->info, $filter);
}
- // $block_custom from a scan for links in blocks. See block_custom_block_get().
- if (!is_array($block_custom->body) && !empty($block_custom->body) && isset($block_custom->format)) {
- $text_items[] = _linkchecker_check_markup($block_custom->body, $block_custom->format);
- }
- // $block_custom from editing a block. See block_custom_block_save().
- if (is_array($block_custom->body) && array_key_exists('value', $block_custom->body) && array_key_exists('format', $block_custom->body)) {
+ // $block_custom from editing/scanning a block. See block_custom_block_save().
+ if (!empty($block_custom->body) && is_array($block_custom->body) && array_key_exists('value', $block_custom->body) && array_key_exists('format', $block_custom->body)) {
$text_items[] = _linkchecker_check_markup($block_custom->body['value'], $block_custom->body['format']);
}
// Extract all links in a custom block.
- $links = _linkchecker_extract_links(implode(' ', $text_items));
+ $links = array_keys(_linkchecker_extract_links(implode(' ', $text_items)));
// Custom block has links.
if (!empty($links)) {
@@ -1105,71 +1513,127 @@ function _linkchecker_block_custom_links_missing($bid, $links) {
* This function parse all fields from the entity and returns an array of
* filtered field items.
*
- * @param string $bundle
- * The bundle type to parse. Examples types are 'node', 'comment'.
+ * @param string $entity_type
+ * The type of entity; e.g., 'node', 'comment'.
+ * @param string $bundle_name
+ * The name of the bundle aka node type, e.g., 'article', 'page'.
* @param object $entity
- * The entity to parse, a node or a comment object.
- * @param string $type
- * Content type name e.g. $node->type or $comment->node_type.
+ * The entity to parse, a $node or a $comment object.
+ * @param bool $return_field_names
+ * If set to TRUE, the returned array will be keyed by field name, and each
+ * element will be an array of the filtered content items found in that
+ * field. Otherwise, a simple array with content will be returned.
*
* @return array
* Array of field items with filters applied.
*/
-function linkchecker_parse_fields($bundle, $entity, $type) {
+function _linkchecker_parse_fields($entity_type, $bundle_name, $entity, $return_field_names = FALSE) {
$text_items = array();
- $field_list = field_info_fields();
+ $text_items_by_field = array();
// Create settings for _filter_url() function.
- $filter = new stdClass;
+ $filter = new stdClass();
$filter->settings['filter_url_length'] = 72;
- foreach ($field_list as $name => $field) {
- if (!empty($field['bundles'][$bundle]) && in_array($type, $field['bundles'][$bundle])) {
- // @todo This is because of a php parse error.
- if (empty($entity->$name)) {
- continue;
- }
- $entity_field = $entity->$name;
-
- switch ($field['type']) {
- // Core fields.
- case 'text_with_summary':
- foreach ($entity_field as $language) {
- foreach ($language as $item) {
- $format = isset($item['format']) ? $item['format'] : NULL;
- $text_items[] = _linkchecker_check_markup($item['value'], $format, $entity->language, TRUE);
- $text_items[] = _linkchecker_check_markup($item['summary'], $format, $entity->language, TRUE);
- }
+ // Collect the fields from this entity_type and bundle.
+ foreach (field_info_instances($entity_type, $bundle_name) as $field_name => $instance) {
+ $field = field_info_field($field_name);
+ // The field may be missing on this particular entity object; fall back to
+ // an empty list so the loops below are skipped instead of raising an
+ // undefined property notice and a foreach warning. The braces make the
+ // dynamic property lookup unambiguous: without them PHP 7+ evaluates
+ // ($entity->$field)['field_name'] instead of the intended property.
+ $entity_field = (isset($entity->{$field['field_name']}) && is_array($entity->{$field['field_name']})) ? $entity->{$field['field_name']} : array();
+
+ // Every extracted item is stored twice: in the flat $text_items list and
+ // grouped under its field name in $text_items_by_field.
+ switch ($field['type']) {
+ // Core fields.
+ case 'text_with_summary':
+ foreach ($entity_field as $language) {
+ foreach ($language as $item) {
+ $format = isset($item['format']) ? $item['format'] : NULL;
+ $text_items[] = $text_items_by_field[$field['field_name']][] = _linkchecker_check_markup($item['value'], $format, linkchecker_entity_language($entity_type, $entity), TRUE);
+ $text_items[] = $text_items_by_field[$field['field_name']][] = _linkchecker_check_markup($item['summary'], $format, linkchecker_entity_language($entity_type, $entity), TRUE);
}
- break;
-
- // Core fields.
- case 'text_long':
- case 'text':
- foreach ($entity_field as $language) {
- foreach ($language as $item) {
- $format = isset($item['format']) ? $item['format'] : NULL;
- $text_items[] = _linkchecker_check_markup($item['value'], $format, $entity->language, TRUE);
- }
+ }
+ break;
+
+ // Core fields.
+ case 'text_long':
+ case 'text':
+ foreach ($entity_field as $language) {
+ foreach ($language as $item) {
+ $format = isset($item['format']) ? $item['format'] : NULL;
+ $text_items[] = $text_items_by_field[$field['field_name']][] = _linkchecker_check_markup($item['value'], $format, linkchecker_entity_language($entity_type, $entity), TRUE);
}
- break;
-
- // Link module field, http://drupal.org/project/link.
- case 'link_field':
- foreach ($entity_field as $language) {
- foreach ($language as $item) {
- $options = drupal_parse_url($item['url']);
- $title = !empty($item['title']) ? $item['title'] : '';
- $text_items[] = l($title, $options['path'], $options);
- $text_items[] = _linkchecker_check_markup($title, NULL, $entity->language, TRUE);
- }
+ }
+ break;
+
+ // Link module field, http://drupal.org/project/link.
+ case 'link_field':
+ foreach ($entity_field as $language) {
+ foreach ($language as $item) {
+ $options = drupal_parse_url($item['url']);
+ $title = !empty($item['title']) ? $item['title'] : '';
+ $text_items[] = $text_items_by_field[$field['field_name']][] = l($title, $options['path'], $options);
+ $text_items[] = $text_items_by_field[$field['field_name']][] = _linkchecker_check_markup($title, NULL, linkchecker_entity_language($entity_type, $entity), TRUE);
}
- break;
- }
+ }
+ break;
}
}
- return $text_items;
+ // Hand back the per-field grouping only when the caller asked for it.
+ return ($return_field_names) ? $text_items_by_field : $text_items;
+}
+
+/**
+ * Replace the old url by a new url on 301 status codes.
+ *
+ * Walks every field instance attached to the given bundle and hands the
+ * values of the supported field types (text_with_summary, text_long, text
+ * and link_field) to _linkchecker_link_replace(). Fields that are missing or
+ * empty on the entity are skipped.
+ *
+ * @param string $entity_type
+ *   The type of entity; e.g., 'node', 'comment'.
+ * @param string $bundle_name
+ *   The name of the bundle aka node type, e.g., 'article', 'page'.
+ * @param object $entity
+ *   The entity to parse, a $node or a $comment object.
+ * @param string $old_url
+ *   The previous url.
+ * @param string $new_url
+ *   The new url to replace the old.
+ *
+ * @return object
+ *   The entity object that was passed in.
+ */
+function _linkchecker_replace_fields($entity_type, $bundle_name, $entity, $old_url, $new_url) {
+  // Collect the fields from this entity_type and bundle.
+  foreach (field_info_instances($entity_type, $bundle_name) as $field_name => $instance) {
+    $field = field_info_field($field_name);
+    $name = $field['field_name'];
+
+    // Skip fields that are not populated on this entity. Binding a reference
+    // to a missing property would silently create it as NULL and the loops
+    // below would then warn about an invalid foreach argument.
+    if (empty($entity->{$name}) || !is_array($entity->{$name})) {
+      continue;
+    }
+
+    // Work on the entity's own field array, not on a copy. The braces keep
+    // the dynamic property lookup unambiguous: "$entity->$field['field_name']"
+    // is evaluated as ($entity->$field)['field_name'] on PHP 7 and later.
+    $entity_field =& $entity->{$name};
+
+    switch ($field['type']) {
+      // Core fields.
+      case 'text_with_summary':
+        foreach ($entity_field as $language_name => $language_value) {
+          foreach ($language_value as $item_name => $item_value) {
+            // NOTE(review): _linkchecker_link_replace() presumably takes its
+            // first argument by reference; confirm against its definition.
+            _linkchecker_link_replace($entity_field[$language_name][$item_name]['value'], $old_url, $new_url);
+            _linkchecker_link_replace($entity_field[$language_name][$item_name]['summary'], $old_url, $new_url);
+          }
+        }
+        break;
+
+      // Core fields.
+      case 'text_long':
+      case 'text':
+        foreach ($entity_field as $language_name => $language_value) {
+          foreach ($language_value as $item_name => $item_value) {
+            _linkchecker_link_replace($entity_field[$language_name][$item_name]['value'], $old_url, $new_url);
+          }
+        }
+        break;
+
+      // Link module field, http://drupal.org/project/link.
+      case 'link_field':
+        foreach ($entity_field as $language_name => $language_value) {
+          foreach ($language_value as $item_name => $item_value) {
+            _linkchecker_link_replace($entity_field[$language_name][$item_name]['url'], $old_url, $new_url);
+            _linkchecker_link_replace($entity_field[$language_name][$item_name]['title'], $old_url, $new_url);
+          }
+        }
+        break;
+    }
+  }
+  // Drop the dangling reference so later code cannot write through it.
+  unset($entity_field);
+
+  return $entity;
 }
/**
@@ -1236,17 +1700,19 @@ function _linkchecker_cleanup_links() {
* Extract links from content.
*
* @param string $text
- * The text to be scanned for links.
+ * The text to be scanned for links.
* @param string $content_path
- * Path to the content that is currently scanned for links. This value is
- * required to build full qualified links from relative links. Relative links
- * are not extracted from content, if path is not provided.
+ * Path to the content that is currently scanned for links. This value is
+ * required to build full qualified links from relative links. Relative links
+ * are not extracted from content, if path is not provided.
*
* @return array
- * Array of full qualified and unique URLs found in content.
+ * Array whose keys are fully qualified and unique URLs found in the
+ * content, and whose values are arrays of actual text (raw URLs or paths)
+ * corresponding to each fully qualified URL.
*/
function _linkchecker_extract_links($text = '', $content_path = NULL) {
- global $base_root;
+ global $base_root, $is_https;
$html_dom = filter_dom_load($text);
$urls = array();
@@ -1322,7 +1788,7 @@ function _linkchecker_extract_links($text = '', $content_path = NULL) {
foreach ($params as $param) {
// @todo
// - Try to extract links in unkown "flashvars" values
- // (e.g. file=http://, data=http://).
+ // (e.g., file=http://, data=http://).
$names = array('archive', 'filename', 'href', 'movie', 'src', 'url');
if ($param->hasAttribute('name') && in_array($param->getAttribute('name'), $names)) {
$urls[] = $param->getAttribute('value');
@@ -1356,8 +1822,6 @@ function _linkchecker_extract_links($text = '', $content_path = NULL) {
}
}
- // Decode HTML links into plain text links.
- $urls = array_map('decode_entities', $urls);
// Remove empty values.
$urls = array_filter($urls);
// Remove duplicate urls.
@@ -1368,16 +1832,28 @@ function _linkchecker_extract_links($text = '', $content_path = NULL) {
$links = array();
foreach ($urls as $url) {
- // @todo #1149596 HACK - Encode spaces in URLs, so validation equals TRUE
- // and link gets added.
- $url_encoded = str_replace(' ', '%20', $url);
+ // Decode HTML links into plain text links.
+ // DOMDocument->loadHTML does not provide the RAW url from code. All html
+ // entities are already decoded.
+ // @todo: Try to find a way to get the raw value.
+ $url_decoded = $url;
+
+ // Prefix protocol relative urls with a protocol to allow link checking.
+ if (preg_match('!^//!', $url_decoded)) {
+ $http_protocol = $is_https ? 'https' : 'http';
+ $url_decoded = $http_protocol . ':' . $url_decoded;
+ }
+
+ // FIXME: #1149596 HACK - Encode spaces in URLs, so validation equals TRUE and link gets added.
+ $url_encoded = str_replace(' ', '%20', $url_decoded);
// Full qualified URLs.
if ($linkchecker_check_links_types != 2 && valid_url($url_encoded, TRUE)) {
- $links[] = $url;
+ // Add to Array and change HTML links into plain text links.
+ $links[$url_decoded][] = $url;
}
// Skip mailto:, javascript:, etc.
- elseif (preg_match('/^\w[\w.+]*:/', $url)) {
+ elseif (preg_match('/^\w[\w.+]*:/', $url_decoded)) {
continue;
}
// Local URLs. $linkchecker_check_links_types = 0 or 2
@@ -1386,23 +1862,25 @@ function _linkchecker_extract_links($text = '', $content_path = NULL) {
$absolute_content_path = _linkchecker_absolute_content_path($content_path);
// Absolute local URLs need to start with [/].
- if (preg_match('!^/!', $url)) {
- $links[] = $base_root . $url;
+ if (preg_match('!^/!', $url_decoded)) {
+ // Add to Array and change HTML encoded links into plain text links.
+ $links[$base_root . $url_decoded][] = $url;
}
// Anchors and URL parameters like "#foo" and "?foo=bar".
- elseif (!empty($content_path) && preg_match('!^[?#]!', $url)) {
- $links[] = $content_path . $url;
+ elseif (!empty($content_path) && preg_match('!^[?#]!', $url_decoded)) {
+ // Add to Array and change HTML encoded links into plain text links.
+ $links[$content_path . $url_decoded][] = $url;
}
// Relative URLs like "./foo/bar" and "../foo/bar".
- elseif (!empty($absolute_content_path) && preg_match('!^\.{1,2}/!', $url)) {
+ elseif (!empty($absolute_content_path) && preg_match('!^\.{1,2}/!', $url_decoded)) {
// Build the URI without hostname before the URI is normalized and
// dot-segments will be removed. The hostname is added back after the
// normalization has completed to prevent hostname removal by the regex.
// This logic intentionally does not implement all the rules definied in
// RFC 3986, section 5.2.4 to show broken links and over-dot-segmented
- // URIs; e.g. http://example.com/../../foo/bar.
+ // URIs; e.g., http://example.com/../../foo/bar.
// For more information, see http://drupal.org/node/832388.
- $path = substr_replace($absolute_content_path . $url, '', 0, strlen($base_root));
+ $path = substr_replace($absolute_content_path . $url_decoded, '', 0, strlen($base_root));
// Remove './' segments where possible.
$path = str_replace('/./', '/', $path);
@@ -1416,11 +1894,11 @@ function _linkchecker_extract_links($text = '', $content_path = NULL) {
}
// Glue the hostname and path to full-qualified URI.
- $links[] = $base_root . $path;
+ $links[$base_root . $path][] = $url;
}
// Relative URLs like "test.png".
- elseif (!empty($absolute_content_path) && preg_match('!^[^/]!', $url)) {
- $links[] = $absolute_content_path . $url;
+ elseif (!empty($absolute_content_path) && preg_match('!^[^/]!', $url_decoded)) {
+ $links[$absolute_content_path . $url_decoded][] = $url;
}
else {
// @todo Are there more special cases the module need to handle?
@@ -1428,7 +1906,7 @@ function _linkchecker_extract_links($text = '', $content_path = NULL) {
}
}
- return array_unique($links);
+ return $links;
}
/**
@@ -1447,7 +1925,7 @@ function _linkchecker_link_replace(&$text, $old_link_fqdn = '', $new_link_fqdn =
// Remove protocols and hostname from local URLs.
$base_roots = array(
drupal_strtolower('http://' . $_SERVER['HTTP_HOST']),
- drupal_strtolower('https://' . $_SERVER['HTTP_HOST'])
+ drupal_strtolower('https://' . $_SERVER['HTTP_HOST']),
);
$old_link = str_replace($base_roots, '', $old_link_fqdn);
$new_link = str_replace($base_roots, '', $new_link_fqdn);
@@ -1481,7 +1959,7 @@ function _linkchecker_link_replace(&$text, $old_link_fqdn = '', $new_link_fqdn =
// ampersand "&amp;".
$old_links = array_unique($old_links);
- // Load HTML code into DOM
+ // Load HTML code into DOM.
$html_dom = filter_dom_load($text);
// Finds all hyperlinks in the content.
@@ -1586,7 +2064,7 @@ function _linkchecker_link_replace(&$text, $old_link_fqdn = '', $new_link_fqdn =
foreach ($params as $param) {
// @todo
// - Try to replace links in unkown "flashvars" values
- // (e.g. file=http://, data=http://).
+ // (e.g., file=http://, data=http://).
$names = array('archive', 'filename', 'href', 'movie', 'src', 'url');
if ($param->hasAttribute('name') && in_array($param->getAttribute('name'), $names)) {
if (in_array($param->getAttribute('value'), $old_links)) {
@@ -1696,7 +2174,7 @@ function _linkchecker_check_markup($text, $format_id = NULL, $langcode = '', $ca
// Perform filtering.
foreach ($filters as $name => $filter) {
if (!in_array($name, $filters_blacklist)) {
- if ($filter->status && isset($filter_info[$name]['process callback']) && function_exists($filter_info[$name]['process callback'])) {
+ if ($filter->status && isset($filter_info[$name]['process callback']) && function_exists($filter_info[$name]['process callback'])) {
$function = $filter_info[$name]['process callback'];
$text = $function($text, $filter, $format, $langcode, $cache, $cache_id);
}
@@ -1860,10 +2338,13 @@ function _linkchecker_scan_nodetype($node_type = NULL) {
* A link ID that have reached a defined failcount.
*/
function _linkchecker_unpublish_nodes($lid) {
- $nids = db_query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', array(':lid' => $lid))->fetchCol();
-
- $nodes = node_load_multiple($nids);
- foreach ($nodes as $node) {
+ $result = db_query('SELECT nid FROM {linkchecker_node} WHERE lid = :lid', array(':lid' => $lid));
+ foreach ($result as $row) {
+ // Explicitly don't use node_load_multiple() or the module may run
+ // into issues like http://drupal.org/node/1210606. With this logic
+ // nodes can be updated until an out of memory occurs and further
+ // updates will be made on the remaining nodes only.
+ $node = node_load($row->nid);
$node->status = NODE_NOT_PUBLISHED;
node_save($node);
watchdog('linkchecker', 'Set @type %title to unpublished.', array('@type' => $node->type, '%title' => $node->title));
@@ -1876,3 +2357,145 @@ function _linkchecker_unpublish_nodes($lid) {
function linkchecker_link_load($lid) {
return db_query('SELECT * FROM {linkchecker_link} WHERE lid = :lid', array(':lid' => $lid))->fetchObject();
}
+
+/**
+ * Impersonates another user, see http://drupal.org/node/287292#comment-3162350.
+ *
+ * Calling this function with a user pushes the currently active user onto a
+ * stack, disables session saving and makes the given user the active one.
+ * Calling it without a user pops the most recently saved user from the stack
+ * and makes it active again; session saving is re-enabled once the stack is
+ * empty. Calls can therefore be nested, and every impersonation should be
+ * reverted using linkchecker_revert_user().
+ *
+ * @param int|object $new_user
+ *   User to impersonate, either a UID or a user object. Omit the argument to
+ *   revert the last impersonation.
+ *
+ * @return object
+ *   Current user object.
+ *
+ * @see linkchecker_revert_user()
+ */
+function linkchecker_impersonate_user($new_user = NULL) {
+  global $user;
+  $user_stack = &drupal_static(__FUNCTION__);
+
+  if (isset($new_user)) {
+    // Remember who was active and keep the impersonation out of the session.
+    $user_stack[] = $user;
+    drupal_save_session(FALSE);
+
+    if (is_numeric($new_user)) {
+      // A UID was given, load the matching account.
+      $user = user_load($new_user);
+    }
+    elseif (is_object($new_user)) {
+      $user = $new_user;
+    }
+    else {
+      $user = (object) $new_user;
+    }
+
+    return $user;
+  }
+
+  // No user given: step back to the previously active user, if there is one.
+  if (!empty($user_stack)) {
+    $user = array_pop($user_stack);
+
+    // The stack is empty again, so nobody is impersonated any longer and
+    // sessions may be saved as usual.
+    if (empty($user_stack)) {
+      drupal_save_session(TRUE);
+    }
+  }
+
+  return $user;
+}
+
+/**
+ * Switches back to the user that was active before the last impersonation.
+ *
+ * @return object
+ *   Current user.
+ *
+ * @see linkchecker_impersonate_user()
+ */
+function linkchecker_revert_user() {
+  // Calling the impersonation helper without a user reverts the last switch.
+  $active_user = linkchecker_impersonate_user(NULL);
+  return $active_user;
+}
+
+/**
+ * Checks if this entity is the default revision (published).
+ *
+ * @param object $entity
+ *   The entity object, e.g., $node.
+ *
+ * @return bool
+ *   TRUE if the entity is the default revision, FALSE otherwise.
+ */
+function _linkchecker_isdefaultrevision($entity) {
+  // Background: D7 "forward revisioning" is complex. Saving a future revision
+  // runs node_save() with the future node in the node table, which fires
+  // hook_node_update() twice and causes abnormal behaviour in linkchecker.
+  //
+  // Workbench Moderation saves the forward revision first and, in a second
+  // step, overwrites it with the live version in a shutdown function. This
+  // confuses linkchecker, because D7 has no generic property in the node
+  // object telling whether the node being updated is the 'published' version
+  // or only a draft of a future version.
+  //
+  // D8 introduces $node->isDefaultRevision for this purpose. For details see:
+  // - http://drupal.org/node/1879482
+  // - http://drupal.org/node/218755
+  // - http://drupal.org/node/1522154
+  //
+  // Every moderation module saving a forward revision needs to return FALSE.
+  // @todo: Refactor this workaround under D8.
+
+  // Without the Workbench Moderation module there are no forward revisions
+  // this function knows about.
+  if (!module_exists('workbench_moderation')) {
+    return TRUE;
+  }
+
+  // Content types that are not moderated are always saved as live revision.
+  if (workbench_moderation_node_type_moderated($entity->type) !== TRUE) {
+    return TRUE;
+  }
+
+  // Moderated content only counts as default revision while the module is
+  // updating the live revision.
+  return !empty($entity->workbench_moderation['updating_live_revision']);
+}
+
+/**
+ * Returns the language code of the given entity.
+ *
+ * Backward compatibility layer to ensure that installations running an older
+ * version of core where entity_language() is not available do not break. In
+ * that case the entity's own 'language' property is used instead.
+ *
+ * @param string $entity_type
+ *   An entity type.
+ * @param object $entity
+ *   An entity object.
+ *
+ * @return string
+ *   The entity language code, or NULL if entity_language() does not exist
+ *   and the entity has no non-empty 'language' property.
+ */
+function linkchecker_entity_language($entity_type, $entity) {
+  // Prefer the core API whenever it exists.
+  if (function_exists('entity_language')) {
+    return entity_language($entity_type, $entity);
+  }
+
+  // Fall back to the language property of the entity.
+  return empty($entity->language) ? NULL : $entity->language;
+}
+
+/**
+ * Flattens an array of any nesting depth into a list of its values.
+ *
+ * @param array $array
+ *   The one-dimensional or multidimensional array to flatten.
+ *
+ * @return array
+ *   All non-array values of the input array in the order they are found,
+ *   indexed numerically. The keys of the input array are discarded.
+ */
+function _linkchecker_array_values_recursive(array $array) {
+  $flat_values = array();
+
+  foreach ($array as $item) {
+    if (!is_array($item)) {
+      $flat_values[] = $item;
+      continue;
+    }
+
+    // Flatten the nested array first, then append its values in order.
+    foreach (_linkchecker_array_values_recursive($item) as $nested_value) {
+      $flat_values[] = $nested_value;
+    }
+  }
+
+  return $flat_values;
+}