feedapi_imagegrabber.module

Tracking 6.x-1.x branch
  1. drupal
    1. 6 contributions/feedapi_imagegrabber/feedapi_imagegrabber.module

Grabs the primary image from the feed-item and stores into a CCK imagefield.

Functions & methods

Name — Description
feedapi_imagegrabber_create_feed — Creates a feed object from a node => url array.
feedapi_imagegrabber_create_filename — Creates a unique filename for the file. Look at [#554980] for more.
feedapi_imagegrabber_cron — Implementation of hook_cron().
feedapi_imagegrabber_cron_time — Check for time limits in feed processing.
feedapi_imagegrabber_download_confirm — Confirms the downloading of the images from where it was paused.
feedapi_imagegrabber_download_confirm_submit — Submit handler for the grab images form.
feedapi_imagegrabber_download_images — Page callback for hook_menu().
feedapi_imagegrabber_download_image_feeling_lucky — Selects the first image between the user defined tag.
feedapi_imagegrabber_download_image_heuristics — Heuristics for selecting the right image from the original feed item of the URL when $feeling_lucky is FALSE.
feedapi_imagegrabber_feedapi_after_refresh — Implementation of hook_feedapi_after_refresh().
feedapi_imagegrabber_form_alter — Implementation of hook_form_alter().
feedapi_imagegrabber_form_node_validate — Validates the form altered for the feedapi enabled content type node.
feedapi_imagegrabber_get_image — Downloads an image from the given URL and stores it into the given filename.
feedapi_imagegrabber_get_settings — Returns the ImageGrabber settings associated with a feed.
feedapi_imagegrabber_help — Implementation of hook_help().
feedapi_imagegrabber_image_field — Returns the image field of the feed-item content type in which ImageGrabber stores the image for the feed-item.
feedapi_imagegrabber_image_upload — Validates and uploads the image file into the CCK image field.
feedapi_imagegrabber_is_enabled — Checks whether ImageGrabber is enabled for this node or not.
feedapi_imagegrabber_is_image — Checks whether the file is a valid Drupal-recognized image or not.
feedapi_imagegrabber_menu — Implementation of hook_menu().
feedapi_imagegrabber_nodeapi — Implementation of hook_nodeapi().
feedapi_imagegrabber_perm — Implementation of hook_perm().
feedapi_imagegrabber_remote_image_exists — Checks if a remote file exists and is below some specific size (1 MB).
feedapi_imagegrabber_search_images_to_select — Browses through all the images between the user defined tag to select the image.
feedapi_imagegrabber_select_image — Processes the feed options for ImageGrabber and calls the appropriate helper function to download the image for the node.
_feedapi_imagegrabber_op_access — Helper access function for hook_menu().

Constants

Name — Description
FEEDAPI_IMAGEGRABBER_CRON_FEEDS — Number of feeds to process for each step in cron.

File

View source
  1. <?php
  2. //(c) Srijan Technologies Pvt. Ltd. 2009
  3. /**
  4. * @file
  5. * Grabs the primary image from the feed-item and
  6. * stores into a CCK imagefield.
  7. */
  // Number of feeds to process for each step in cron. hook_cron() seeds its
  // per-pass countdown with this value.
  define('FEEDAPI_IMAGEGRABBER_CRON_FEEDS', 20);
  /**
   * Implementation of hook_help().
   *
   * Supplies the text of the module's own help page.
   *
   * @param $path
   *   The path help is being requested for.
   * @param $arg
   *   Path components; not used by this implementation.
   *
   * @return
   *   The help markup for 'admin/help#feedapi_imagegrabber'; nothing for any
   *   other path.
   */
  function feedapi_imagegrabber_help($path, $arg) {
    if ($path != 'admin/help#feedapi_imagegrabber') {
      return;
    }

    // One entry per paragraph; each is wrapped in <p>...</p> below.
    $paragraphs = array(
      t('Provides a parser for <a href="@feedapi">FeedAPI</a>, which visits the original posts of a feed and downloads the main image from the post.', array('@feedapi' => url('admin/help/feedapi'))),
      t('FeedAPI ImageGrabber mimics the thumbnail display of Google Reader on your Drupal website.'),
      t('After downloading the main image from the post, it is stored with the node created by FeedAPI Node, inside an Imagefield. You can add new imagefields to a content type by clicking <i>manage fields</i> on the <a href="@content-types">content types</a> page. You can also use ImageCache and ImageAPI to add presets to the imagefields', array('@content-types' => url('admin/content/types'))),
      t('Usually ImageGrabber will search the whole of the web-page for an appropriate image, but you can restrict its search by specifying the id/class of the tag between which, you think, is the appropriate image. Please look <a href="@id-class">here</a> for a tutorial on id/class', array('@id-class' => url('http://www.w3.org/TR/html401/struct/global.html#h-7.5.2'))),
      t('I have prepared a turorial for FeedAPI ImageGrabber. Please look <a href="@tutorial">here</a> for help. Please open issues in the support forums if you face any issues.', array('@tutorial' => url('http://publicmind.in/blog/tutorial-for-feedapi-imagegrabber'))),
    );

    return '<p>'. implode('</p><p>', $paragraphs) .'</p>';
  }
  /**
   * Implementation of hook_menu().
   *
   * Registers a "Grab Images" local task on node pages. Access is decided by
   * _feedapi_imagegrabber_op_access(), which receives the loaded node.
   *
   * @return
   *   Menu item definitions keyed by path.
   */
  function feedapi_imagegrabber_menu() {
    return array(
      'node/%node/grabber' => array(
        'title' => 'Grab Images',
        'page callback' => 'feedapi_imagegrabber_download_images',
        'page arguments' => array(1),
        'type' => MENU_LOCAL_TASK,
        'access callback' => '_feedapi_imagegrabber_op_access',
        'access arguments' => array(1),
      ),
    );
  }
  /**
   * Access callback for the "Grab Images" tab registered in hook_menu().
   *
   * @param $node
   *   The node whose tab is being accessed.
   *
   * @return
   *   FALSE when ImageGrabber is not enabled for the node. Otherwise TRUE for
   *   users holding 'administer imagegrabber', or for the node's author when
   *   they may edit their own content of this node type.
   */
  function _feedapi_imagegrabber_op_access($node) {
    global $user;

    $state = feedapi_imagegrabber_is_enabled($node->nid);
    $is_disabled = !isset($state) || $state == 0 || $state == '0';
    if ($is_disabled) {
      return FALSE;
    }

    if (user_access('administer imagegrabber')) {
      return TRUE;
    }

    return $node->uid == $user->uid && user_access('edit own '. $node->type .' content');
  }
  /**
   * Implementation of hook_perm().
   *
   * @return
   *   The permission names declared by this module.
   */
  function feedapi_imagegrabber_perm() {
    $permissions = array();
    $permissions[] = 'administer imagegrabber';
    $permissions[] = 'enable imagegrabber';
    return $permissions;
  }
  /**
   * Validates an already-downloaded image and registers it for a CCK image field.
   *
   * The file is copied into the field's upload directory, run through the
   * imagefield/filefield upload validators, and written to the {files} table as
   * a temporary file. hook_file_insert() implementations are then invoked.
   *
   * @param $filename
   *   Path to the image to be uploaded.
   * @param $field
   *   Name of the CCK image field the image is uploaded for (check that this
   *   field exists before passing it).
   * @param $replace
   *   Replace behavior handed to file_destination() when choosing the final
   *   path inside the field directory.
   *
   * @return
   *   The file object (including ->fid) if the image is stored successfully,
   *   0 if any error occurs.
   */
  function feedapi_imagegrabber_image_upload($filename, $field, $replace = FILE_EXISTS_RENAME) {
    if (empty($filename)) {
      return 0;
    }

    $field_instance = content_fields($field);
    $dest = filefield_widget_file_path($field_instance);
    if (!field_file_check_directory($dest, FILE_CREATE_DIRECTORY)) {
      drupal_set_message(t('ImageGrabber: %dir directory is not writable, Please check the permissions', array('%dir' => $dest)), 'error', FALSE);
      return 0;
    }

    global $user;

    $validators = array_merge(
      imagefield_widget_upload_validators($field_instance),
      filefield_widget_upload_validators($field_instance)
    );
    // Add in our check of the file name length.
    $validators['file_validate_name_length'] = array();
    $image_extensions = 'jpg jpeg gif png';

    // Begin building file object.
    $file = new stdClass();
    $file->filename = file_munge_filename(trim(basename($filename)), $image_extensions);
    $file->filepath = $filename;
    $file->filemime = file_get_mimetype($file->filename);

    // Refuse potentially executable files, to help prevent exploits. (A name
    // matching this pattern can never end in '.txt', so no further check is
    // needed.)
    if (preg_match('/\.(php|pl|py|cgi|asp|js)$/i', $file->filename)) {
      return 0;
    }

    $file->source = $field ."_imagegrabber";
    $destination_path = file_destination(file_create_path($dest .'/'. $file->filename), $replace);
    $path_original = $filename;
    if (!file_copy($filename, $destination_path, FILE_EXISTS_REPLACE)) {
      return 0;
    }
    // The copy succeeded, so the downloaded original is no longer needed.
    if (!($path_original == $destination_path || file_delete($path_original))) {
      drupal_set_message(t('ImageGrabber: Please remove the file %file, ImageGrabber was unable to do so.', array('%file' => $path_original)), 'warning');
    }

    $filename = $destination_path;
    $file->filename = file_munge_filename(trim(basename($filename)), $image_extensions);
    $file->destination = $filename;
    $file->filepath = $filename;
    // NOTE(review): 0744 sets the owner-execute bit on a remotely fetched
    // file; kept as-is for compatibility, but 0644 looks like the safer mode.
    chmod($filename, 0744);

    if (!($filesize = filesize($filename))) {
      drupal_set_message(t('ImageGrabber: Filesize for some files couldn\'t be calculated and aren\'t saved.'), 'error', FALSE);
      file_delete($filename);
      return 0;
    }
    $file->filesize = $filesize;

    // Call the validation functions; each returns an array of error strings.
    $errors = array();
    foreach ($validators as $function => $args) {
      array_unshift($args, $file);
      $errors = array_merge($errors, call_user_func_array($function, $args));
    }

    // Check for validation errors.
    if (!empty($errors)) {
      $message = t('ImageGrabber: The selected file %name could not be uploaded.', array('%name' => $file->filename));
      if (count($errors) > 1) {
        $message .= '<ul><li>'. implode('</li><li>', $errors) .'</li></ul>';
      }
      else {
        $message .= ' '. array_pop($errors);
      }
      drupal_set_message($message, 'error', FALSE);
      file_delete($filename);
      return 0;
    }

    $file->uid = $user->uid;
    $file->status = FILE_STATUS_TEMPORARY;
    $file->timestamp = time();
    // Without a {files} row the object has no fid and the copied file would be
    // orphaned on disk, so treat a failed write like any other upload error.
    if (!drupal_write_record('files', $file)) {
      drupal_set_message(t('ImageGrabber: The file %name could not be recorded in the database.', array('%name' => $file->filename)), 'error', FALSE);
      file_delete($filename);
      return 0;
    }

    // Let modules add additional properties to the yet barebone file object.
    foreach (module_implements('file_insert') as $module) {
      $function = $module .'_file_insert';
      $function($file);
    }
    return $file;
  }
  /**
   * Stores an explanatory notice in the ImageGrabber fieldset.
   *
   * Used by feedapi_imagegrabber_form_alter() when the settings cannot be shown.
   *
   * @param $form
   *   The form being altered.
   * @param $value
   *   Value for $form['imagegrabber']['value']; a non-true value makes the
   *   validate and nodeapi handlers skip the ImageGrabber settings.
   * @param $title
   *   Already translated notice title.
   * @param $description
   *   Already translated notice description.
   */
  function _feedapi_imagegrabber_form_notice(&$form, $value, $title, $description) {
    $form['imagegrabber']['value'] = array(
      '#type' => 'value',
      '#value' => $value,
    );
    $form['imagegrabber']['message'] = array(
      '#title' => $title,
      '#type' => 'item',
      '#description' => $description,
    );
  }

  /**
   * Implementation of hook_form_alter().
   *
   * Adds the ImageGrabber fieldset to the node form of FeedAPI-enabled content
   * types for users with the 'enable imagegrabber' permission. When the
   * feedapi_node processor is not enabled, or the feed-item content type has no
   * imagefield, only an explanatory notice is added.
   */
  function feedapi_imagegrabber_form_alter(&$form, $form_state, $form_id) {
    if (!user_access('enable imagegrabber')) {
      return;
    }
    $is_feed_node_form = isset($form['type']) && isset($form['#node'])
      && $form['type']['#value'] .'_node_form' == $form_id
      && feedapi_enabled_type($form['type']['#value']);
    if (!$is_feed_node_form) {
      return;
    }

    // Don't blow away existing form elements.
    if (!isset($form['imagegrabber'])) {
      $form['imagegrabber'] = array();
    }
    $form['imagegrabber'] += array(
      '#type' => 'fieldset',
      '#title' => t('ImageGrabber'),
      '#collapsible' => TRUE,
      '#collapsed' => FALSE,
      '#tree' => TRUE,
    );

    $node = $form['#node'];
    if (isset($node->nid)) {
      // Existing feed: the processor counts as enabled when it is present in
      // the per-node settings.
      $feedapi_settings = feedapi_get_settings($node->type, $node->vid);
      $is_feedapi_node_enabled = isset($feedapi_settings['processors']['feedapi_node']);
    }
    else {
      // New feed: fall back to the content type defaults.
      $feedapi_settings = feedapi_get_settings($node->type);
      $is_feedapi_node_enabled = isset($feedapi_settings['processors']['feedapi_node']['enabled'])
        ? $feedapi_settings['processors']['feedapi_node']['enabled']
        : FALSE;
    }

    if (!$is_feedapi_node_enabled) {
      _feedapi_imagegrabber_form_notice(
        $form,
        $is_feedapi_node_enabled,
        t('Enable the feedapi_node processor for this feed to use Imagegrabber'),
        t('ImageGrabber settings have been removed, because it needs the feedapi_node processor. Enable the feedapi_node processor for this feed to use ImageGrabber.')
      );
      return;
    }

    // Load the stored settings once; new nodes and feeds without a settings row
    // start from the defaults.
    $imagegrabber_settings = isset($node->nid) ? feedapi_imagegrabber_get_settings($node->nid) : FALSE;
    if ($imagegrabber_settings == FALSE) {
      $enabled = 0;
      $image_field = '0';
      $id_class = 0;
      $id_class_description = '';
      $feeling_lucky = 0;
    }
    else {
      $enabled = $imagegrabber_settings['enabled'];
      $image_field = $imagegrabber_settings['image_field'];
      $id_class = $imagegrabber_settings['id_class'];
      $id_class_description = $imagegrabber_settings['id_class_description'];
      $feeling_lucky = $imagegrabber_settings['feeling_lucky'];
    }

    $content_type_processor = isset($feedapi_settings['processors']['feedapi_node']['content_type'])
      ? $feedapi_settings['processors']['feedapi_node']['content_type']
      : '';
    if (empty($content_type_processor)) {
      _feedapi_imagegrabber_form_notice(
        $form,
        0,
        t('ImageGraber could not find the associated content type for the feed-item. Please try re-loading the page, otherwise report on the support forums.'),
        t('This is an un-expected error. It might be due to change in APIs of other dependent modules. Please report the issue at the earliest.')
      );
      return;
    }

    // Key 0 is the "nothing selected" placeholder; every other key is the name
    // of an imagefield on the feed-item content type.
    $options = array(t('Select an Image field'));
    $type = content_types($content_type_processor);
    foreach ($type['fields'] as $field) {
      if ($field['widget']['type'] == 'imagefield_widget') {
        $options[$field['field_name']] = $field['field_name'];
      }
    }
    if (count($options) == 1) {
      _feedapi_imagegrabber_form_notice(
        $form,
        0,
        t('There are no imagefields associated with <i>@node</i> content type.', array('@node' => $content_type_processor)),
        t('ImageGrabber stores the image in an image field of the feed item node. The content type for the feed-item of this feed is <i>@node</i>. Add Imagefields to <i>@node</i> content type in order to enable ImageGrabber.', array('@node' => $content_type_processor))
      );
      return;
    }

    $form['imagegrabber']['content_type_node'] = array(
      '#type' => 'value',
      '#value' => $content_type_processor,
    );
    $form['imagegrabber']['enabled'] = array(
      '#type' => 'checkbox',
      '#title' => t('Enable ImageGrabber for this feed'),
      '#description' => t('Check if you want to download the images of feed items to your site.'),
      '#default_value' => $enabled,
      '#weight' => -17,
    );
    // The default must be an option KEY: the stored field name when enabled,
    // otherwise the placeholder key 0 (not its translated label).
    $default = ($enabled == 1) ? $image_field : 0;
    $form['imagegrabber']['image_field'] = array(
      '#type' => 'select',
      '#title' => t('Store the image in: (Select an Imagefield)'),
      '#description' => t('ImageGrabber stores the image into this image field of the feed item. The content type for the feed-item of this feed is <i>@node</i>', array('@node' => $content_type_processor)),
      '#options' => $options,
      '#default_value' => $default,
      '#weight' => -16,
    );
    $form['imagegrabber']['id_class'] = array(
      '#type' => 'radios',
      '#title' => t('Search for an image between the tag which is identified by '),
      '#options' => array(t('None, search the whole web-page for the image.'),
        t('an ID'),
        t('a Class'),
      ),
      '#description' => t('Select <i>None</i> if you are not sure what this means. For help, click <a href="@feedapi-imagegrabber">here</a>. Enter the <i>ID</i> or <i>CLASS</i> of the tag in the textfield below. ', array('@feedapi-imagegrabber' => url('http://publicmind.in/blog/tutorial-for-feedapi-imagegrabber'))),
      '#default_value' => $id_class,
      '#weight' => -15,
    );
    $form['imagegrabber']['id_class_description'] = array(
      '#type' => 'textfield',
      '#title' => t('<i>ID</i> or <i>CLASS</i> of the HTML tag (Leave empty if you selected <i>None</i> above.)'),
      '#description' => t('Enter the <i>ID</i> or <i>CLASS</i> of the html element between which you want ImageGrabber to search for the image. In case of <i>CLASS</i>, the first tag matching the specified <i>CLASS</i> will be selected. Note that both <i>ID</i> and <i>CLASS</i> are CASE-SENSITIVE. For help, click <a href="@feedapi-imagegrabber">here</a>.', array('@feedapi-imagegrabber' => url('http://publicmind.in/blog/tutorial-for-feedapi-imagegrabber'))),
      '#default_value' => $id_class_description,
      '#maxlength' => 100,
      '#weight' => -14,
    );
    $form['imagegrabber']['feeling_lucky'] = array(
      '#type' => 'radios',
      '#title' => t('Feeling Lucky, huh?'),
      '#options' => array(t('No, Select the largest image between the tag.'),
        t('Yes, Select the first image between the tag. (Recommended)'),
      ),
      '#default_value' => $feeling_lucky,
      '#weight' => -13,
    );
    // Set the value to 1 before returning the form, because everything went ok.
    $form['imagegrabber']['value'] = array(
      '#type' => 'value',
      '#value' => 1,
    );
    $form['#validate'][] = 'feedapi_imagegrabber_form_node_validate';
  }
  /**
   * Validates the ImageGrabber settings added to a FeedAPI-enabled node form.
   *
   * Nothing is checked unless the form was fully built (imagegrabber 'value' is
   * 1) and the "enabled" checkbox is ticked. Besides flagging invalid input,
   * ImageGrabber is switched off (with a warning) when the feedapi_node
   * processor is missing from the submission or the feed-item content type was
   * changed.
   *
   * @param $form
   *   The form structure.
   * @param $form_state
   *   The form state; 'enabled' may be reset to 0 through form_set_value().
   */
  function feedapi_imagegrabber_form_node_validate($form, &$form_state) {
    if (!isset($form_state['values']['imagegrabber']['value']) || $form_state['values']['imagegrabber']['value'] != 1) {
      return;
    }
    if ($form['imagegrabber']['enabled']['#value'] != 1) {
      return;
    }

    // Error names use the 'parent][child' form so the offending element is
    // flagged, not just the message shown.
    if ($form['imagegrabber']['image_field']['#value'] == '0') {
      form_set_error('imagegrabber][image_field', t('ImageGrabber is enabled but no image field is selected'));
      return;
    }

    $id_class = $form['imagegrabber']['id_class']['#value'];
    $id_class_desc = $form['imagegrabber']['id_class_description']['#value'];
    if ($id_class != 0) {
      if (empty($id_class_desc)) {
        form_set_error('imagegrabber][id_class_description', t('ImageGrabber: ID or Class field can not be empty.'));
        return;
      }
      // Must start with a letter, followed by letters, digits, '_' or '-'.
      if (!preg_match('/^[a-zA-Z]+[_a-zA-Z0-9-]*$/', $id_class_desc)) {
        form_set_error('imagegrabber][id_class_description', t('ImageGrabber: ID/CLASS string is not valid. Only alphabets, digits, hyphens and underscores are allowed.'));
        return;
      }
    }

    if (!isset($form_state['values']['feedapi']['processors']['feedapi_node'])) {
      form_set_value($form['imagegrabber']['enabled'], 0, $form_state);
      drupal_set_message(t('ImageGrabber has been disabled for this feed as it depends on feedapi_node processor and it was unfortunately disabled.'), 'warning');
      return;
    }

    // The stored image field belongs to the old feed-item content type, so a
    // changed type invalidates the selection.
    $content_type_node_new = $form['feedapi']['processors']['feedapi_node']['content_type']['#value'];
    $content_type_node_old = $form['imagegrabber']['content_type_node']['#value'];
    if (strcmp($content_type_node_new, $content_type_node_old) != 0) {
      form_set_value($form['imagegrabber']['enabled'], 0, $form_state);
      drupal_set_message(t('ImageGrabber has been disabled for this feed becuase you changed the content type of the feed items. Please edit the feed again to enable ImageGrabber'), 'warning');
      return;
    }
  }
  /**
   * Implementation of hook_nodeapi().
   *
   * Persists the ImageGrabber settings submitted with a FeedAPI-enabled node on
   * insert/update, and removes the node's settings and queued cron items on
   * delete.
   *
   * @param $node
   *   The node being acted on.
   * @param $op
   *   The operation; only 'insert', 'update' and 'delete' are handled.
   * @param $teaser
   *   Unused.
   * @param $page
   *   Unused.
   */
  function feedapi_imagegrabber_nodeapi(&$node, $op, $teaser = NULL, $page = NULL) {
    if (!feedapi_enabled_type($node->type)) {
      return;
    }

    switch ($op) {
      case 'insert':
      case 'update':
        // Nodes saved without the fully built ImageGrabber form (programmatic
        // saves, or the form showed only a notice) carry no settings to store.
        if (empty($node->imagegrabber['value']) || !isset($node->nid)) {
          break;
        }
        $nid = $node->nid;
        $enabled = isset($node->imagegrabber['enabled']) ? $node->imagegrabber['enabled'] : 0;
        if ($enabled == 1) {
          $image_field = $node->imagegrabber['image_field'];
          $id_class = $node->imagegrabber['id_class'];
          $id_class_desc = ($id_class != 0) ? $node->imagegrabber['id_class_description'] : '';
          $feeling_lucky = $node->imagegrabber['feeling_lucky'];

          // Decide between UPDATE and INSERT with an explicit lookup: an UPDATE
          // that changes nothing can report zero affected rows even though the
          // row exists.
          $exists = db_result(db_query("SELECT COUNT(*) FROM {feedapi_imagegrabber} WHERE nid = %d", $nid));
          if ($exists) {
            db_query("UPDATE {feedapi_imagegrabber} SET enabled = %d , image_field = '%s' , id_class = %d , id_class_description = '%s' , feeling_lucky = %d WHERE nid = %d", $enabled, $image_field, $id_class, $id_class_desc, $feeling_lucky, $nid);
          }
          else {
            db_query("INSERT INTO {feedapi_imagegrabber} (nid, enabled, image_field, id_class, id_class_description, feeling_lucky) VALUES (%d, %d, '%s', %d, '%s', %d)", $nid, $enabled, $image_field, $id_class, $id_class_desc, $feeling_lucky);
          }
        }
        else {
          // Disabled: only flip the flag and keep the other stored settings.
          db_query("UPDATE {feedapi_imagegrabber} SET enabled = %d WHERE nid = %d", $enabled, $nid);
        }
        break;

      case 'delete':
        $nid = $node->nid;
        @db_query("DELETE FROM {feedapi_imagegrabber} where nid = %d", $nid);
        @db_query("DELETE FROM {feedapi_imagegrabber_cron_nodes} where feed_nid = %d", $nid);
        break;
    }
  }
  /**
   * Implementation of hook_feedapi_after_refresh().
   *
   * Called after a feed is refreshed. For every feed item it selects an image
   * from the item's original page, uploads it and saves it into the configured
   * CCK image field of the item node. Items that cannot be handled within the
   * time budget of feedapi_imagegrabber_cron_time() are queued in
   * {feedapi_imagegrabber_cron_nodes}.
   *
   * @param $feed
   *   The feed object which contains all the feed items.
   * @param $is_cron
   *   TRUE when called by this module's own hook_cron() to replay queued items,
   *   FALSE when invoked by FeedAPI.
   */
  function feedapi_imagegrabber_feedapi_after_refresh($feed, $is_cron = FALSE) {
    // Items replayed by our own cron were queued by a run that already passed
    // this check; cron itself may run as a user without the permission, which
    // would otherwise leave the queue untouched.
    if (!$is_cron && !user_access('administer imagegrabber')) {
      return;
    }
    $feed = (array)$feed;
    if (!isset($feed['nid'])) {
      return;
    }
    $feed_nid = $feed['nid'];
    $settings = feedapi_imagegrabber_get_settings($feed_nid);
    if ($settings == FALSE) {
      return;
    }
    if (!function_exists('curl_exec')) {
      drupal_set_message(t('ImageGrabber: The cURL Library for PHP is missing or outdated. Go to <a href="!admin-reports-status">Status Report page</a>', array('!admin-reports-status' => url('admin/reports/status'))), 'error');
      return;
    }
    $path = drupal_get_path('module', 'feedapi_imagegrabber') .'/url_to_absolute.php';
    if (!file_exists($path)) {
      drupal_set_message(t('ImageGrabber: The url conversion script is missing. Go to <a href="!admin-reports-status">Status Report page</a>', array('!admin-reports-status' => url('admin/reports/status'))), 'error');
      return;
    }
    require_once($path);

    $enabled = $settings['enabled'];
    $image_field = $settings['image_field'];
    $id_class = $settings['id_class'];
    $id_class_description = $settings['id_class_description'];
    $feeling_lucky = $settings['feeling_lucky'];
    if (!isset($enabled) || $enabled == 0 || $enabled == '0') {
      return;
    }
    if ($enabled == 1 && empty($image_field)) {
      return;
    }
    if (($id_class == 1 || $id_class == 2) && empty($id_class_description)) {
      return;
    }

    // nid => original url of the items postponed to a later cron run.
    $cron_items = array();
    $cron_empty = TRUE;
    $items = isset($feed['items']) ? $feed['items'] : array();
    foreach ($items as $argument) {
      $original_url = isset($argument->options->original_url) ? $argument->options->original_url : NULL;
      if (isset($argument->nid)) {
        $nid = $argument->nid;
      }
      else {
        if (empty($original_url)) {
          continue;
        }
        // db_result() yields the scalar nid; a row object cannot serve as an
        // array key or as a node_load() argument.
        $nid = db_result(db_query("SELECT nid FROM {feedapi_node_item} WHERE url = '%s'", $original_url));
        if (empty($nid)) {
          continue;
        }
      }

      // Once the time budget is exhausted, every remaining item is queued.
      if (!$cron_empty || !feedapi_imagegrabber_cron_time()) {
        $cron_items[$nid] = $original_url;
        $cron_empty = FALSE;
        continue;
      }

      if (!($node = node_load($nid))) {
        continue;
      }
      $type = content_types($node->type);
      $fields = $type['fields'];
      if (!array_key_exists($image_field, $fields)) {
        continue;
      }
      $filename = feedapi_imagegrabber_select_image($original_url, $id_class, $id_class_description, $feeling_lucky);
      if (!$filename) {
        continue;
      }
      if (!($file = feedapi_imagegrabber_image_upload($filename, $image_field))) {
        continue;
      }
      if (isset($file->fid)) {
        $node->$image_field = array(
          array(
            'fid' => $file->fid,
            'filename' => $file->filename,
            'filepath' => $file->filepath,
            'status' => $file->status,
            'filesize' => $file->filesize,
            'list' => 1,
          ),
        );
        // A little hack to make filefield_paths module <= v1.3 process this node.
        if (module_exists('filefield_paths')) {
          $node->form_id = $node->type .'_node_form';
        }
        node_save($node);
      }
    }

    // A cron replay consumes the queued row; leftovers are re-queued below.
    if ($is_cron) {
      @db_query("DELETE FROM {feedapi_imagegrabber_cron_nodes} where feed_nid = %d", $feed_nid);
    }
    if (!$cron_empty) {
      $data = db_fetch_array(db_query("SELECT feed_nid, nid FROM {feedapi_imagegrabber_cron_nodes} WHERE feed_nid = %d", $feed_nid));
      if ($data != FALSE) {
        // The column holds a serialized array written by this function.
        $data_nid = unserialize($data['nid']);
        if (!is_array($data_nid)) {
          $data_nid = array();
        }
        foreach ($cron_items as $queued_nid => $url) {
          $data_nid[$queued_nid] = $url;
        }
        @db_query("UPDATE {feedapi_imagegrabber_cron_nodes} SET nid = '%s' WHERE feed_nid = %d", serialize($data_nid), $feed_nid);
      }
      else {
        @db_query("INSERT INTO {feedapi_imagegrabber_cron_nodes} (feed_nid, nid) VALUES (%d, '%s')", $feed_nid, serialize($cron_items));
      }
      drupal_set_message(t('ImageGrabber: PHP execution time limit for system is !limit seconds, due to which images for some feed items couldn\'t be downloaded. Please click on \'Grab Images\' to refresh those feed-items.', array('!limit' => ini_get('max_execution_time'))), 'warning');
    }
    else {
      drupal_set_message(t('ImageGrabber completed successfully!!'));
    }
  }
  /**
   * Implementation of hook_cron().
   *
   * Replays the feed items queued in {feedapi_imagegrabber_cron_nodes}, in
   * batches of FEEDAPI_IMAGEGRABBER_CRON_FEEDS feeds, for as long as
   * feedapi_imagegrabber_cron_time() reports remaining time. Another batch is
   * fetched only when the previous one was consumed completely.
   */
  function feedapi_imagegrabber_cron() {
    $process = 0;
    while (!$process && feedapi_imagegrabber_cron_time()) {
      $process = FEEDAPI_IMAGEGRABBER_CRON_FEEDS;
      // db_query_range() expects both the offset and the row count; use this
      // module's own batch size rather than FeedAPI's constant.
      $result = db_query_range("SELECT feed_nid, nid FROM {feedapi_imagegrabber_cron_nodes}", 0, FEEDAPI_IMAGEGRABBER_CRON_FEEDS);
      while (feedapi_imagegrabber_cron_time() && ($data = db_fetch_array($result))) {
        $feed_items = unserialize($data['nid']);
        $feed_nid = $data['feed_nid'];
        $feed = feedapi_imagegrabber_create_feed($feed_nid, $feed_items);
        feedapi_imagegrabber_feedapi_after_refresh($feed, TRUE);
        $process--;
      }
    }
  }
  /**
   * Check for time limits in feed processing.
   *
   * The budget is two thirds of PHP's max_execution_time, or 120 seconds when
   * that setting is 0. It is compared against the 'page' timer.
   *
   * @return boolean
   *   TRUE while the budget is not exceeded, FALSE once it is.
   */
  function feedapi_imagegrabber_cron_time() {
    $limit = ini_get('max_execution_time');
    $budget = ($limit == 0) ? 120 : $limit * 2 / 3;
    $elapsed = timer_read('page') / 1000;
    return !($elapsed > $budget);
  }
  /**
   * Looks up whether ImageGrabber is enabled for a feed node.
   *
   * @param $nid
   *   Node id of the feed.
   *
   * @return
   *   The db_result() of the 'enabled' column lookup in {feedapi_imagegrabber}
   *   for this node.
   */
  function feedapi_imagegrabber_is_enabled($nid) {
    $result = db_query("SELECT enabled FROM {feedapi_imagegrabber} WHERE nid = %d", $nid);
    return db_result($result);
  }
  /**
   * Looks up the image field in which ImageGrabber stores the image of the
   * feed items of a feed.
   *
   * @param $nid
   *   Node id of the feed.
   *
   * @return
   *   The db_result() of the 'image_field' column lookup in
   *   {feedapi_imagegrabber} for this node.
   */
  function feedapi_imagegrabber_image_field($nid) {
    $result = db_query("SELECT image_field FROM {feedapi_imagegrabber} WHERE nid = %d", $nid);
    return db_result($result);
  }
  /**
   * Loads the ImageGrabber settings row of a feed.
   *
   * @param $nid
   *   Node id of the feed.
   *
   * @return
   *   The db_fetch_array() result for the node's {feedapi_imagegrabber} row,
   *   with the columns enabled, image_field, refresh_on_update, id_class,
   *   id_class_description and feeling_lucky. Callers treat a value equal to
   *   FALSE as "no settings".
   */
  function feedapi_imagegrabber_get_settings($nid) {
    $result = db_query("SELECT enabled, image_field, refresh_on_update, id_class, id_class_description, feeling_lucky FROM {feedapi_imagegrabber} WHERE nid = %d", $nid);
    return db_fetch_array($result);
  }
  /**
   * Processes the feed options for ImageGrabber and calls the appropriate helper
   * function to download the image for the node.
   *
   * The feed item's page is fetched with cURL and parsed as HTML. The search is
   * then narrowed to the <body>, to the element with the given id, or to the
   * first element carrying the given class.
   *
   * @param string $original_url
   *   URL of the feed item.
   * @param integer $id_class
   *   0 to search the whole body, 1 to search inside the element identified by
   *   id, 2 to search inside the first element with the class.
   * @param string $id_class_description
   *   The id or class of the tag.
   * @param integer $feeling_lucky
   *   0 to let feedapi_imagegrabber_search_images_to_select() choose among all
   *   images, anything else to use
   *   feedapi_imagegrabber_download_image_feeling_lucky().
   *
   * @return
   *   The value returned by the chosen helper on success, FALSE on failure.
   */
  function feedapi_imagegrabber_select_image($original_url, $id_class, $id_class_description, $feeling_lucky) {
    // Reject an unknown search mode before spending a network request on it.
    if (!($id_class == 0 || $id_class == 1 || $id_class == 2)) {
      return FALSE;
    }

    $ch = curl_init($original_url);
    if ($ch === FALSE) {
      return FALSE;
    }
    curl_setopt_array($ch, array(
      // return web page
      CURLOPT_RETURNTRANSFER => TRUE,
      // fail on error
      CURLOPT_FAILONERROR => TRUE,
      // don't return headers
      CURLOPT_HEADER => FALSE,
      // follow redirects
      CURLOPT_FOLLOWLOCATION => TRUE,
      // handle all encodings
      CURLOPT_ENCODING => "",
      // who am i
      CURLOPT_USERAGENT => "feedapi_imagegrabber",
      // set referer on redirect
      CURLOPT_AUTOREFERER => TRUE,
      // timeout on connect
      CURLOPT_CONNECTTIMEOUT => 5,
      // timeout on response
      CURLOPT_TIMEOUT => 5,
      // stop after 2 redirects
      CURLOPT_MAXREDIRS => 2,
    ));
    $html = curl_exec($ch);
    curl_close($ch);

    // An empty body has nothing to search, and DOMDocument::loadHTML() refuses
    // an empty string outright on newer PHP versions.
    if (!is_string($html) || trim($html) === '') {
      return FALSE;
    }

    $doc = new DOMDocument();
    // Real-world markup is rarely valid; silence the parser warnings.
    @$doc->loadHTML($html);

    $dist = NULL;
    if ($id_class == 0) {
      $items = $doc->getElementsByTagName("body");
      if ($items != NULL && $items->length > 0) {
        $dist = $items->item(0);
      }
    }
    elseif ($id_class == 1) {
      $dist = $doc->getElementById($id_class_description);
    }
    else {
      // Match the class as a whole word inside the class attribute.
      $xpath = new DOMXPath($doc);
      $items = $xpath->query("//*[@class and contains(concat(' ',normalize-space(@class),' '),' $id_class_description ')]");
      if ($items != NULL && $items->length > 0) {
        $dist = $items->item(0);
      }
    }
    if ($dist == NULL) {
      return FALSE;
    }

    $xml = $dist->ownerDocument->saveXML($dist);
    if ($feeling_lucky == 0) {
      return feedapi_imagegrabber_search_images_to_select($original_url, $xml);
    }
    return feedapi_imagegrabber_download_image_feeling_lucky($original_url, $xml);
  }
  /**
   * Browses through all the images between the user defined tag to select one.
   *
   * Every <img> with a non-empty src is resolved against the feed item URL and
   * offered to feedapi_imagegrabber_download_image_heuristics(); the last URL
   * that function accepts is downloaded into the temporary directory.
   *
   * @param string $feed_item_url
   *   URL of the feed item, used as base for relative image URLs.
   * @param string $xml
   *   Serialized markup of the element to search in.
   *
   * @return
   *   The filename of the downloaded image on success, FALSE on failure.
   */
  function feedapi_imagegrabber_search_images_to_select($feed_item_url, $xml) {
    $doc = new DOMDocument();
    @$doc->loadXML($xml);
    $finder = new DOMXPath($doc);
    $images = $finder->evaluate("//img");
    $temp_dir = file_directory_temp();

    $found = FALSE;
    $best_url = '';
    // Updated by reference by the heuristics whenever a candidate is accepted.
    $best_size = 0;
    foreach ($images as $image) {
      $src = $image->getAttribute('src');
      if (empty($src)) {
        continue;
      }
      $absolute = url_to_absolute($feed_item_url, $src);
      if ($absolute != FALSE && feedapi_imagegrabber_download_image_heuristics($absolute, $best_size)) {
        $found = TRUE;
        $best_url = $absolute;
      }
    }

    if (!$found) {
      return FALSE;
    }
    $target = feedapi_imagegrabber_create_filename($temp_dir, $best_url);
    return feedapi_imagegrabber_get_image($best_url, $target) ? $target : FALSE;
  }
  /**
   * Heuristics for selecting the right image from the original feed item
   * of the URL when $feeling_lucky is FALSE.
   *
   * 1) the image must be a GIF, JPEG or PNG.
   * 2) the image must cover at least 15000 pixels (e.g. 100x150).
   * 3) the image must be at least as large as the largest one found so far.
   *
   * @param $image_url
   *   URL of the image.
   * @param $imagesize
   *   Pixel area of the largest image found till now. Passed by reference and
   *   updated to the area of this image when it is selected.
   *
   * @return boolean
   *   TRUE if image url was selected.
   *   FALSE if rejected or any other error as timeout, invalid url, etc.
   */
  function feedapi_imagegrabber_download_image_heuristics($image_url, &$imagesize) {
    if (!feedapi_imagegrabber_remote_image_exists($image_url)) {
      return FALSE;
    }

    $info = @getimagesize($image_url);
    if (!is_array($info)) {
      return FALSE;
    }

    // Index 2 of getimagesize() is an IMAGETYPE_* constant. Compare against the
    // named constants: the numeric value 4 is IMAGETYPE_SWF, not a second JPEG
    // code, so it must not be accepted.
    $allowed_types = array(IMAGETYPE_GIF, IMAGETYPE_JPEG, IMAGETYPE_PNG);
    if (!in_array($info[2], $allowed_types, TRUE)) {
      return FALSE;
    }

    // Index 0 is the width and index 1 is the height.
    $width = $info[0];
    $height = $info[1];
    $area = $width * $height;

    // Reject images below the minimum area or smaller than the current best.
    if ($area < 15000 || $area < $imagesize) {
      return FALSE;
    }

    $imagesize = $area;
    return TRUE;
  }
  /**
   * Downloads the first usable image found inside the user-defined tag.
   *
   * Images are tried in document order; the first one that
   * feedapi_imagegrabber_get_image() manages to download is returned.
   *
   * @param string $feed_item_url
   *   URL of the feed item page, used as the base for relative image sources.
   * @param string $xml
   *   XML markup of the element in which images are searched.
   *
   * @return
   *   Path of the downloaded file on success, FALSE on failure.
   */
  function feedapi_imagegrabber_download_image_feeling_lucky($feed_item_url, $xml) {
    $tmp = file_directory_temp();

    $dom = new DOMDocument();
    @$dom->loadXML($xml);
    $finder = new DOMXPath($dom);

    foreach ($finder->evaluate("//img") as $img) {
      $src = $img->getAttribute('src');
      if (empty($src)) {
        continue;
      }

      $absolute = url_to_absolute($feed_item_url, $src);
      if (!$absolute) {
        continue;
      }

      $target = feedapi_imagegrabber_create_filename($tmp, $absolute);
      if (feedapi_imagegrabber_get_image($absolute, $target)) {
        return $target;
      }
    }

    // No <img> element at all, or none of them could be downloaded.
    return FALSE;
  }
  /**
   * Downloads an image from the given URL and stores it into the given
   * filename.
   *
   * The file is only kept when feedapi_imagegrabber_is_image() accepts it; a
   * download that turns out not to be an image is removed again so that no
   * stray files pile up in the destination directory.
   *
   * @param $image_url
   *   The URL of the image.
   * @param $filename
   *   The path of the file the image is written to.
   *
   * @return boolean
   *   TRUE on success. FALSE when cURL is unavailable, on timeout or transfer
   *   error, on an empty response, when the file cannot be written, or when
   *   the downloaded file is not a Drupal recognized image.
   */
  function feedapi_imagegrabber_get_image($image_url, $filename) {
    if (!function_exists('curl_exec')) {
      return FALSE;
    }

    $options = array(
      // return the response body instead of printing it
      CURLOPT_RETURNTRANSFER => TRUE,
      // fail on HTTP error status codes
      CURLOPT_FAILONERROR => TRUE,
      // don't return headers
      CURLOPT_HEADER => FALSE,
      // follow redirects
      CURLOPT_FOLLOWLOCATION => TRUE,
      // who am i
      CURLOPT_USERAGENT => "feedapi_imagegrabber",
      // set referer on redirect
      CURLOPT_AUTOREFERER => TRUE,
      // timeout on connect (seconds)
      CURLOPT_CONNECTTIMEOUT => 5,
      // timeout on response (seconds)
      CURLOPT_TIMEOUT => 5,
      // stop after 2 redirects
      CURLOPT_MAXREDIRS => 2,
    );

    $ch = curl_init($image_url);
    if ($ch === FALSE) {
      return FALSE;
    }
    curl_setopt_array($ch, $options);
    $content = curl_exec($ch);
    $err = curl_errno($ch);
    curl_close($ch);

    // An empty body can never be a valid image, so do not write it out.
    if ($err || $content === FALSE || $content === '') {
      return FALSE;
    }

    if (file_put_contents($filename, $content) !== FALSE && feedapi_imagegrabber_is_image($filename)) {
      return TRUE;
    }

    // Either the write failed part-way or the payload is not an image: do not
    // leave the unusable file behind.
    if (is_file($filename)) {
      @unlink($filename);
    }
    return FALSE;
  }
  /**
   * Tells whether a file is an image that Drupal recognizes.
   *
   * The file counts as an image when image_get_info() returns information for
   * it and that information carries a non-empty 'extension' entry.
   *
   * @param $filepath
   *   Path to the file to be checked.
   *
   * @return boolean
   *   TRUE for a recognized image, FALSE otherwise.
   */
  function feedapi_imagegrabber_is_image($filepath) {
    $details = image_get_info($filepath);
    return ($details && !empty($details['extension'])) ? TRUE : FALSE;
  }
  /**
   * Creates a unique filename for the file.
   * Look at [#554980] for more.
   *
   * The name is built from the first dot-separated part of the URL's basename
   * (truncated to 100 characters), the current timestamp and, when present,
   * the final extension of the basename. If the URL has no usable basename a
   * generic "feedapi_imagegrabber_<timestamp>" name is used instead.
   *
   * @param string $dir
   *   The directory in which the file is to be created.
   * @param string $url
   *   The URL the file is downloaded from.
   *
   * @return
   *   Destination filepath as returned by file_destination().
   */
  function feedapi_imagegrabber_create_filename($dir, $url) {
    $count = time();

    // parse_url() returns FALSE for seriously malformed URLs and omits the
    // 'path' key for URLs such as "http://example.com".
    $arr = parse_url($url);
    $filename = (is_array($arr) && isset($arr['path'])) ? basename($arr['path']) : '';

    if (!empty($filename)) {
      $filename_parts = explode('.', $filename);
      // Remove file basename.
      $new_filename = array_shift($filename_parts);
      // Remove final extension; NULL when the basename contains no dot.
      $final_extension = array_pop($filename_parts);
      $short_filename = drupal_strlen($new_filename) > 100 ? drupal_substr($new_filename, 0, 100) : $new_filename;

      $name = $short_filename .'_'. $count;
      // Only append the extension when there is one, so that names like
      // "image" or "image." do not end up with a trailing dot.
      if ($final_extension !== NULL && $final_extension !== '') {
        $name .= '.'. $final_extension;
      }
      return file_destination(file_create_path($dir .'/'. $name), FILE_EXISTS_RENAME);
    }

    return file_destination(file_create_path($dir .'/feedapi_imagegrabber_'. $count), FILE_EXISTS_RENAME);
  }
  /**
   * Page callback: renders the "grab images" confirmation form for a feed node.
   *
   * @param $node
   *   The node object handed over to the confirmation form builder.
   *
   * @return
   *   The output of drupal_get_form() for the confirmation form.
   */
  function feedapi_imagegrabber_download_images($node) {
    $form_id = 'feedapi_imagegrabber_download_confirm';
    return drupal_get_form($form_id, $node);
  }
  /**
   * Form builder: confirms the downloading of the images from where it was
   * paused.
   *
   * @param $form_state
   *   The form state (unused).
   * @param $node
   *   The feed node; its nid is carried in the form and its title is shown in
   *   the question.
   *
   * @return
   *   The confirmation form built by confirm_form().
   */
  function feedapi_imagegrabber_download_confirm($form_state, $node) {
    $form = array(
      'nid' => array('#type' => 'hidden', '#value' => $node->nid),
    );

    // The question is built with the escaping "@" placeholder: the node title
    // is user (or feed) supplied and must not be inserted as raw HTML.
    $question = t('Grab images for the feed-items of @name', array('@name' => $node->title));

    // Cancelling leads back to the requested destination, or to the node.
    $cancel_path = isset($_GET['destination']) ? $_GET['destination'] : 'node/'. $node->nid;

    $description = t("Due to PHP execution time limit, some feed-items might have been left by ImageGrabber during last refresh. Are you sure you want to proceed with the download of those feed-items?");

    $output = confirm_form(
      $form,
      $question,
      $cancel_path,
      $description,
      t('Yes'), t('No'),
      'feedapi_imagegrabber_download_confirm'
    );
    return $output;
  }
  /**
   * Submit handler of the "grab images" confirmation form.
   *
   * Loads the serialized list of pending feed items stored for the feed node
   * in {feedapi_imagegrabber_cron_nodes}, rebuilds a feed object from it and
   * passes that to feedapi_imagegrabber_feedapi_after_refresh(). In every case
   * the user is redirected to the feed node afterwards.
   */
  function feedapi_imagegrabber_download_confirm_submit($form, &$form_state) {
    $feed_nid = $form_state['values']['nid'];
    // Both outcomes below end on the node page, so set the redirect up front.
    $form_state['redirect'] = 'node/'. $feed_nid;

    $result = db_query("SELECT feed_nid, nid FROM {feedapi_imagegrabber_cron_nodes} WHERE feed_nid = %d", $feed_nid);
    $row = db_fetch_array($result);
    if (!$row) {
      drupal_set_message(t('No feed-items available for downloading images.'));
      return;
    }

    // The 'nid' column holds the serialized feed-item list for this feed.
    $pending = unserialize($row['nid']);
    $feed = feedapi_imagegrabber_create_feed($feed_nid, $pending);
    feedapi_imagegrabber_feedapi_after_refresh($feed, TRUE);
  }
  /**
   * Creates a feed object from a node => url array.
   *
   * The result has the feed nid in ->nid and one entry per feed item in
   * ->items, stored under consecutive numeric property names starting at 0.
   * Each entry carries the item nid in ->nid and its URL in
   * ->options->original_url.
   *
   * @param int $feed_nid
   *   The nid of the feed node.
   * @param mixed $array
   *   Feed items keyed by item nid with the original URL as value. Anything
   *   that cannot be iterated (e.g. FALSE from a failed unserialize()) is
   *   treated as "no items".
   *
   * @return
   *   The $feed object; ->items is empty when there are no items.
   */
  function feedapi_imagegrabber_create_feed($feed_nid, $array) {
    $feed = new stdClass();
    $feed->nid = $feed_nid;
    $feed->items = new stdClass();

    // Guard against non-iterable input so foreach does not raise a warning.
    if (!is_array($array) && !is_object($array)) {
      return $feed;
    }

    $item = 0;
    foreach ($array as $node => $url) {
      $options = new stdClass();
      $options->original_url = $url;

      $temp_item = new stdClass();
      $temp_item->nid = $node;
      $temp_item->options = $options;

      $feed->items->$item = $temp_item;
      $item++;
    }
    return $feed;
  }
  /**
   * Checks if a remote file exists and is below some specific size (1 MB).
   *
   * Issues a header-only request (no body is downloaded) and inspects the
   * HTTP status code and the reported content length.
   *
   * Note: the size is taken from cURL's CURLINFO_CONTENT_LENGTH_DOWNLOAD. When
   * the server does not announce a length cURL reports -1, which also passes
   * the "smaller than 1 MB" comparison, so such files are accepted.
   *
   * @param $url
   *   URL of the remote file.
   *
   * @return boolean
   *   TRUE when the request ends with status 200 and the reported length is
   *   below 1 MB; FALSE otherwise, including when cURL is unavailable.
   */
  function feedapi_imagegrabber_remote_image_exists($url) {
    // Same availability guard as feedapi_imagegrabber_get_image(): without it
    // a missing cURL extension would be a fatal error here.
    if (!function_exists('curl_exec')) {
      return FALSE;
    }

    $ch = curl_init();
    if ($ch === FALSE) {
      return FALSE;
    }
    curl_setopt($ch, CURLOPT_URL, $url);
    // Headers only: include them in the output and skip the body.
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_NOBODY, 1);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    // Same user agent as the actual download in feedapi_imagegrabber_get_image().
    curl_setopt($ch, CURLOPT_USERAGENT, "feedapi_imagegrabber");
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, TRUE);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 2);
    $result = curl_exec($ch);

    $ret = FALSE;
    if ($result !== FALSE) {
      // Who keeps such a big image except a hacker (cracker)
      $max_imagesize = 1024 * 1024;
      $statusCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
      $content_length = curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
      if ($statusCode == 200 && is_numeric($content_length) && $content_length < $max_imagesize) {
        $ret = TRUE;
      }
    }
    curl_close($ch);
    return $ret;
  }