Chris Pollett >
Students > [Bio] [Del2-Presentation on Web Crawlers-Nutch-PPT] [Del2-Implementation of Nutch Crawl] [Del3-Code Obfuscation Techniques-PPT] [CS298 Presentation Slides-PDF] |
Converting Images to the data: URI scheme. Description: As part of this deliverable, I wrote a program that takes the URL of a page as input from the user, fetches the page using cURL, and re-emits the images it references as base64-encoded data: URIs. Example: The two figures below are screenshots of what is seen before and after the base64 encoding. The two figures above show the HTML pages before and after the base64 encoding has been applied to the images. For the original page in the figure on the left, the source code looks like: <img src="http://localhost/CS297/Images/Image1.jpg" /> <br /> <a href ="PageB.html">Link to PageB</a> <img src="http://localhost/CS297/Images/Image2.jpg" /> Whereas for the new page in the figure on the right, the source code looks like: <img src="data:image/png;base64,/9j/4AAQSkZJRgABAgEBLAEsAAD/4QdVRXhpZgAATU0AKgAAAAgABwE\n SAAMAAAABAAEAAAEaAAUAAAABAAAAYgEbAAUAAAABAAAAagEoAAMAAAABAAIAAAExAAIAAAAUAAAAcgEyAAIAAAAUA\n AAAhodpAAQAAAABAAAAnAAAAMgAAAEsAAAAAQAAASwAAAABQWRvYmUgUGhvdG9zaG9wIDcuMAAyMDA3OjExOjI3IDE\n yOjAzOjM4AAAAAAOgAQADAAAAAf//AACgAgAEAAAAAQAAAMigAwAEAAAAAQAAAJYAAAAAAAAABgEDAAMAAAABAAYAA\n AEaAAUAAAABAAABFgEbAAUAAAABAAABHgEoAAMAAAABAAIAAAIBAAQAAAABAAABJgICAAQAAAABAAAGJwAAAAAAAAB\n IAAAAAQAAAEgAAAAB/9j/4AAQSkZJRgABAgEASABIAAD/7QAMQWRvYmVfQ00AAv/uAA5BZG9iZQBkgAAAAAH/2wCEA\n AwICAgJCAwJCQwRCwoLERUPDAwPFRgTExUTE....." /> The PHP code for my program follows. 
<?php
/*
 * Image -> data: URI converter.
 *
 * Reads a page address from the "url" query parameter, downloads the page,
 * and for every <img> it references prints
 * <div> <img src="data:<mime>;base64,..." /> </div> with the image bytes
 * embedded inline.
 *
 * Everything is fetched into memory. Nothing is written to disk: the page
 * and the image names are controlled by the remote site, so saving them
 * under their own names next to this script would let a remote page drop
 * arbitrary files (e.g. "shell.php") into the web directory.
 *
 * NOTE(review): this script fetches whatever http(s) address the visitor
 * supplies, so it can be pointed at hosts that are only reachable from the
 * server (SSRF). Restrict the allowed hosts before exposing it publicly.
 */

/**
 * Downloads a URL over HTTP or HTTPS.
 *
 * @param string $url Absolute http(s) address to download.
 * @return string|false The response body, or false when the transfer failed.
 */
function datauri_fetch($url)
{
    $handle = curl_init($url);
    if ($handle === false) {
        return false;
    }

    // Hand the body back as a string instead of streaming it into a file.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_HEADER, 0);
    // Refuse file://, gopher://, etc. even if the address is crafted.
    curl_setopt($handle, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}

/**
 * Extracts the src attribute of every <img> tag in an HTML document.
 *
 * Only <img> tags are considered (not <script>/<iframe> sources), and both
 * single- and double-quoted attribute values are accepted.
 *
 * @param string $html HTML markup to scan.
 * @return array List of raw src values in document order (may be empty).
 */
function datauri_image_sources($html)
{
    $pattern = '/<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1/is';
    if (!preg_match_all($pattern, $html, $found)) {
        return array();
    }

    return $found[2];
}

/**
 * Turns an <img> src value into an absolute, fetchable http(s) address.
 *
 * Handles absolute addresses, protocol-relative ("//host/x"), bare
 * "www."-prefixed hosts, root-relative ("/x") and document-relative ("x")
 * references.
 *
 * @param string $pageUrl Absolute address of the page the src was found on.
 * @param string $src     Raw src attribute value.
 * @return string|false Absolute address, or false when the src is empty,
 *                      uses a non-http scheme (data:, javascript:, ...),
 *                      or $pageUrl cannot be parsed.
 */
function datauri_resolve($pageUrl, $src)
{
    $src = trim($src);
    if ($src === '') {
        return false;
    }

    if (preg_match('#^https?://#i', $src)) {
        return $src;
    }

    // Any other explicit scheme is not something we can or should fetch.
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $src)) {
        return false;
    }

    $page = parse_url($pageUrl);
    if ($page === false || !isset($page['scheme']) || !isset($page['host'])) {
        return false;
    }

    if (strncmp($src, '//', 2) === 0) {
        return $page['scheme'] . ':' . $src;
    }

    // Scheme-less "www.example.com/x" is treated as an absolute host.
    if (strncasecmp($src, 'www.', 4) === 0) {
        return 'http://' . $src;
    }

    $origin = $page['scheme'] . '://' . $page['host'];
    if (isset($page['port'])) {
        $origin .= ':' . $page['port'];
    }

    if ($src[0] === '/') {
        return $origin . $src;
    }

    // Document-relative: resolve against the directory part of the page path.
    $path = isset($page['path']) ? $page['path'] : '/';
    $lastSlash = strrpos($path, '/');
    $directory = ($lastSlash === false) ? '/' : substr($path, 0, $lastSlash + 1);

    return $origin . $directory . $src;
}

/**
 * Detects the MIME type of downloaded bytes and confirms they are an image.
 *
 * @param string $data Raw file contents.
 * @return string|false An "image/..." MIME type, or false for anything else
 *                      (for example an HTML error page returned for a 404).
 */
function datauri_image_mime($data)
{
    $detector = new finfo(FILEINFO_MIME_TYPE);
    $mime = $detector->buffer($data);

    if (is_string($mime) && strncmp($mime, 'image/', 6) === 0) {
        return $mime;
    }

    return false;
}

$url = isset($_GET['url']) ? $_GET['url'] : '';

if (!is_string($url) || !preg_match('#^https?://#i', $url)) {
    echo '<br /><br />Please supply an http(s) page address in the "url" query parameter.<br />';
} else {
    // The address comes straight from the query string: escape it for HTML.
    echo '<br /><br />The page fetched is: ' . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '<br />';

    $pageHtml = datauri_fetch($url);

    if ($pageHtml === false) {
        echo 'The page could not be fetched.<br />';
    } else {
        foreach (datauri_image_sources($pageHtml) as $src) {
            $imageUrl = datauri_resolve($url, $src);
            if ($imageUrl === false) {
                continue;
            }

            $imageData = datauri_fetch($imageUrl);
            if ($imageData === false || $imageData === '') {
                continue;
            }

            // Label the data URI with the real type instead of assuming PNG.
            $mime = datauri_image_mime($imageData);
            if ($mime === false) {
                continue;
            }

            echo '<div> <img src="data:' . $mime . ';base64,' . base64_encode($imageData) . '" /> </div>';
        }
    }
}
?> |