Chris Pollett >
Students > [Bio] [Del2-Presentation on Web Crawlers-Nutch-PPT] [Del2-Implementation of Nutch Crawl] [Del3-Code Obfuscation Techniques-PPT] [CS298 Presentation Slides-PDF] |
Converting Images to the data:URI scheme. Description: As part of this deliverable, I wrote a program that takes the URL of a page as input from the user and fetches the page using cURL. Example: The two figures below are screenshots of what is seen before and after the base64 encoding. ![]() ![]() The two figures above show the HTML pages before and after the base64 encoding has been applied to the images. For the original page in the figure on the left, the source code looks like: <img src="http://localhost/CS297/Images/Image1.jpg" /> <br /> <a href ="PageB.html">Link to PageB</a> <img src="http://localhost/CS297/Images/Image2.jpg" /> Whereas for the new page in the figure on the right, the source code looks like: <img src="\n SAAMAAAABAAEAAAEaAAUAAAABAAAAYgEbAAUAAAABAAAAagEoAAMAAAABAAIAAAExAAIAAAAUAAAAcgEyAAIAAAAUA\n AAAhodpAAQAAAABAAAAnAAAAMgAAAEsAAAAAQAAASwAAAABQWRvYmUgUGhvdG9zaG9wIDcuMAAyMDA3OjExOjI3IDE\n yOjAzOjM4AAAAAAOgAQADAAAAAf//AACgAgAEAAAAAQAAAMigAwAEAAAAAQAAAJYAAAAAAAAABgEDAAMAAAABAAYAA\n AEaAAUAAAABAAABFgEbAAUAAAABAAABHgEoAAMAAAABAAIAAAIBAAQAAAABAAABJgICAAQAAAABAAAGJwAAAAAAAAB\n IAAAAAQAAAEgAAAAB/9j/4AAQSkZJRgABAgEASABIAAD/7QAMQWRvYmVfQ00AAv/uAA5BZG9iZQBkgAAAAAH/2wCEA\n AwICAgJCAwJCQwRCwoLERUPDAwPFRgTExUTE....." /> The following is the PHP code for my program.
<?php
/**
 * Data-URI image inliner.
 *
 * Reads a page address from the "url" query parameter, downloads that page
 * with cURL, finds the <img> tags in it, downloads each referenced image and
 * prints it back as an <img> whose src is a base64 "data:" URI.
 *
 * Everything is kept in memory: nothing is written to the server's disk, so a
 * remote page cannot choose file names that get created next to this script.
 *
 * NOTE(review): this script fetches whatever http(s) address the visitor
 * supplies, including hosts that are only reachable from the server itself.
 * Do not expose it on a public server without an allow-list.
 */

/**
 * Downloads a URL into memory.
 *
 * Only http and https transfers are allowed, so "file://" and similar schemes
 * cannot be used to read local resources.
 *
 * @param string $url Address to download.
 * @return string|false Response body, or false when the transfer failed,
 *                      the body was empty, or the server answered with an
 *                      HTTP status of 400 or above.
 */
function fetchRemote($url)
{
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }

    // Return the body from curl_exec() instead of streaming it to a file.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    $body = curl_exec($ch);
    $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    curl_close($ch);

    if ($body === false || $body === '' || $status >= 400) {
        return false;
    }

    return $body;
}

/**
 * Turns the value of an <img src="..."> attribute into an absolute URL.
 *
 * @param string $src     Raw attribute value as found in the HTML.
 * @param string $pageUrl Absolute URL of the page the attribute came from.
 * @return string|null Absolute http(s) URL, or null when the value is empty,
 *                     is already a data: URI, uses another scheme, or the
 *                     page URL cannot be parsed.
 */
function resolveImageUrl($src, $pageUrl)
{
    // Attribute values are HTML-encoded (e.g. "&amp;" inside query strings).
    $src = trim(html_entity_decode($src, ENT_QUOTES, 'UTF-8'));

    if ($src === '' || strncasecmp($src, 'data:', 5) === 0) {
        return null;
    }

    if (preg_match('#^https?://#i', $src)) {
        return $src;
    }

    // Scheme-less host names such as "www.example.com/a.jpg" are treated as
    // absolute http addresses, which is what cURL assumes when no scheme is given.
    if (strncasecmp($src, 'www.', 4) === 0) {
        return 'http://' . $src;
    }

    // Any other explicit scheme (javascript:, ftp:, ...) is not an image we fetch.
    if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $src)) {
        return null;
    }

    $parts = parse_url($pageUrl);
    if ($parts === false || !isset($parts['scheme']) || !isset($parts['host'])) {
        return null;
    }

    // Protocol-relative reference: reuse the page's scheme.
    if (strncmp($src, '//', 2) === 0) {
        return $parts['scheme'] . ':' . $src;
    }

    $origin = $parts['scheme'] . '://' . $parts['host'];
    if (isset($parts['port'])) {
        $origin .= ':' . $parts['port'];
    }

    // Root-relative reference: resolve against the host, not the page's directory.
    if ($src[0] === '/') {
        return $origin . $src;
    }

    // Plain relative reference: resolve against the directory part of the page
    // path, ignoring the page's own file name and query string.
    $path = isset($parts['path']) ? $parts['path'] : '/';
    $slash = strrpos($path, '/');
    $dir = ($slash === false) ? '/' : substr($path, 0, $slash + 1);

    return $origin . $dir . $src;
}

/**
 * Fetches a page and prints each of its images as an inline data: URI <img>.
 *
 * @param string $url Page address supplied by the user.
 * @return void
 */
function renderInlinedPage($url)
{
    // The address comes straight from the query string: escape it before echoing.
    echo '<br /><br />The page fetched is: '
        . htmlspecialchars($url, ENT_QUOTES, 'UTF-8') . '<br />';

    // Mirror cURL's default of assuming http when no scheme is given, so that
    // relative image paths can still be resolved against the page URL.
    if (!preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
        $url = 'http://' . $url;
    }

    $html = fetchRemote($url);
    if ($html === false) {
        echo 'The page could not be fetched.<br />';
        return;
    }

    // Capture the src of <img> tags only, with either quote style.
    $pattern = '/<img\b[^>]*?\ssrc\s*=\s*(["\'])(.*?)\1/is';
    if (!preg_match_all($pattern, $html, $matches)) {
        return;
    }

    $finfo = new finfo(FILEINFO_MIME_TYPE);

    foreach ($matches[2] as $src) {
        $imageUrl = resolveImageUrl($src, $url);
        if ($imageUrl === null) {
            continue;
        }

        $data = fetchRemote($imageUrl);
        if ($data === false) {
            continue;
        }

        // Label the data: URI with the type detected from the bytes themselves
        // and skip anything that is not actually an image.
        $mime = $finfo->buffer($data);
        if (!is_string($mime) || strncmp($mime, 'image/', 6) !== 0) {
            continue;
        }

        echo '<div> <img src="data:' . $mime . ';base64,'
            . base64_encode($data) . '" /> </div>';
    }
}

if (isset($_GET['url']) && is_string($_GET['url']) && $_GET['url'] !== '') {
    renderInlinedPage($_GET['url']);
} else {
    echo 'Please supply a page address in the "url" query parameter.<br />';
}
?> |