Open In App

Extract URLs present in a given string

Last Updated : 24 Jul, 2025
Comments
Improve
Suggest changes
Like Article
Like
Report

Given a string S, the task is to find and extract all the URLs from the string. If no URL is present in the string, then print "-1".

Examples:

Input: S = “Welcome to https://www.geeksforgeeks.org/ Computer Science Portal”
Output: https://www.geeksforgeeks.org/
Explanation:
The given string contains the URL 'https://www.geeksforgeeks.org/'.

Input: S = “Welcome to https://write.geeksforgeeks.org/ portal of https://www.geeksforgeeks.org/ Computer Science Portal”
Output:
https://write.geeksforgeeks.org/ 
https://www.geeksforgeeks.org/
Explanation:
The given string contains two URLs 'https://write.geeksforgeeks.org/' and 'https://www.geeksforgeeks.org/'.

Approach: The idea is to use Regular Expression to solve this problem. Follow the steps below to solve the given problem:

regex = "\\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])"

  • Create an ArrayList in Java and compile the regular expression using Pattern.compile().
  • Match the given string with the regular expression. In Java, this can be done by using Pattern.matcher().
  • Find the substring from the first index of match result to the last index of the match result and add this substring into the list.
  • After completing the above steps, if the list is found to be empty, then print "-1" as there is no URL present in the string S. Otherwise, print all the strings stored in the list.

Below is the implementation of the above approach:

C++
#include <iostream>
#include <regex>
#include <vector>
using namespace std;

// Extracts every URL (http, https, ftp or file scheme) found in `str` and
// prints each one on its own line, in order of appearance.
// Prints "-1" when `str` contains no URL.
//
// Parameters:
//   str - the text to scan.
void extractURL(string str)
{
    // Both character classes start with a literal '-' so that hyphenated
    // hosts and paths (e.g. "my-site.example.com/a-b") are matched in full
    // rather than being cut off at the first hyphen. The final class omits
    // '?', '!', ':', ',', '.' and ';' so trailing sentence punctuation is not
    // treated as part of the URL. Built once and reused across calls.
    static const regex url_pattern(
        "\\b((?:https?|ftp|file):"
        "\\/\\/[-a-zA-Z0-9+&@#\\/%?=~_|!:,.;]*"
        "[-a-zA-Z0-9+&@#\\/%=~_|])",
        regex_constants::icase);

    // Collect every match before printing so the "no URL" case can be detected
    vector<string> url_list;
    const sregex_iterator no_more;
    for (sregex_iterator it(str.begin(), str.end(), url_pattern);
         it != no_more; ++it) {
        url_list.push_back(it->str());
    }

    // If no URLs are found, print -1
    if (url_list.empty()) {
        cout << "-1" << endl;
        return;
    }

    // Otherwise print the URLs; iterate by reference to avoid copying each one
    for (const string& url : url_list) {
        cout << url << endl;
    }
}

// Driver Code: runs the extractor on a sample sentence
int main()
{
    // Sample input containing exactly one URL
    const string sample = "Welcome to https://www.geeksforgeeks.org/ Computer Science Portal";

    // Print the URLs found in the sample (or -1 if there are none)
    extractURL(sample);

    return 0;
}
Java
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExtractURL {

    /**
     * Pattern for http, https, ftp and file URLs (case-insensitive).
     *
     * Both character classes start with a literal '-' so that hyphenated
     * hosts and paths (e.g. "my-site.example.com/a-b") are matched in full
     * rather than being cut off at the first hyphen. The final class omits
     * '?', '!', ':', ',', '.' and ';' so trailing sentence punctuation is
     * not treated as part of the URL.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "\\b((?:https?|ftp|file):"
                    + "\\/\\/[-a-zA-Z0-9+&@#\\/%?=~_|!:,.;]*"
                    + "[-a-zA-Z0-9+&@#\\/%=~_|])",
            Pattern.CASE_INSENSITIVE);

    /**
     * Extracts every URL found in the given string and prints each one on
     * its own line, in order of appearance. Prints "-1" when the string
     * contains no URL.
     *
     * @param str the text to scan
     */
    public static void extractURL(String str) {
        // Collect every match first so the "no URL" case can be detected
        ArrayList<String> urlList = new ArrayList<>();

        Matcher matcher = URL_PATTERN.matcher(str);
        while (matcher.find()) {
            urlList.add(matcher.group());
        }

        // If no URL is found, print -1
        if (urlList.isEmpty()) {
            System.out.println("-1");
            return;
        }

        // Print all the URLs stored in the list
        for (String url : urlList) {
            System.out.println(url);
        }
    }

    /**
     * Driver code: runs the extractor on a sample sentence.
     *
     * @param args unused command-line arguments
     */
    public static void main(String[] args) {
        // Given String str
        String str = "Welcome to https://www.geeksforgeeks.org/ "
                + "Computer Science Portal";

        // Function Call
        extractURL(str);
    }
}
Python3
import re

def extractURL(str):
    """Print every URL found in ``str``, one per line, or "-1" if there is none.

    Recognises the http, https, ftp and file schemes, case-insensitively.
    Nothing is returned; the results are written to standard output.
    """
    # The last character class leaves out '?', '!', ':', ',', '.' and ';'
    # so trailing sentence punctuation is not swallowed into the URL.
    url_pattern = re.compile(
        r'\b((?:https?|ftp|file):\/\/[-a-zA-Z0-9+&@#\/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#\/%=~_|])',
        re.IGNORECASE,
    )

    # group(0) is the full text of each match, in order of appearance
    found = [hit.group(0) for hit in url_pattern.finditer(str)]

    # Nothing matched: report -1 and stop
    if not found:
        print("-1")
        return

    # Emit each extracted URL on its own line
    for link in found:
        print(link)

# Driver Code: runs the extractor on a sample sentence when executed as a script
if __name__ == '__main__':

    # Sample input containing exactly one URL
    sample_text = "Welcome to https://www.geeksforgeeks.org/ Computer Science Portal"

    # Print the URLs found in the sample (or -1 if there are none)
    extractURL(sample_text)
C#
using System;
using System.Collections.Generic;
using System.Text.RegularExpressions;

class Program
{
    // Pattern for http, https, ftp and file URLs (case-insensitive), built
    // once and reused across calls.
    //
    // Both character classes start with a literal '-' so that hyphenated
    // hosts and paths (e.g. "my-site.example.com/a-b") are matched in full
    // rather than being cut off at the first hyphen. The final class omits
    // '?', '!', ':', ',', '.' and ';' so trailing sentence punctuation is not
    // treated as part of the URL. The scheme group is non-capturing because
    // only the whole match is ever read.
    private static readonly Regex UrlRegex = new Regex(
        @"\b((?:https?|ftp|file)://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|])",
        RegexOptions.IgnoreCase);

    /// <summary>
    /// Extracts every URL found in <paramref name="str"/> and writes each one
    /// on its own line, in order of appearance. Writes "-1" when the string
    /// contains no URL.
    /// </summary>
    /// <param name="str">The text to scan.</param>
    public static void ExtractURL(string str)
    {
        // Collect every match first so the "no URL" case can be detected
        List<string> urlList = new List<string>();

        foreach (Match match in UrlRegex.Matches(str))
        {
            urlList.Add(match.Value);
        }

        // If there are no URLs present
        if (urlList.Count == 0)
        {
            Console.WriteLine("-1");
            return;
        }

        // Print all the URLs stored
        foreach (string url in urlList)
        {
            Console.WriteLine(url);
        }
    }

    /// <summary>
    /// Driver code: runs the extractor on a sample sentence.
    /// </summary>
    static void Main()
    {
        // Given String str
        string str = "Welcome to https://www.geeksforgeeks.org/ Computer Science Portal";

        // Function Call
        ExtractURL(str);
    }
}
JavaScript
/**
 * Extracts every URL (http, https, ftp or file scheme) found in `str` and
 * logs each one on its own line, in order of appearance.
 * Logs "-1" when `str` contains no URL.
 *
 * @param {string} str - The text to scan.
 * @returns {void}
 */
function extractURL(str) {
    // Both character classes start with a literal '-' so that hyphenated
    // hosts and paths (e.g. "my-site.example.com/a-b") are matched in full
    // rather than being cut off at the first hyphen. The final class omits
    // '?', '!', ':', ',', '.' and ';' so trailing sentence punctuation is
    // not treated as part of the URL.
    const regexStr = "\\b((?:https?|ftp|file):"
        + "\\/\\/[-a-zA-Z0-9+&@#\\/%?=~_|!:,.;]*"
        + "[-a-zA-Z0-9+&@#\\/%=~_|])";

    // 'g' so exec() advances through the string, 'i' for case-insensitive schemes
    const regex = new RegExp(regexStr, 'gi');

    // Collect every match first so the "no URL" case can be detected
    const urlList = [];
    let match;
    while ((match = regex.exec(str)) !== null) {
        urlList.push(match[0]);
    }

    // If no URL is found, print -1
    if (urlList.length === 0) {
        console.log("-1");
        return;
    }

    // Print all the URLs stored in the array
    for (const url of urlList) {
        console.log(url);
    }
}

// Sample input containing exactly one URL
const sampleText = "Welcome to https://www.geeksforgeeks.org/ Computer Science Portal";

// Print the URLs found in the sample (or -1 if there are none)
extractURL(sampleText);

Output
https://www.geeksforgeeks.org/

Time Complexity: O(N)
Auxiliary Space: O(N) in the worst case, since every extracted URL is stored in a list before printing


Similar Reads