import pandas as pd
import re
# Demo input: three short sentences, each ending in a link, stored in a
# single-column DataFrame under the 'text' column.
sample_texts = [
    'Hello, visit my website at https://example.com',
    'Check out this link: www.example.com',
    'I love OpenAI, their website is https://openai.com',
]
data = {'text': sample_texts}
df = pd.DataFrame(data)
# One URL character. Inside the third class, `$-_` is a character range
# (0x24-0x5F); that range is what allows '/', ':', '?' and '=' inside a URL.
_URL_CHAR = r'(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))'

# "http://" or "https://" followed by one or more URL characters.
_HTTP_LINK = r'http[s]?://' + _URL_CHAR + r'+'

# Bare "www" host with two or three dot-separated labels, optionally followed
# by a path. The path reuses the URL character set so that multi-segment paths
# ("/a/b/c.html") and a lone trailing "/" are removed in full rather than
# leaving a fragment behind.
_WWW_LINK = r'www(?:\.[a-zA-Z0-9-]+){2,3}(?:/' + _URL_CHAR + r'*)?'

# Compiled once at import time instead of on every call.
_LINK_PATTERN = re.compile(_HTTP_LINK + '|' + _WWW_LINK)


def remove_links(text: str) -> str:
    """Return *text* with every http(s):// URL and bare www link removed.

    Only the matched link is deleted; surrounding text, including any
    whitespace next to the link, is left untouched. For http(s) URLs the
    match runs until the first character outside the URL character set
    (for example a space), so punctuation glued to the end of such a URL
    is removed together with it.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` with each link replaced by the empty string.
    """
    return _LINK_PATTERN.sub('', text)
# Run remove_links over every entry of the 'text' column and store the
# cleaned values back into that same column.
cleaned_text = df['text'].apply(remove_links)
df['text'] = cleaned_text

# Show the resulting DataFrame.
print(df)