python – Exclude several elements from HTML using BeautifulSoup

I want to get some names from a webpage to use later in my code.

import requests
from bs4 import BeautifulSoup

content = requests.get("http://serpadres.com/bebe/los-200-nombres-latinos-mas-populares-de-los-ultimos-tiempos/52175/").content
soup = BeautifulSoup(content, features="html.parser")
for tag in soup.find_all("br"):
    print("{0}: {1}".format(tag.name, tag.text))

I tried this and it did print all the names, but also the br tags. The result was:

br: 
br: 
br: 
br: 
br: 
br: 
br: 
br: 
VERY LONG LIST OF NAMES
br: 
br: 
br: 
br: 
br: 
br: 
br: 
br: 
br: 
br: 

and many, many more br:. How can I exclude those and also convert the names to strings?
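
A minimal sketch of one possible approach, assuming the names are plain text nodes sitting right after the <br> tags: walk each br tag's next_sibling and keep only the text siblings, which become plain strings once stripped.

import requests
from bs4 import BeautifulSoup, NavigableString

content = requests.get("http://serpadres.com/bebe/los-200-nombres-latinos-mas-populares-de-los-ultimos-tiempos/52175/").content
soup = BeautifulSoup(content, features="html.parser")

names = []
for tag in soup.find_all("br"):
    sibling = tag.next_sibling                # node immediately after the <br>
    if isinstance(sibling, NavigableString):  # keep only plain text nodes
        text = sibling.strip()
        if text:
            names.append(str(text))           # plain Python string
print(names)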

python 3.x – BeautifulSoup is returning None

I'm trying to get the product title from the Amazon site, but the returned value is always None.

Link: https://www.amazon.com.br/Câmera-Sony-Alpha-a7III-Mirrorless/dp/B07B43WPVK/ref=sr_1_1?__mk_pt_BR=ÅMÅŽÕÑ&dchild=1&keywords=sony+alpha+ilce-7m3&qid=1599604773&sr=8-1

My code:

import requests
from bs4 import BeautifulSoup

URL = 'https://www.amazon.com.br/Câmera-Sony-Alpha-a7III-Mirrorless/dp/B07B43WPVK/ref=sr_1_1?__mk_pt_BR=ÅMÅŽÕÑ&dchild=1&keywords=sony+alpha+ilce-7m3&qid=1599604773&sr=8-1'
headers = {
    "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36 OPR/70.0.3728.154'}

page = requests.get(URL, headers=headers)

soup = BeautifulSoup(page.content, 'html.parser')

title = soup.find(id="productTitle")

print(title)

The output:

None

Process finished with exit code 0
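
One hedged suggestion, assuming the request itself succeeds: Amazon's markup frequently trips up html.parser, so it's worth trying the more tolerant lxml parser and guarding against the element being absent. Amazon may also return a captcha page to scripted clients, in which case no parser will find the title.

soup = BeautifulSoup(page.content, 'lxml')  # lxml copes better with Amazon's broken HTML

title = soup.find(id="productTitle")
if title is not None:
    print(title.get_text(strip=True))
else:
    # possibly a captcha/robot-check page came back instead of the product page
    print("productTitle not found; inspect page.status_code and page.text")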

python 3.x – Remove BRs after a scrape result with BeautifulSoup?

Sorry to bother you again. I'm building a sort of rank lookup tool for characters of a game. Everything goes well until the moment one of the search results is printed: the output comes out mangled because there are BRs embedded in the HTML code.

This is my code:

# Import the libraries
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests
import time
import os
import re

# Fetch the URL
html = urlopen('http://maplestory.nexon.net/rankings/world-ranking/reboot-(na)?pageIndex=1&character_name=Elfwinny')
url = "http://maplestory.nexon.net/rankings/world-ranking/reboot-(na)?pageIndex=1&character_name=Elfwinny"
bs = BeautifulSoup(html, 'html.parser')

# Fetch the page HTML and create a Response object
r = requests.get(url)
data = r.text

# Pull all the information from the rank table provided:

soup = BeautifulSoup(data, 'lxml')
avatar = bs.find_all('img', {'src': re.compile('.png')})
job = bs.find_all('img', {'src': re.compile('.gif')})
htnm_migration_table = soup.find("table", {'class': ''})
tbody = htnm_migration_table.find('tbody')
trs = htnm_migration_table.find('tr')
td = htnm_migration_table.find_all('td')

print("Avatar de ElfWinny: " + avatar[2]["src"])
print("Nickname: " + td[2].text)
print("Job: " + job[0]["src"])
print("Nivel: " + td[5].text)

It's precisely print("Nivel: " + td[5].text) that mangles the line, because it copies the content of the TD as-is, which is the following:

232                   <br>                                                         (61857388727)<br>

Besides this question, I'd also like to know whether there is a way to isolate or print both numbers separately (the 232 in one print, and the (61857388727) in another).

Thank you very much for your answer.
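
A minimal sketch answering both points at once, assuming td[5] holds the two numbers separated by <br> tags: stripped_strings yields each text fragment of the cell on its own, so the level and the number in parentheses come out as separate strings.

parts = list(td[5].stripped_strings)   # e.g. ['232', '(61857388727)']
level = parts[0]
exp = parts[1] if len(parts) > 1 else ''
print("Nivel: " + level)               # 232
print("EXP: " + exp)                   # (61857388727)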

python – Extract links with BeautifulSoup

I'm trying to pull certain links from a URL whose page has blocks of code like this:

<div class="posting-card super-highlighted " data-id="55399753" data-posting-type="DEVELOPMENT" data-to-posting="/propiedades/ceibos-point-55399753.html" data-index="2" data-key="2">

What I want is to extract the text "/propiedades/ceibos-point-55399753.html", which corresponds to data-to-posting. I've been trying with the find_all method but haven't managed it. Any suggestions?
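
A minimal sketch of one way to do it, assuming every card of interest is a div carrying a data-to-posting attribute: find_all accepts arbitrary attributes through attrs, and passing True matches any tag that has the attribute at all; the value is then read by indexing the tag like a dictionary.

from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'html.parser')  # html: page source fetched beforehand
for card in soup.find_all('div', attrs={'data-to-posting': True}):
    print(card['data-to-posting'])  # e.g. /propiedades/ceibos-point-55399753.html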

python – Web scraping application that collects metal prices and converts them from USD to MXN in real time using BeautifulSoup

from bs4 import BeautifulSoup
import requests
import re
import datetime

metal_translation = {"Aluminium": "Aluminio", "Copper": "Cobre", "Zinc": "Zinc", "Nickel": "Níquel", "Lead": "Plomo", "Tin": "Estaño",
                     "Aluminium Alloy": "Aleación de Aluminio", "Cobalt": "Cobalto", "Gold*": "Oro*", "Silver*": "Plata*",
                     "Steel Scrap**": "Chatarra de Acero", "NASAAC": "NASAAC", "Steel Rebar**": "Varilla de Acero"}

def get_metal_values():
    names = []
    prices = []
    metals = requests.get('https://www.lme.com/').text
    soup = BeautifulSoup(metals, 'lxml')
    metal_table = soup.find("table", attrs={"class": "ring-times"})
    metal_table_names, metal_table_prices = metal_table.tbody.find_all("th"), metal_table.tbody.find_all("td")
    for name in metal_table_names:
        names.append(name.text.replace("LME ", ""))
    for price in metal_table_prices:
        prices.append(price.text.strip())
    return names, prices

def get_peso_conversion():
    peso = requests.get('https://themoneyconverter.com/USD/MXN').text
    soup1 = BeautifulSoup(peso, 'lxml')
    conversion = soup1.find("div", class_="cc-result").text
    rate = re.search(r"\d{2}\.\d{4}", conversion).group()
    return rate

def get_time():
    date = datetime.datetime.now()
    time = (f'{date.day}/{date.month}/{date.year} | {date.hour}:{date.minute}')
    return time

def convert_values():
    names, prices = get_metal_values()
    rate = get_peso_conversion()
    metal_data = dict(zip(names, prices))
    for k, v in metal_data.items():
        v = (float(v.replace(",", "")) * float(rate))
        v = ("%.2f" % v)
        k = metal_translation[k]
        print(f'{k}: {v} $')

def program_run():
    print("Metal Prices by ETHAN HETRICK")
    print("================================================")
    print(f'{get_time()} | 1 USD = {get_peso_conversion()} MXN\n')
    print("Precios de metales de London Metal Exchange: (Por tonelada métrica, *Por onza Troy)\n")
    convert_values()
    print("================================================")

program_run()
input("\nEscribe 'x' para terminar.\n")

EXAMPLE OUTPUT:

Metal Prices by ETHAN HETRICK
================================================
26/6/2020 | 18:28 | 1 USD = 23.0622 MXN

Precios de metales de London Metal Exchange: (Por tonelada métrica, *Por onza Troy)

Aluminio: 36484.40 $
Cobre: 138038.80 $
Zinc: 47438.95 $
Níquel: 293097.50 $
Plomo: 41004.59 $
Estaño: 391826.78 $
Aleación de Aluminio: 27997.51 $
NASAAC: 27444.02 $
Cobalto: 657272.70 $
Oro*: 40711.70 $
Plata*: 410.74 $
Chatarra de Acero: 6065.36 $
Varilla de Acero: 9720.72 $
================================================

Escribe 'x' para terminar.

My fiancée's father owns a metal recycling business in Mexico City, Mexico, which requires him to do a lot of calculations when negotiating with clients. Since he was doing this by hand, I decided to automate it for him. I used the London Metal Exchange, his preferred site for current prices, for the metal prices, and also fetched the USD-to-MXN exchange rate and applied it as well. I also needed to translate everything to Spanish, so I used a dictionary. This is my first time using web scraping and the datetime module, so any advice to make this program run more efficiently or make the code more concise is much appreciated!
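
One small refactor worth considering, sketched under the assumption that the page structure stays as above: list comprehensions build both lists in get_metal_values without the explicit append loops, which shortens the function and makes the intent clearer.

def get_metal_values():
    metals = requests.get('https://www.lme.com/').text
    soup = BeautifulSoup(metals, 'lxml')
    metal_table = soup.find("table", attrs={"class": "ring-times"})
    # one comprehension per column instead of two append loops
    names = [th.text.replace("LME ", "") for th in metal_table.tbody.find_all("th")]
    prices = [td.text.strip() for td in metal_table.tbody.find_all("td")]
    return names, prices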

Better way to extract an HTML table into a dictionary using BeautifulSoup in Python

I am scraping HTML pages. Part of each page has a table that lists acts and the sections of those acts. For another project I need to convert them into a dictionary. The key values are already fixed (in that other project); I want to use the same keys here and replace the corresponding sections with each new input. The code I have designed works, but I am looking for a better way to write it, as it is currently quite lengthy. The code:

from bs4 import BeautifulSoup as bs, NavigableString

openFile = open('/some path/samplePage.html')
soup = bs(openFile, 'html.parser')

acts = soup.select('#act_table td:nth-of-type(1)')
sections = soup.select('#act_table td:nth-of-type(2)')
dictionary = {}


ipc = 'indian penal code'
poa = 'prevention of atrocities'
pcso = 'protection of children from sexual'
pcr = 'protection of civil rights'


if len(acts) < 1:
    print('no act mentioned')
elif len(acts) < 2:
    act1 = tuple(acts[0].contents)
    sections1 = tuple(sections[0].contents)
elif len(acts) < 3:
    act1 = tuple(acts[0].contents)
    sections1 = tuple(sections[0].contents)
    act2 = tuple(acts[1].contents)
    sections2 = tuple(sections[1].contents)
elif len(acts) < 4:
    act1 = tuple(acts[0].contents)
    sections1 = tuple(sections[0].contents)
    act2 = tuple(acts[1].contents)
    sections2 = tuple(sections[1].contents)
    act3 = tuple(acts[2].contents)
    sections3 = tuple(sections[2].contents)
elif len(acts) < 5:
    act1 = tuple(acts[0].contents)
    sections1 = tuple(sections[0].contents)
    act2 = tuple(acts[1].contents)
    sections2 = tuple(sections[1].contents)
    act3 = tuple(acts[2].contents)
    sections3 = tuple(sections[2].contents)
    act4 = tuple(acts[3].contents)
    sections4 = tuple(sections[3].contents)
else:
    act1 = tuple(acts[0].contents)
    sections1 = tuple(sections[0].contents)
    act2 = tuple(acts[1].contents)
    sections2 = tuple(sections[1].contents)
    act3 = tuple(acts[2].contents)
    sections3 = tuple(sections[2].contents)
    act4 = tuple(acts[3].contents)
    sections4 = tuple(sections[3].contents)
    act5 = tuple(acts[4].contents)


if len(acts) == 0:
    pass
# for first act in list
elif len(acts) == 1:
    if ipc in str(act1).lower():
        dictionary['IPC'] = sections1
    elif poa in str(act1).lower():
        dictionary['PoA'] = sections1
    elif pcso in str(act1).lower():
        dictionary['PCSO'] = sections1
    elif pcr in str(act1).lower():
        dictionary['PCR'] = sections1
    else:
        dictionary['Any Other Act'] = str(act1).lower()
    print(dictionary)

# for 2nd act in list
elif len(acts) == 2:
    if ipc in str(act1).lower():
        dictionary['IPC'] = sections1
    elif poa in str(act1).lower():
        dictionary['PoA'] = sections1
    elif pcso in str(act1).lower():
        dictionary['PCSO'] = sections1
    else:
        dictionary['Any Other Act'] = str(act1).lower()
    if ipc in str(act2).lower():
        dictionary['IPC'] = sections2
    elif poa in str(act2).lower():
        dictionary['PoA'] = sections2
    elif pcso in str(act2).lower():
        dictionary['PCSO'] = sections2
    else:
        dictionary['Any Other Act'] = act2
    print(dictionary)
# for 3rd act in list
elif len(acts) == 3:
    if ipc in str(act1).lower():
        dictionary['IPC'] = sections1
    elif poa in str(act1).lower():
        dictionary['PoA'] = sections1
    elif pcso in str(act1).lower():
        dictionary['PCSO'] = sections1
    elif pcr in str(act1).lower():
        dictionary['PCR'] = sections1
    else:
        dictionary['Any Other Act'] = str(act1).lower()
    if ipc in str(act2).lower():
        dictionary['IPC'] = sections2
    elif poa in str(act2).lower():
        dictionary['PoA'] = sections2
    elif pcso in str(act2).lower():
        dictionary['PCSO'] = sections2
    elif pcr in str(act2).lower():
        dictionary['PCR'] = sections2
    else:
        dictionary['Any Other Act'] = act2
    # for 3rd option
    if ipc in str(act3).lower():
        dictionary['IPC'] = sections3
    elif poa in str(act3).lower():
        dictionary['PoA'] = sections3
    elif pcso in str(act3).lower():
        dictionary['PCSO'] = sections3
    elif pcr in str(act3).lower():
        dictionary['PCR'] = sections3
    else:
        dictionary['Any Other Act'] = act3
    print(dictionary)
# for 4th act in list
elif len(acts) == 4:
    if ipc in str(act1).lower():
        dictionary['IPC'] = sections1
    elif poa in str(act1).lower():
        dictionary['PoA'] = sections1
    elif pcso in str(act1).lower():
        dictionary['PCSO'] = sections1
    elif pcr in str(act1).lower():
        dictionary['PCR'] = sections1
    else:
        dictionary['Any Other Act'] = str(act1).lower()
    if ipc in str(act2).lower():
        dictionary['IPC'] = sections2
    elif poa in str(act2).lower():
        dictionary['PoA'] = sections2
    elif pcso in str(act2).lower():
        dictionary['PCSO'] = sections2
    elif pcr in str(act2).lower():
        dictionary['PCR'] = sections2
    else:
        dictionary['Any Other Act'] = act2
    # for 3rd option
    if ipc in str(act3).lower():
        dictionary['IPC'] = sections3
    elif poa in str(act3).lower():
        dictionary['PoA'] = sections3
    elif pcso in str(act3).lower():
        dictionary['PCSO'] = sections3
    elif pcr in str(act3).lower():
        dictionary['PCR'] = sections3
    else:
        dictionary['Any Other Act'] = act3
    # 4th option
    if ipc in str(act4).lower():
        dictionary['IPC'] = sections4
    elif poa in str(act4).lower():
        dictionary['PoA'] = sections4
    elif pcso in str(act4).lower():
        dictionary['PCSO'] = sections4
    elif pcr in str(act4).lower():
        dictionary['PCR'] = sections4
    else:
        dictionary['Any Other Act'] = act4
elif len(acts) == 5:
    if ipc in str(act1).lower():
        dictionary['IPC'] = sections1
    elif poa in str(act1).lower():
        dictionary['PoA'] = sections1
    elif pcso in str(act1).lower():
        dictionary['PCSO'] = sections1
    elif pcr in str(act1).lower():
        dictionary['PCR'] = sections1
    else:
        dictionary['Any Other Act'] = str(act1).lower()
    if ipc in str(act2).lower():
        dictionary['IPC'] = sections2
    elif poa in str(act2).lower():
        dictionary['PoA'] = sections2
    elif pcso in str(act2).lower():
        dictionary['PCSO'] = sections2
    elif pcr in str(act2).lower():
        dictionary['PCR'] = sections2
    else:
        dictionary['Any Other Act'] = act2
    # for 3rd option
    if ipc in str(act3).lower():
        dictionary['IPC'] = sections3
    elif poa in str(act3).lower():
        dictionary['PoA'] = sections3
    elif pcso in str(act3).lower():
        dictionary['PCSO'] = sections3
    elif pcr in str(act3).lower():
        dictionary['PCR'] = sections3
    else:
        dictionary['Any Other Act'] = act3
    # 4th option
    if ipc in str(act4).lower():
        dictionary['IPC'] = sections4
    elif poa in str(act4).lower():
        dictionary['PoA'] = sections4
    elif pcso in str(act4).lower():
        dictionary['PCSO'] = sections4
    elif pcr in str(act4).lower():
        dictionary['PCR'] = sections4
    else:
        dictionary['Any Other Act'] = act4
print(dictionary)

The HTML code of one of the files is here:

link to the source code
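
A sketch of the loop-based shape this usually takes, assuming each act row should map to the first matching abbreviation: pairing each act with its sections via zip and looking the phrase up in a small mapping collapses the five near-identical branches into one loop.

from bs4 import BeautifulSoup as bs

soup = bs(open('/some path/samplePage.html'), 'html.parser')
acts = soup.select('#act_table td:nth-of-type(1)')
sections = soup.select('#act_table td:nth-of-type(2)')

# phrase fragment -> dictionary key
abbreviations = {
    'indian penal code': 'IPC',
    'prevention of atrocities': 'PoA',
    'protection of children from sexual': 'PCSO',
    'protection of civil rights': 'PCR',
}

dictionary = {}
for act, section in zip(acts, sections):
    text = act.get_text().lower()
    for phrase, key in abbreviations.items():
        if phrase in text:
            dictionary[key] = tuple(section.contents)
            break
    else:  # no known act matched this row
        dictionary['Any Other Act'] = text

print(dictionary)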

Web scraping – I need to extract a data value from Google using BeautifulSoup in Python

If you go to this link:
"https://www.google.com/search?q=usd+to+clp&rlz=1C1CHBD_esVE816VE816&oq=usd+to+clp&aqs=chrome..69i57j35i39j0l2j69i60l4.1479j0j7&sourceid=chrome&"

You can see the exchange rate between USD and CLP (Chilean Peso).

I want to extract this number …

I've already tried the following (and many more things) …

import requests
from bs4 import BeautifulSoup

result = requests.get("https://www.google.com/search?q=usd+to+clp&rlz=1C1CHBD_esVE816VE816&oq=usd+to+clp&aqs=chrome..69i57j35i39j0l2j69i60l4.1479j0j7&sourceid=chrome&ie=UTF-8")

src = result.content
soup = BeautifulSoup(src, features="html.parser")
value = soup.find('span', class_="DFlfde.SwHCTb").get_text()
print(value)

It would be excellent if someone could help me, thank you.

PS: Yes, I am a newcomer, sorry :P
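
A hedged sketch of one likely fix, assuming the element really carries the two classes DFlfde and SwHCTb: class_ matches one class name at a time, so the dotted string never matches anything; a CSS selector handles both classes. Note that these class names are machine-generated and may change, and Google can serve different markup to clients without a browser-like User-Agent.

import requests
from bs4 import BeautifulSoup

headers = {"User-Agent": "Mozilla/5.0"}  # assumption: nudges Google toward the desktop HTML
result = requests.get("https://www.google.com/search?q=usd+to+clp", headers=headers)
soup = BeautifulSoup(result.content, features="html.parser")

span = soup.select_one("span.DFlfde.SwHCTb")  # CSS selector matching both classes
if span is not None:
    print(span.get_text())
else:
    print("Element not found; Google may have served different markup")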

html – Extract a post ID with BeautifulSoup

Here's the thing: I'm working on a college project whose goal is to pull news posts from different sections of a website. I chose the Globo site because it has a very consistent post structure. I use BeautifulSoup for this, which is wonderful, but I have to extract the ID of these posts, since I will put them in a database, and because each post has a distinct ID I will use it to avoid storing duplicate posts. Every post on the Globo site carries an attribute like this (quoted as it appears to me): Feed-Post-BSTN-Article-Form-Type-Materia-ID = "68d33d6e4dd5a282175a68e80c833154", and it's the Materia-ID value that has to be extracted. However, if I search for the post's class with BeautifulSoup, the ID is not included. I looked around, but everything I found assumed a class plus a fixed hash, and this hash varies from post to post, so I cannot rely on it. Do you have any idea how to get this ID? An example of a page I use to extract the post IDs is: https://g1.globo.com/educacao/index/feed/page-100.ghtml. The ID that I want to extract is marked.
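
A hedged sketch of one way to hunt such an ID down, assuming it lives in an attribute on each post element (the attribute name quoted above looks garbled, so the exact name is a guess): every tag exposes its attributes as the plain dict tag.attrs, which can be scanned for a key that looks like the ID carrier.

import requests
from bs4 import BeautifulSoup

html = requests.get("https://g1.globo.com/educacao/index/feed/page-100.ghtml").text
soup = BeautifulSoup(html, "html.parser")

for tag in soup.find_all(True):            # every tag in the document
    for attr, value in tag.attrs.items():
        # heuristic: an id-bearing attribute, e.g. some data-*-id variant
        if attr.startswith("data-") and "id" in attr:
            print(attr, "=", value)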

Pull href tags with BeautifulSoup in Python

As if that were not extremely obvious, I'm a new programmer.

My code is, in my opinion, far from the most ideal representation. I am also not sure whether there are unnecessary parts, because I worked on this on and off for a few days, starting and stopping several times.

The goal is clean code with a # comment on each line (unless it's extremely basic) to improve my commenting habits. Please let me know what you would do overall to improve it. Many thanks!

import urllib.request as ur
from bs4 import BeautifulSoup


url = str(input('Enter URL: '))  # converting the input to a string
html = ur.urlopen(url).read()  # read the html
soup = BeautifulSoup(html, "html.parser")  # parse the page
count_ = int(input('Enter count: '))  # converts the input to an integer
pos_1 = int(input('Enter position: '))  # converts the input to an integer
tags = soup('a')  # get all anchor tags
final = ''  # url of the last name before the break
curpos = ''
print('Retrieving:', url)  # prints the start point/URL
count = int(count_) + 1
while count > 1:  # loop that runs until count is less than 1
    pos = 0
    for tag in tags:
        if pos == int(pos_1) - 1:  # conditional statement on the position
            curpos = tag.get('href', None)
            break
        pos = pos + 1  # increases the pos value for each tag
    final = curpos
    url = str(curpos)
    html = ur.urlopen(url).read()
    soup = BeautifulSoup(html, "html.parser")
    tags = soup('a')
    count = count - 1  # for each iteration of the loop, 1 is subtracted from count
print('Retrieving:', final)
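
A sketch of a tidier shape for the same exercise, assuming the task is "follow the link at position N, count times": a for loop over range removes the manual counter, and indexing the anchor list replaces the inner position-hunting loop.

import urllib.request as ur
from bs4 import BeautifulSoup

url = input('Enter URL: ')
count = int(input('Enter count: '))
position = int(input('Enter position: '))

print('Retrieving:', url)
for _ in range(count):                    # repeat the hop `count` times
    html = ur.urlopen(url).read()
    soup = BeautifulSoup(html, 'html.parser')
    tags = soup('a')                      # all anchor tags on the page
    url = tags[position - 1].get('href')  # link at the requested position
    print('Retrieving:', url)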