Web Scraping code

Kf-

Thanks for the tip! I do have it turned on, but in all honesty, I've ignored it up to now. (I use it quite a bit when I CAD in Rhino, so I understand how powerful it is.)

Btw, just out of curiosity…

I added a recurring “fetch” aspect to my sketch… however, in one of their responses in this thread, Glv said that they got banned/blocked from a website for fetching/pinging too many times, too fast… any idea how fast is “too fast” or “too many, too fast”? In my sketch I have it set for 30 seconds. Code below for reference…

import processing.serial.*;
Serial myPort;  // Create object from Serial class


//void setup() 
//{
//  size(200,200); //make our canvas 200 x 200 pixels big
//  String portName = Serial.list()[0]; //change the 0 to a 1 or 2 etc. to match your port
//  myPort = new Serial(this, portName, 9600);
//}


// Earliest millis() timestamp at which the next fetch may run. Left at the
// default of 0, so the first draw() call triggers a fetch almost immediately.
int time_to_fetch;
int time_between_fetches = 30000; // 30 seconds, expressed in milliseconds.

// Processing's per-frame callback: starts a fetch once the scheduled time
// has passed. get_data() itself pushes time_to_fetch forward, so the fetch
// runs once per interval rather than on every frame.
void draw() {

  // Not due yet: nothing to do this frame.
  if (millis() <= time_to_fetch) {
    return;
  }

  get_data();

}

// Downloads the NWS DWML document for the hard-coded lat/lon and prints the
// wind-speed entry found at child position 15 of the second <data> element's
// first <parameters> block (type, units and value).
//
// Every step that depends on the remote document's shape is checked, so a
// failed download or a changed feed layout prints a message and returns
// instead of crashing the sketch with a NullPointerException or an
// ArrayIndexOutOfBoundsException.
void get_data() {

  // Reschedule first: even if this attempt fails, the next request waits the
  // full interval instead of hammering the server on every frame.
  time_to_fetch = millis() + time_between_fetches;

  XML xml = loadXML("https://forecast.weather.gov/MapClick.php?lat=42.3832&lon=-71.1018&unit=0&lg=english&FcstType=dwml");
  //println(xml);
  if (xml == null) {
    // loadXML yields null when the document cannot be fetched or parsed.
    println("get_data: could not load the forecast XML; will retry at the next interval");
    return;
  }

  //FIRST level
  XML[] dataNodes = xml.getChildren("data");
  //println("dataNodes len", dataNodes.length);
  if (dataNodes.length < 2) {
    println("get_data: expected at least 2 <data> elements, found", dataNodes.length);
    return;
  }

  //SECOND level
  XML[] parameterNodes = dataNodes[1].getChildren("parameters");
  if (parameterNodes.length == 0) {
    println("get_data: no <parameters> element in the second <data> element");
    return;
  }
  XML parameters = parameterNodes[0];
  //Listing the children is what identified positions 11, 13 and 15 as the fields of interest
  //printArray(parameters.listChildren());

  // Position of the wind-speed entry of interest among ALL child nodes
  // (see the sample entries at the end of this function).
  final int windSpeedIndex = 15;
  if (parameters.getChildCount() <= windSpeedIndex) {
    println("get_data: <parameters> has only", parameters.getChildCount(), "children; expected more than", windSpeedIndex);
    return;
  }

  //THIRD level
  XML windSpeed = parameters.getChild(windSpeedIndex);
  if (windSpeed == null) {
    println("get_data: no child at position", windSpeedIndex);
    return;
  }
  printArray(windSpeed);

  println("type:", windSpeed.getString("type"));
  println("units:", windSpeed.getString("units"));

  // The reading lives in a nested <value> element. getContent(String) takes
  // a DEFAULT value, not a child name, so look the child up explicitly and
  // trim the padding whitespace the feed puts around it.
  XML valueNode = windSpeed.getChild("value");
  println("value:", (valueNode != null) ? valueNode.getContent().trim() : "NA");

  println("Done");


  ////Example of positions 11,13,15
  //
  //<direction xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" time-layout="k-p1h-n1-1" type="wind" units="degrees true">  <value>300</value>  </direction>
  //<wind-speed xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" time-layout="k-p1h-n1-1" type="gust" units="knots">  <value>NA</value>  </wind-speed>
  //<wind-speed xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" time-layout="k-p1h-n1-1" type="sustained" units="knots">  <value>9</value>  </wind-speed>
}

1 Like