HTMLParser.java
3.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
package com.upc.pbe.upcnews;
import java.util.ArrayList;
import android.util.Log;
/**
 * Scans HTML text for anchor links and collects two kinds of resources:
 * directory paths and {@code .m3u8} playlist files.
 *
 * <p>Matching is purely textual and case-sensitive: only anchors written as
 * {@code <a ...>} with a double-quoted {@code href="..."} attribute are
 * recognised. HTML comments are removed before scanning.
 */
public class HTMLParser {

    private static final String TAG = "HTMLParser";

    private static final String COMMENT_OPEN = "<!--";
    private static final String COMMENT_CLOSE = "-->";
    private static final String ANCHOR_OPEN = "<a ";
    private static final String HREF_OPEN = "href=\"";
    private static final String PLAYLIST_SUFFIX = ".m3u8";

    /** Resources found by the most recent {@link #parse(String)} call. */
    private final ArrayList<String> resources;

    /** Scheme and host of the page URL (no trailing slash), e.g. {@code http://host}. */
    private final String server;

    /**
     * Creates a parser for pages served from the given URL.
     *
     * @param u page URL; everything before the first {@code /} that follows
     *          the {@code scheme://} part is kept as the server prefix used
     *          to resolve root-relative directory links
     */
    public HTMLParser(String u) {
        server = extractServer(u);
        resources = new ArrayList<String>();
    }

    /**
     * Parses the given HTML and returns every directory and playlist found,
     * directories first.
     *
     * @param code HTML text to scan; {@code null} is treated as empty
     * @return the parser's internal result list; it is cleared and refilled
     *         on every call, so copy it if it must outlive the next call
     */
    public ArrayList<String> parse(String code) {
        resources.clear();
        parseDirectories(resources, code);
        parseFiles(resources, code);
        return resources;
    }

    /**
     * Adds to {@code resources} the directory part of every anchor href that
     * contains a {@code /}. Root-relative directories (starting with
     * {@code /}) are prefixed with the server. Duplicates are not added.
     *
     * @param resources list that receives the directories found
     * @param code      HTML text to scan; {@code null} is treated as empty
     */
    public void parseDirectories(ArrayList<String> resources, String code) {
        for (String href : extractHrefs(code)) {
            int lastSlash = href.lastIndexOf('/');
            if (lastSlash == -1) {
                // A bare file name carries no directory information.
                continue;
            }
            // Keep everything up to and including the last slash.
            String dirpath = href.substring(0, lastSlash + 1);
            if (dirpath.startsWith("/")) {
                dirpath = server + dirpath;
            }
            if (!resources.contains(dirpath)) {
                resources.add(dirpath);
            }
            Log.d(TAG, "DIRECTORI TROBAT: " + dirpath);
        }
    }

    /**
     * Adds to {@code resources} every anchor href ending in {@code .m3u8}
     * whose file name does not start with a dot (hidden file). The href is
     * stored exactly as written in the page; unlike directories it is not
     * prefixed with the server. Duplicates are not added.
     *
     * @param resources list that receives the playlists found
     * @param code      HTML text to scan; {@code null} is treated as empty
     */
    public void parseFiles(ArrayList<String> resources, String code) {
        for (String filepath : extractHrefs(code)) {
            if (!filepath.endsWith(PLAYLIST_SUFFIX)) {
                continue;
            }
            // lastIndexOf returns -1 when there is no slash, so "+ 1" yields
            // the whole string; otherwise it skips the slash itself so that
            // the hidden-file check below sees the real first character.
            String filename = filepath.substring(filepath.lastIndexOf('/') + 1);
            if (filename.startsWith(".")) {
                continue;
            }
            if (!resources.contains(filepath)) {
                resources.add(filepath);
            }
            Log.d(TAG, "PLAYLIST TROBADA: " + filepath);
        }
    }

    /** Returns the {@code scheme://host} prefix of {@code url}, or the whole string if it has no path. */
    private static String extractServer(String url) {
        if (url == null) {
            return "";
        }
        int schemeEnd = url.indexOf("://");
        int hostStart = (schemeEnd == -1) ? 0 : schemeEnd + 3;
        int pathStart = url.indexOf('/', hostStart);
        return (pathStart == -1) ? url : url.substring(0, pathStart);
    }

    /** Returns {@code html} with every {@code <!-- ... -->} span removed; an unterminated comment drops the rest. */
    private static String stripComments(String html) {
        StringBuilder out = new StringBuilder(html.length());
        int pos = 0;
        while (pos < html.length()) {
            int open = html.indexOf(COMMENT_OPEN, pos);
            if (open == -1) {
                out.append(html, pos, html.length());
                break;
            }
            out.append(html, pos, open);
            int close = html.indexOf(COMMENT_CLOSE, open + COMMENT_OPEN.length());
            if (close == -1) {
                break;
            }
            pos = close + COMMENT_CLOSE.length();
        }
        return out.toString();
    }

    /** Returns, in document order, the double-quoted href value of every {@code <a ...>} tag outside comments. */
    private static ArrayList<String> extractHrefs(String code) {
        ArrayList<String> hrefs = new ArrayList<String>();
        if (code == null) {
            return hrefs;
        }
        String html = stripComments(code);
        int pos = 0;
        while (true) {
            int tagStart = html.indexOf(ANCHOR_OPEN, pos);
            if (tagStart == -1) {
                break;
            }
            int tagEnd = html.indexOf('>', tagStart);
            if (tagEnd == -1) {
                tagEnd = html.length();
            }
            // Look for the attribute only inside this tag so that an href
            // belonging to some other element is never attributed to it.
            String tag = html.substring(tagStart, tagEnd);
            int attr = tag.indexOf(HREF_OPEN);
            if (attr != -1) {
                int valueStart = attr + HREF_OPEN.length();
                int valueEnd = tag.indexOf('"', valueStart);
                if (valueEnd != -1) {
                    hrefs.add(tag.substring(valueStart, valueEnd));
                }
            }
            // tagEnd is always past tagStart, so the scan makes progress.
            pos = tagEnd;
        }
        return hrefs;
    }
}