1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
/* $Id: uri.c,v 1.1.1.1 2000-02-16 17:32:23 sdyoung Exp $
*
* This borrows the REGEX from RFC2396 to split a URI string into the five
* primary components. The components are:
* scheme the uri method (like "http", "ftp", "gopher")
* authority the domain and optional ":" port
* path path to the document/resource
* query an optional query (separated with a "?")
* fragment an optional fragment (separated with a "#")
*
* Copyright (C) 1999 Robert James Kaes (rjkaes@flarenet.com)
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 2, or (at your option) any
* later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifdef HAVE_CONFIG_H
#include <defines.h>
#endif
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include "uri.h"
#include "utils.h"
#include "log.h"
#include "regexp.h"
/*
 * Number of regmatch_t slots passed to regexec(): slot 0 receives the
 * whole match and slots 1-9 receive the nine parenthesised groups of
 * URIPATTERN.
 */
#define NMATCH 10
/*
 * Extended regular expression that splits a URI into its components.
 * Every component is optional, and each optional delimiter is wrapped in
 * an outer group with the component itself in an inner group.
 */
#define URIPATTERN "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"
/*
 * Indexes into the regmatch_t array for the groups of URIPATTERN.
 * The *_MARK groups include the leading "?" or "#" delimiter, whereas
 * QUERY and FRAGMENT hold only the text that follows the delimiter.
 */
#define SCHEME 2
#define AUTHORITY 4
#define PATH 5
#define QUERY_MARK 6
#define QUERY 7
#define FRAGMENT_MARK 8
#define FRAGMENT 9
/*
 * Duplicate one matched substring of buffer into newly allocated memory.
 *
 * pmatch    - match offsets filled in by regexec()
 * buffer    - the string the offsets refer to
 * section   - receives the NUL-terminated copy (or the failed allocation
 *             result when -1 is returned)
 * substring - index of the group in pmatch to copy
 *
 * Returns 0 on success, -1 if the allocation failed.  The caller is
 * expected to have checked that the group actually matched.
 */
static int extract_uri(regmatch_t pmatch[], const char *buffer, char **section,
		       int substring)
{
	const regmatch_t *match = &pmatch[substring];
	int len = match->rm_eo - match->rm_so;
	char *copy = xmalloc(len + 1);

	/* Publish the pointer first so the caller sees NULL on failure. */
	*section = copy;
	if (copy == NULL)
		return -1;

	memcpy(copy, buffer + match->rm_so, len);
	copy[len] = '\0';

	return 0;
}
/*
 * Release a URI structure together with every component string it holds
 * (scheme, authority, path, query and fragment).
 *
 * Passing NULL is allowed and does nothing, mirroring free(NULL).
 */
void free_uri(URI * uri)
{
	/* Without this guard the member accesses below dereference NULL. */
	if (uri == NULL)
		return;

	safefree(uri->scheme);
	safefree(uri->authority);
	safefree(uri->path);
	safefree(uri->query);
	safefree(uri->fragment);
	safefree(uri);
}
/*
 * Split a URI string into its components using URIPATTERN.
 *
 * string - the NUL-terminated URI to parse
 *
 * Returns a newly allocated URI whose members are set for every component
 * present in the input and left NULL for those that are absent.  The
 * result must be released with free_uri().  Returns NULL if string is
 * NULL, if memory cannot be allocated, or if the pattern fails to compile
 * or match.
 */
URI *explode_uri(const char *string)
{
	URI *uri;
	regmatch_t pmatch[NMATCH];
	regex_t preg;
	int match_status;

	if (string == NULL)
		return NULL;

	if (!(uri = xmalloc(sizeof(URI))))
		return NULL;
	/* Zero every member so free_uri() is safe on a partially built URI. */
	memset(uri, 0, sizeof(URI));

	if (regcomp(&preg, URIPATTERN, REG_EXTENDED) != 0) {
		log("ERROR explode_uri: regcomp");
		goto ERROR_EXIT;
	}

	/*
	 * The offsets in pmatch refer to string, not to preg, so the
	 * compiled pattern can be released right away.  Doing it before
	 * the status check keeps the failure path from leaking it.
	 */
	match_status = regexec(&preg, string, NMATCH, pmatch, 0);
	regfree(&preg);

	if (match_status != 0) {
		log("ERROR explode_uri: regexec");
		goto ERROR_EXIT;
	}

	if (pmatch[SCHEME].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->scheme, SCHEME) < 0)
			goto ERROR_EXIT;
	}

	if (pmatch[AUTHORITY].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->authority, AUTHORITY) < 0)
			goto ERROR_EXIT;
	}

	if (pmatch[PATH].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->path, PATH) < 0)
			goto ERROR_EXIT;
	}

	/*
	 * For query and fragment the presence test uses the group that
	 * includes the delimiter, while the text copied excludes it.
	 */
	if (pmatch[QUERY_MARK].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->query, QUERY) < 0)
			goto ERROR_EXIT;
	}

	if (pmatch[FRAGMENT_MARK].rm_so != -1) {
		if (extract_uri(pmatch, string, &uri->fragment, FRAGMENT) < 0)
			goto ERROR_EXIT;
	}

	return uri;

ERROR_EXIT:
	free_uri(uri);
	return NULL;
}
|