Source for file N3Parser.php

Documentation is available at N3Parser.php

  1. <?php
  2.  
  3. // ----------------------------------------------------------------------------------
  4. // Class: N3Parser
  5. // ----------------------------------------------------------------------------------
  6.  
  7.  
  8.  
  9.  
  10.  
  11. /**
  12. * PHP Notation3 Parser
  13. *
  14. * This parser can parse a subset of n3, reporting triples to a callback function
  15. * or constructing a RAP Model ( http://www.wiwiss.fu-berlin.de/suhl/bizer/rdfapi )
  16. *
  17. * Supported N3 features:
  18. * <ul>
  19. * <li>Standard things, repeated triples ( ; and , ), blank nodes using [ ], self-reference ('<>')</li>
  20. * <li>@prefix mappings</li>
  21. * <li>= maps to owl#sameAs</li>
  22. * <li>a maps to rdf-syntax-ns#type</li>
  23. * <li>Literal datatype and xml:lang tag support</li>
  24. * </ul>
  25. * Un-supported N3 Features include:
  26. * <ul>
  27. * <li>Reification using { }</li>
  28. * <li>. and ^ operators for tree traversal</li>
  29. * <li>Any log operators, like log:forAll etc.</li>
  30. * </ul>
  31. *
  32. * This parser is based on n3.py from Eep, released 2nd March, 2002,
  33. * by Sean B. Palmer
  34. * ( http://infomesh.net/2002/eep/20020302-013802/n3.py )
  35. *
  36. * This parser is released under the GNU GPL license.
  37. * ( http://www.gnu.org/licenses/gpl.txt )
  38. *
  39. * <b>History:</b>
  40. * <ul>
  41. * <LI>12-06-2004 improved namespace handling added (tobias.gauss@web.de)</LI>
  42. * <LI>08-10-2004 Function for converting strings to its unicode NFC form. Benjamin Nowack <bnowack@appmosphere.com></LI>
  43. * <LI>10-05-2004 Fixed bug with trailing space on qnames and a parsing bug with a space before ]</LI>
  44. * <LI>11-27-2003 fixed problems with whitespace at the end of bNodes</li>
  45. * <LI>11-18-2003 Changed xml:language regex for supporting lang-tags like en-uk.</li>
  46. * <li>11-07-2003 Added "setFixBnodes" function. Sets, if Bnodes should be renamed to the BNODE_PREFIX constant.</li>
  47. * <li>10-27-2003 fixed problems in generateModel(), changed regEx for Literals.</li>
  48. * <li>10-24-2003 Added support for Literals with rdf:DataType and xml:Language Tags. URI-Self-Reference with '<>' is supported.</li>
  49. * <li>08-01-2003 Made compatible with new v6 MemModel.</li>
  50. * <li>07-31-2003 Function generateModel() added.</li>
  51. * <li>07-16-2003 Fixed bug with anon nodes alone on a line.</li>
  52. * <li>06-08-2003 Initial version converted from n3.py.</li>
  53. * </ul>
  54. *
  55. *
  56. * @author Sean B. Palmer <sean@mysterylights.com>, Gunnar AA. Grimnes <ggrimnes@csd.abdn.ac.uk>, Daniel Westphal <mail@d-westphal.de>
  57. * @version V0.9.1
  58. * @package syntax
  59. * @access public
  60. ***/
  61.  
  62. class N3Parser extends Object {
  63.  
  64.  
  65. /* ==================== Variables ==================== */
  66.  
  // Combined PCRE pattern, built in the constructor and used by toke().
  var $Tokens;
  // Counter incremented by bnodeID() for every generated blank node id.
  var $bNode;
  // Namespace URIs set in the constructor. RDF_NS backs the 'a' keyword and
  // OWL_NS backs '='; DAML_NS is assigned but not read anywhere in this class.
  var $RDF_NS, $DAML_NS, $OWL_NS;
  // When truthy, n3tolist() dumps its intermediate token lists.
  var $debug;
  // Set to true when an undeclared prefix is encountered.
  var $parseError;
  // namespace URI => prefix (without the trailing colon), filled by getPrefixes().
  var $parsedNamespaces = array();
  // '_:label' as written in the document => generated blank node id (see fixAnon()).
  // Declared here because the constructor assigns it; undeclared (dynamic)
  // properties are deprecated in recent PHP versions.
  var $bNodeMap = array();
  // Whether generated blank node ids replace the labels used in the document.
  // Declared here for the same reason as $bNodeMap.
  var $FixBnodes;
  73.  
  74. /* ==================== Public Methods ==================== */
  75.  
  /**
  * Constructor.
  *
  * Builds the tokenizer pattern from the individual token expressions and
  * resets the parser state (blank node counter, blank node map, debug flag,
  * parse error flag). The initial FixBnodes setting is taken from the
  * FIX_BLANKNODES constant.
  *
  * @access public
  ***/
  function __construct() {
    // Regular expressions for the individual token types.
    $Name = '[A-Za-z0-9_@\.]+[^\.,;\[\] ]*';
    $URI = '<[^> ]*>';
    $bNode = '_:'.$Name;
    $Univar = '\?'.$Name;
    $QName = '(?:[A-Za-z][A-Za-z0-9_@\.]*)?:'.$Name;
    $Literal = '"(\\\"|[^"])*"';
    $LangTag = '@[A-Za-z\-]*[^ \^\.\;\,]';
    $Datatype = '(\^\^)[^ ,\.;)]+';
    $Datatype_URI = '(\^\^)'.$URI;
    $LLiteral = '"""[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""';
    $Comment = '# .*$';
    $Prefix = '(?:[A-Za-z][A-Za-z0-9_]*)?:';
    $PrefixDecl = '@prefix';
    $WS = '[ \t]';

    $this->RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; // for the 'a' keyword
    $this->DAML_NS = 'http://www.daml.org/2001/03/daml+oil#';
    $this->OWL_NS = 'http://www.w3.org/2002/07/owl#';              // for the '=' keyword

    // The order matters: alternatives are tried left to right at each position.
    $t = array(
      $Datatype_URI, $Datatype, $LLiteral, $URI, $Literal, $PrefixDecl,
      $QName, $bNode, $Prefix, $Univar, 'a', '=', '{', '}', '\(', '\)',
      '\[', '\]', ',', ';', '\.', $WS, $Comment, $LangTag
    );
    // implode() with the glue first: the reversed legacy argument order used
    // before is no longer accepted by current PHP versions.
    $this->Tokens = '/('.implode('|', $t).')/m';

    $this->bNode = 0;
    $this->debug = 0;
    $this->bNodeMap = array();
    $this->FixBnodes = FIX_BLANKNODES;
    $this->parseError = false;
  }

  /**
  * PHP 4 style constructor.
  *
  * Kept so that PHP 4 and explicit N3Parser() calls keep working; all
  * initialisation lives in __construct(), which is the only constructor
  * recognised by current PHP versions.
  *
  * @access public
  ***/
  function N3Parser() {
    $this->__construct();
  }
  114.  
  /**
  * Switches the renaming of blank node labels on or off.
  *
  * Only the boolean values true and false are accepted; any other argument
  * leaves the current setting untouched.
  *
  * @param boolean $set
  * @access public
  ***/
  function setFixBnodes($set) {
    if (is_bool($set)) {
      $this->FixBnodes = $set;
    }
  }
  /**
  * Parses an N3 string and prints every triple as "(subject, predicate, object)".
  *
  * For entries with more than three fields, fields 3 and 4 are appended to
  * the object when they start with '@' or '^^'.
  *
  * @param string $s
  * @access public
  ***/
  function parse($s) {
    foreach ($this->n3tolist($s) as $triple) {
      $object = $triple[2];

      if (count($triple) > 3) {
        foreach (array(3, 4) as $field) {
          $extra = $triple[$field];
          if ($extra[0] == '@') $object .= $extra;
          if (substr($extra, 0, 2) == '^^') $object .= $extra;
        }
      }

      print '('.$triple[0].', '.$triple[1].', '.$object.")\n";
    }
  }
  149.  
  /**
  * Parses an N3 string and calls $func($subject, $predicate, $object) once
  * for every triple.
  *
  * For entries with more than three fields, fields 3 and 4 are appended to
  * the object when they start with '@' or '^^'.
  *
  * @param string $s
  * @param string $func  name of the callback function
  * @access public
  ***/
  function uparse($s, $func) {
    foreach ($this->n3tolist($s) as $triple) {
      $object = $triple[2];

      if (count($triple) > 3) {
        foreach (array(3, 4) as $field) {
          $extra = $triple[$field];
          if ($extra[0] == '@') $object .= $extra;
          if (substr($extra, 0, 2) == '^^') $object .= $extra;
        }
      }

      $func($triple[0], $triple[1], $object);
    }
  }
  175.  
  176.  
  /**
  * Parses an N3 string and adds the resulting statements to a model.
  *
  * A new MemModel is created unless a model is passed in. After all
  * statements are added, the namespaces collected while parsing are handed
  * to the model via addParsedNamespaces().
  *
  * @param string $s
  * @param object $model  optional model to add to; false creates a MemModel
  * @access public
  * @return object Model
  ***/
  function parse2model($s, $model = false) {
    $target = ($model == false) ? new MemModel() : $model;

    foreach ($this->n3tolist($s) as $triple) {
      $subject = $this->toRDFNode($triple[0], $triple);
      $predicate = $this->toRDFNode($triple[1], $triple);
      $object = $this->toRDFNode($triple[2], $triple);
      $target->add(new Statement($subject, $predicate, $object));
    }

    $target->addParsedNamespaces($this->parsedNamespaces);
    return $target;
  }
  207.  
  /**
  * Generates a model from a URI or file.
  *
  * Reads the whole resource into memory and passes it to parse2model().
  * The script is terminated with die() if the resource cannot be opened.
  *
  * @access public
  * @param string $path   file name or URI readable by fopen()
  * @param mixed  $dummy  unused; kept for interface compatibility
  * @param object $model  optional model to add the statements to
  * @return object MemModel
  */
  function & generateModel($path, $dummy = false, $model = false) {
    $handle = fopen($path, 'r') or die("N3 Parser: Could not open File: '$path' - Stopped parsing.");

    $input = "";
    while (!feof($handle)) {
      $chunk = fread($handle, 512);
      // A failed read never reaches end-of-file; stop instead of looping forever.
      if ($chunk === false) break;
      $input .= $chunk;
    }
    fclose($handle);

    // Return-by-reference functions must return a variable, not an expression.
    $result = $this->parse2model($input, $model);
    return $result;
  }
  231. /* ==================== Private Methods from here ==================== */
  232.  
  233. // General list processing functions
  234.  
  235.  
  236.  
  /**
  * Filter callback: tells whether a token should be kept.
  *
  * Returns false for the empty string, for a single space, tab, newline or
  * carriage return, and for tokens starting with '#' (comments); returns
  * true for everything else.
  *
  * @access private
  * @param string $s
  * @return boolean
  ***/
  function isWS($s) {
    $s = (string) $s;
    // Check for the empty string first so that no string offset is read on it.
    if ($s === '') return false;
    if ($s[0] == '#') return false;
    return !in_array($s, array(" ", "\t", "\n", "\r"), true);
  }
  247.  
  /**
  * array_walk() callback that strips leading and trailing whitespace from
  * a line in place.
  *
  * @access private
  * @param string $l  the line, modified by reference
  * @param mixed  $i  array key supplied by array_walk(); not used
  ***/
  function trimLine(&$l, $i) {
    $stripped = trim($l);
    $l = $stripped;
  }
  256.  
  /**
  * Returns true if the line is neither empty nor a comment.
  *
  * A comment line is one whose first character after optional spaces and
  * tabs is '#'.
  *
  * @access private
  * @param string $s
  * @return boolean
  ***/
  function notComment($s) {
    if ($s == "") return false;
    // preg_match() replaces ereg(), which no longer exists in current PHP.
    return preg_match('/^[ \t]*#/', $s) !== 1;
  }
  269.  
  /**
  * Removes whitespace and comment tokens from a token list.
  *
  * Tokens are kept when isWS() returns true for them. The result is
  * reindexed from 0: array_filter() preserves the original keys, which
  * would leave gaps that index-based code cannot handle.
  *
  * @access private
  * @param array $list
  * @return array
  ***/
  function filterWs($list) {
    return array_values(array_filter($list, array($this, "isWS")));
  }
  /**
  * Escapes a string using N-Triples style escapes (\t, \n, \r, \", \uHHHH
  * and \UHHHHHHHH; see http://www.w3.org/TR/rdf-testcases/#ntrip_strings).
  *
  * Input that is valid UTF-8 is decoded code point by code point; any other
  * input is treated as one character per byte (ISO-8859-1). Despite the
  * method name, no Unicode normalisation is performed.
  *
  * NOTE(review): a backslash is copied through unchanged, as before. The
  * referenced specification escapes it as two backslashes, but callers pass
  * text that may already contain escape sequences - confirm before changing.
  *
  * @param String $str
  * @return String
  * @access private
  */
  function str2unicode_nfc($str = "") {
    $str = (string) $str;
    $length = strlen($str);
    $codePoints = array();

    if ($length > 0 && preg_match('//u', $str) === 1) {
      // Valid UTF-8: the /u modifier rejects malformed sequences, so every
      // continuation byte read below is known to exist.
      $pos = 0;
      while ($pos < $length) {
        $lead = ord($str[$pos]);
        if ($lead < 0x80) {          /* 0####### */
          $cp = $lead;
          $extra = 0;
        } elseif ($lead < 0xE0) {    /* 110##### 10###### */
          $cp = $lead & 0x1F;
          $extra = 1;
        } elseif ($lead < 0xF0) {    /* 1110#### 10###### 10###### */
          $cp = $lead & 0x0F;
          $extra = 2;
        } else {                     /* 11110### 10###### 10###### 10###### */
          $cp = $lead & 0x07;
          $extra = 3;
        }
        for ($k = 1; $k <= $extra; $k++) {
          $cp = ($cp << 6) | (ord($str[$pos + $k]) & 0x3F);
        }
        $codePoints[] = $cp;
        $pos += $extra + 1;
      }
    } else {
      // Not UTF-8: every byte is its own code point.
      for ($pos = 0; $pos < $length; $pos++) {
        $codePoints[] = ord($str[$pos]);
      }
    }

    $result = "";
    foreach ($codePoints as $nr) {
      if ($nr == 9) {
        $result .= '\t';
      } elseif ($nr == 10) {
        $result .= '\n';
      } elseif ($nr == 13) {
        $result .= '\r';   // carriage return; was wrongly emitted as \t
      } elseif ($nr < 32) {
        $result .= "\\u".sprintf("%04X", $nr);
      } elseif ($nr == 34) {
        $result .= '\"';
      } elseif ($nr < 127) {
        $result .= chr($nr);   // printable ASCII, including the backslash
      } elseif ($nr < 65536) {
        $result .= "\\u".sprintf("%04X", $nr);
      } elseif ($nr < 1114112) {
        $result .= "\\U".sprintf("%08X", $nr);
      }
      // anything above U+10FFFF is not defined and is dropped
    }
    return $result;
  }
  365.  
  366.  
  /**
  * Cuts a slice out of an array.
  *
  * Returns a two-element array: the elements from position $start up to
  * (not including) position $end, followed by the remaining elements.
  * e.g. getSpan(['p', 'q', 'r'], 1, 2) gives (['q'], ['p', 'r'])
  *
  * @return array
  * @access private
  * @param array $list
  * @param integer $start
  * @param integer $end
  ***/
  function getSpan($list, $start, $end) {
    $remainder = array_slice($list, 0, $start);
    $after = array_slice($list, $end);
    // append everything behind the slice to the part in front of it
    array_splice($remainder, count($remainder), 0, $after);

    $span = array_slice($list, $start, $end - $start);
    return array($span, $remainder);
  }
  383.  
  384.  
  /**
  * Appends the values of $b to $a and returns the combined array.
  *
  * Numeric keys are renumbered; the keys of $b are not kept.
  *
  * @param array $a
  * @param array $b
  * @return array
  * @access private
  ***/
  function array_concat($a, $b) {
    $appended = array_values((array) $b);
    return array_merge($a, $appended);
  }
  396.  
  /**
  * Returns the positions at which $item occurs in $list.
  *
  * Positions are counted from 0 in iteration order and the comparison is
  * strict (===). The number of elements in $list is always appended as the
  * last entry, so the result is never empty.
  *
  * @param array $list
  * @param string $item
  * @return array
  * @access private
  ***/
  function posns($list, $item) {
    $hits = array_keys(array_values($list), $item, true);
    $hits[] = count($list);
    return $hits;
  }
  414.  
  415.  
  416. /* More N3 specific functions */
  417.  
  /**
  * Notation3 tokenizer: splits a string into a raw token list.
  *
  * Line endings are normalised to "\n" first. The script is terminated
  * with die() when the input is empty. If the regular expression engine
  * reports a failure, the parse error flag is set, a warning is raised and
  * an empty list is returned instead of failing silently.
  *
  * @param string $s
  * @return array
  * @access private
  ***/
  function toke($s) {
    if (strlen($s) == 0) die('Document has no content!');

    // "\r\n" is replaced before the lone "\r" so it does not become two newlines.
    $s = str_replace(array("\r\n", "\r"), "\n", $s);

    $matches = array();
    if (preg_match_all($this->Tokens, $s, $matches) === false) {
      $this->parseError = true;
      trigger_error('N3 Parser: tokenizing failed (regular expression error).', E_USER_WARNING);
      return array();
    }
    // index 0 holds the full matches, i.e. the tokens themselves
    return $matches[0];
  }
  /**
  * Groups the elements from offset $start up to (not including) offset
  * $end into one nested array element.
  * e.g. listify(["a","b","c","d"],1,3) => ["a",["b","c"],"d"]
  *
  * @param array $list
  * @param integer $start
  * @param integer $end
  * @return array
  * @access private
  ***/
  function listify($list, $start, $end) {
    $group = array_slice($list, $start, $end - $start);
    $head = array_slice($list, 0, $start);
    $tail = array_slice($list, $end);

    // the grouped elements become a single entry behind the head
    $head[] = $group;
    return $this->array_concat($head, $tail);
  }
  475.  
  /**
  * Extracts the @prefix declarations from a token list.
  *
  * Every '@prefix' token is removed together with the three tokens that
  * follow it (prefix, namespace URI and terminating '.'), wherever it
  * occurs in the list. Each declaration is also recorded in
  * $this->parsedNamespaces as namespace URI => prefix without colon.
  * A declaration cut short by the end of the list sets the parse error
  * flag and is ignored.
  *
  * @param array $list
  * @access private
  * @return array  two elements: prefix => namespace map, remaining tokens
  ***/
  function getPrefixes($list) {
    $prefixes = array();
    // Reindex so that the key found by array_search() is also the position.
    $list = array_values($list);

    // Look the declaration up by position; current() would return the value
    // of the first token instead of the position of '@prefix'.
    while (($pos = array_search('@prefix', $list, true)) !== false) {
      $r = $this->getSpan($list, $pos, $pos + 4);
      $binding = $r[0];
      $list = $r[1];

      if (!isset($binding[1]) || !isset($binding[2])) {
        $this->parseError = true;
        continue;
      }

      $namespace = substr($binding[2], 1, -1);   // strip the enclosing < >
      $prefixes[$binding[1]] = $namespace;
      $this->parsedNamespaces[$namespace] = substr($binding[1], 0, -1);   // drop the ':'
    }

    return array($prefixes, $list);
  }
  502.  
  /**
  * array_walk() callback: replaces the keyword "a" by the rdf:type URI
  * built from $this->RDF_NS.
  *
  * @param string $l  token, modified by reference
  * @param mixed  $p  array key supplied by array_walk(); not used
  * @access private
  ***/
  function replace_a_type(&$l, $p) {
    if ($l != 'a') {
      return;
    }
    $l = '<'.$this->RDF_NS.'type>';
  }
  511.  
  /**
  * array_walk() callback: replaces the keyword "=" by the sameAs URI built
  * from $this->OWL_NS.
  *
  * @param string $l  token, modified by reference
  * @param mixed  $p  array key supplied by array_walk(); not used
  * @access private
  ***/
  function replace_equal(&$l, $p) {
    if ($l != '=') {
      return;
    }
    $l = '<'.$this->OWL_NS.'sameAs>';
  }
  520.  
  /**
  * array_walk() callback: replaces the keyword "this" by <urn:urn-n:this>.
  *
  * The token is taken by reference, like in the sibling callbacks;
  * without the reference the replacement never reached the list.
  *
  * @param string $l  token, modified by reference
  * @param mixed  $p  array key supplied by array_walk(); not used
  * @access private
  ***/
  function replace_this(&$l, $p) {
    if ($l == 'this') $l = '<urn:urn-n:this>';
  }
  529.  
  /**
  * Expands keywords, namespace prefixes and long literals in a token list.
  *
  * - 'a', '=' and 'this' are replaced through the replace_* callbacks.
  * - '<>' becomes a generated URI built from the server address, the
  *   request URI and the current timestamp.
  * - Prefixed names become '<namespace + local name>'; prefixed datatypes
  *   ('^^prefix:name') become '^^<namespace + local name>'.
  * - Long literals ("""...""") are rewritten as ordinary quoted literals
  *   with inner quotes escaped.
  *
  * An undeclared prefix sets $this->parseError, raises E_USER_ERROR and
  * stops processing. Malformed literals terminate the script with die().
  *
  * NOTE(review): raw newlines inside long literals are kept as they are.
  * The previous code contained a replacement for them that had no effect;
  * confirm whether they are meant to be escaped as \n.
  *
  * @param array $prefixes  prefix (with trailing colon) => namespace URI
  * @param array $list      token list
  * @return array  the processed token list
  * @access private
  ***/
  function applyStuff($prefixes, $list) {
    // The loop below addresses tokens by position.
    $list = array_values($list);

    array_walk($list, array($this, 'replace_a_type'));
    array_walk($list, array($this, 'replace_equal'));
    array_walk($list, array($this, 'replace_this'));

    $total = count($list);
    for ($i = 0; $i < $total; $i++) {

      // Self reference: no document URI is known here, so generate one.
      if ($list[$i] == '<>') {
        $host = isset($_SERVER['SERVER_ADDR']) ? $_SERVER['SERVER_ADDR'] : 'localhost';
        $request = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '/rdfapi-php';
        $list[$i] = '<http://'.$host.$request.'#generate_timestamp_'.time().'>';
      }

      $token = $list[$i];
      $first = substr($token, 0, 1);
      $isSpecial = ($first !== '' && $first !== false
                    && strpos('<_"?.;,{}[]()@', $first) !== false);

      if (!$isSpecial && substr($token, 0, 3) != '^^<') {
        // Prefixed name. Split at the first colon only, so that local names
        // containing further colons stay intact.
        $parts = explode(':', $token, 2);
        $ns = $parts[0].':';
        $name = isset($parts[1]) ? $parts[1] : '';

        if (isset($prefixes[$ns])) {
          $list[$i] = '<'.$prefixes[$ns].$name.'>';
        } elseif (substr($ns, 0, 2) == '^^' && isset($prefixes[substr($ns, 2)])) {
          // datatype written with a prefix, e.g. ^^xsd:integer
          $list[$i] = '^^'.$prefixes[substr($ns, 2)].$name;
        } else {
          $this->parseError = true;
          trigger_error('Prefix not declared: '.$ns, E_USER_ERROR);
          break;
        }
      } elseif ($first == '"') {
        if (substr($token, 0, 3) == '"""') {
          if (substr($token, -3, 3) != '"""') {
            die('Incorrect string formatting: '.substr($token, -3, 3));
          }
          // Long literal: escape every quote that is not escaped already.
          // The look-behind keeps the character in front of the quote.
          $lit = substr($token, 3, -3);
          $lit = preg_replace('/(?<!\\\\)"/', '\\\\"', $lit);
          $list[$i] = '"'.$lit.'"';
        } elseif (strstr($token, "\n")) {
          die('Newline in literal: '.$token);
        }
      }

      // Datatypes without angle brackets get them added.
      if (substr($list[$i], 0, 2) == '^^' && substr($list[$i], 2, 1) != '<') {
        $list[$i] = '^^<'.substr($list[$i], 2).'>';
      }
    }

    return $list;
  }
  604.  
  /**
  * Splits a token list into statements at every '.' token.
  *
  * Each statement is the list of tokens in front of a '.', without the
  * '.' itself. Tokens behind the last '.' are dropped. The list is walked
  * once instead of being re-sliced for every statement.
  *
  * @param array $list
  * @return array  list of token lists
  * @access private
  ***/
  function getStatements($list) {
    $statements = array();
    $current = array();

    foreach ($list as $token) {
      if ($token === '.') {
        $statements[] = $current;
        $current = array();
      } else {
        $current[] = $token;
      }
    }

    return $statements;
  }
  /**
  * Separates the ';' continuations of a statement from its leading part.
  * e.g. :Gunnar :firstname "Gunnar" ; :lastname "Grimnes" .
  *
  * Returns a two-element array: the tokens in front of the first ';', and
  * a list with one token list per ';' holding the tokens that follow it
  * (up to the next ';' or the end). A ';' with nothing behind it yields an
  * empty token list. The list is walked once instead of being re-sliced
  * for every ';'.
  *
  * @param array $list
  * @return array
  * @access private
  ***/
  function getPovs($list) {
    $head = array();
    $povs = array();
    $inPov = false;

    foreach ($list as $token) {
      if ($token === ';') {
        $povs[] = array();
        $inPov = true;
      } elseif ($inPov) {
        $povs[count($povs) - 1][] = $token;
      } else {
        $head[] = $token;
      }
    }

    return array($head, $povs);
  }
  654.  
  /**
  * Separates the ',' continuations of a predicate/object list.
  * e.g. :Gunnar :likes "Cheese", "Wine".
  *
  * For every ',' the comma and the token behind it are cut out of the
  * list. One more token is taken when the token two places behind the
  * comma starts with '@' or '^', and another one when the token three
  * places behind the comma starts with '^'. Each cut-out group is padded
  * with ' ' so that indexes 2 and 3 always exist.
  *
  * @access private
  * @param array $list
  * @return array  two elements: the remaining tokens, the list of groups
  ***/
  function getObjs($list) {
    $groups = array();

    while (in_array(",", $list)) {
      $comma = array_search(",", $list);

      $width = 2;   // the comma itself plus the object behind it
      if (isset($list[$comma + 2])) {
        $lead = @$list[$comma + 2][0];
        if ($lead == '@') $width++;
        if ($lead == '^') $width++;
      }
      if (isset($list[$comma + 3]) && @$list[$comma + 3][0] == '^') {
        $width++;
      }

      $parts = $this->getSpan($list, $comma, $comma + $width);
      $group = $parts[0];
      $list = $parts[1];

      foreach (array(2, 3) as $slot) {
        if (!isset($group[$slot])) $group[$slot] = ' ';
      }
      $groups[] = $group;
    }

    return array($list, $groups);
  }
  693.  
  /**
  * Turns the tokens of one statement into a list of triples.
  *
  * A statement of exactly three tokens is returned as is. Longer
  * statements are split at ';' (same subject) and ',' (same subject and
  * predicate); the resulting entries have five fields: subject, predicate,
  * object and two extra fields that hold what followed the object (language
  * tag / datatype tokens), or ' ' when there was nothing.
  * A statement consisting only of a generated blank node id yields no
  * triples. Statements with fewer than three tokens terminate the script
  * with die().
  *
  * A ';' with nothing behind it is skipped; a ';' followed by a predicate
  * without an object is skipped as well and sets the parse error flag.
  *
  * @param array $list
  * @return array
  * @access private
  ***/
  function statementize($list) {

    if (count($list) == 1 && preg_match("/_".BNODE_PREFIX."[0-9]+_/", $list[0])) {
      // $list is an array, so it is joined before printing.
      if ($this->debug) print "Ignored bNode exists statement. ".implode(' ', $list)."\n";
      return array();
    }

    if (count($list) == 3) return array($list);
    if (count($list) < 3) die("Error: statement too short!");

    // Split off everything that follows a ';'.
    $r = $this->getPovs($list);
    $spo = $r[0];
    $po = $r[1];
    $all = array();

    $subject = $spo[0];
    foreach ($po as $pop) {
      $r = $this->getObjs($pop);
      $myPo = $r[0];
      $obj = $r[1];

      if (count($myPo) < 2) {
        // Nothing usable behind this ';'. An empty part is harmless; a
        // predicate without an object is malformed input.
        if (count($myPo) > 0) $this->parseError = true;
        continue;
      }

      if (!isset($myPo[2])) $myPo[2] = ' ';
      if (!isset($myPo[3])) $myPo[3] = ' ';
      $predicate = $myPo[0];
      $all[] = array($subject, $predicate, $myPo[1], $myPo[2], $myPo[3]);

      foreach ($obj as $o) $all[] = array($subject, $predicate, $o[1], $o[2], $o[3]);
    }

    // The leading subject / predicate / object part and its ',' objects.
    $r = $this->getObjs($spo);
    $spo = $r[0];
    $objs = $r[1];

    $subject = $spo[0];
    $predicate = $spo[1];

    if (!isset($spo[3])) $spo[3] = ' ';
    if (!isset($spo[4])) $spo[4] = ' ';
    $all[] = array($subject, $predicate, $spo[2], $spo[3], $spo[4]);

    foreach ($objs as $obj) $all[] = array($subject, $predicate, $obj[1], $obj[2], $obj[3]);

    return $all;
  }
  759.  
  /**
  * Turns bracketed runs of tokens into nested array elements.
  * The brackets themselves stay inside the nested array:
  * e.g. doLists(["a","b","[","c","]","d"], "[","]") => ["a","b",["[","c","]"],"d"]
  *
  * While the list still contains $schar at the top level, the first run
  * found at the deepest nesting level is grouped with listify().
  *
  * @param array $list
  * @param string $schar  opening token
  * @param string $echar  closing token
  * @return array
  * @access private
  ***/
  function doLists($list, $schar, $echar) {

    while (in_array($schar, $list)) {
      // nesting level => list of array(open position[, close position])
      $spans = array();
      $depth = 0;
      $total = count($list);

      for ($pos = 0; $pos < $total; $pos++) {
        if ($list[$pos] == $schar) {
          $depth += 1;
          if (isset($spans[$depth])) {
            $spans[$depth][] = array($pos);
          } else {
            $spans[$depth] = array(array($pos));
          }
        }
        if ($list[$pos] == $echar) {
          if (isset($spans[$depth])) {
            // close the span opened last on this level
            $spans[$depth][count($spans[$depth]) - 1][] = $pos;
            $depth -= 1;
          } else {
            $spans[$depth] = array(array($pos));
          }
        }
      }

      $deepest = 0;
      foreach ($spans as $level => $unused) {
        if ($level > $deepest) $deepest = $level;
      }

      $innermost = $spans[$deepest][0];
      $list = $this->listify($list, $innermost[0], ($innermost[1] + 1));
    }

    return $list;
  }
  804.  
  /**
  * Applies doLists() for the three bracket types, in the order
  * [ ], { } and ( ).
  *
  * @param array $list
  * @return array
  * @access private
  ***/
  function listStuff($list) {
    $square = $this->doLists($list, '[', ']');
    $curly = $this->doLists($square, '{', '}');
    return $this->doLists($curly, '(', ')');
  }
  818.  
  /**
  * Generates a new blank node id of the form
  * "_" + BNODE_PREFIX + running number + "_".
  *
  * @access private
  * @return string
  ***/
  function bnodeID() {
    $this->bNode = $this->bNode + 1;
    return sprintf('_%s%s_', BNODE_PREFIX, $this->bNode);
  }
  828.  
  /**
  * Replaces labelled blank nodes like _:a by generated blank node ids.
  *
  * The mapping from label to id is kept in $this->bNodeMap, so the same
  * label always receives the same id.
  *
  * @access private
  * @param array $list
  * @return array
  ***/
  function fixAnon($list) {
    $total = count($list);
    for ($pos = 0; $pos < $total; $pos++) {
      $token = $list[$pos];
      if (substr($token, 0, 2) != "_:") {
        continue;
      }

      if (isset($this->bNodeMap[$token])) {
        $id = $this->bNodeMap[$token];
      } else {
        $id = $this->bnodeID();
        $this->bNodeMap[$token] = $id;
      }
      $list[$pos] = $id;
    }
    return $list;
  }
  849.  
  /**
  * Turns nested [ ] lists into blank nodes.
  *
  * Every nested array is replaced by a fresh blank node id, and a new
  * statement "id <contents of the brackets> ." is appended to the end of
  * the list. Appended contents are visited as well, so nested brackets
  * are expanded too. Nested arrays that do not start with '[' terminate
  * the script with die().
  *
  * @access private
  * @param array $list
  * @return array
  ***/
  function expandLists($list) {
    $pos = 0;
    // count() is re-evaluated on purpose: the list grows while it is walked
    while ($pos < count($list)) {
      $entry = $list[$pos];

      if (is_array($entry)) {
        if ($entry[0] != '[') {
          die('Only [ ] lists are supported!');
        }
        $node = $this->bnodeID();
        $list[$pos] = $node;
        $list[] = $node;
        $list = $this->array_concat($list, array_slice($entry, 1, -1));
        $list[] = '.';
      }

      $pos++;
    }
    return $list;
  }
  874.  
  /**
  * Main work-horse function: converts an N3 string into a list of
  * statements.
  *
  * Steps: tokenize and drop whitespace, extract @prefix declarations,
  * expand prefixes / keywords / literals, rename labelled blank nodes,
  * group and expand bracketed lists, split into statements and turn each
  * statement into triples. With $this->debug set, the token list is dumped
  * after each of the first six steps.
  *
  * @param string $s
  * @return array
  * @access private
  ***/
  function n3tolist($s) {

    $tokens = $this->filterWs($this->toke($s));
    if ($this->debug) {
      print "Filter WS:\n";
      var_dump($tokens);
    }

    list($prefixes, $tokens) = $this->getPrefixes($tokens);
    if ($this->debug) {
      print "Prefixes:\n";
      var_dump($prefixes);
      print "***\n";
      var_dump($tokens);
    }

    $tokens = $this->applyStuff($prefixes, $tokens);
    if ($this->debug) {
      print "Stuff applied:\n";
      var_dump($tokens);
    }

    $tokens = $this->fixAnon($tokens);
    if ($this->debug) {
      print "Fix anon:\n";
      var_dump($tokens);
    }

    $tokens = $this->listStuff($tokens);
    if ($this->debug) {
      print "Lists done:\n";
      var_dump($tokens);
    }

    $tokens = $this->expandLists($tokens);
    if ($this->debug) {
      print "Lists applied:\n";
      var_dump($tokens);
    }

    $triples = array();
    foreach ($this->getStatements($tokens) as $statement) {
      foreach ($this->statementize($statement) as $triple) {
        $triples[] = $triple;
      }
    }
    return $triples;
  }
  934.  
  /**
  * Constructs a RAP node (Literal, BlankNode or Resource) from a token.
  *
  * - Tokens starting with a double quote become a Literal. Fields 3 and up
  *   of $state supply the language tag ('@...') and the datatype ('^^...').
  *   If UNIC_RDF is set, the label is passed through str2unicode_nfc().
  * - Tokens starting with '_' + BNODE_PREFIX become a BlankNode. Unless
  *   FixBnodes is set, the label used in the document is restored from
  *   $this->bNodeMap when one is known.
  * - Everything else becomes a Resource.
  *
  * @access private
  * @param string $s      the token
  * @param array  $state  the complete triple entry the token belongs to
  * @return object RDFNode
  ***/
  function toRDFNode($s, $state) {
    // the token without its first and last character (quotes, < > or _ _)
    $ins = substr($s, 1, -1);

    if (substr($s, 0, 1) == '"') {
      $lang = NULL;
      $dtype = NULL;

      $fields = count($state);
      for ($i = 3; $i < $fields; $i++) {
        $extra = $state[$i];
        // Read the tag from the field that matched, not always from field 3.
        if (substr($extra, 0, 1) == '@') $lang = substr($extra, 1);
        if (substr($extra, 0, 2) == '^^') {
          $dtype = substr($extra, 2);
          if (substr($dtype, 0, 1) == '<') $dtype = substr($dtype, 1, -1);
        }
      }

      if (UNIC_RDF) {
        $ins = $this->str2unicode_nfc($ins);
      }
      $new_Literal = new Literal($ins, $lang);
      if ($dtype !== NULL) $new_Literal->setDatatype($dtype);
      return $new_Literal;
    }

    // Generated ids start with the marker; a URI that merely contains it
    // somewhere must not be mistaken for a blank node.
    if (strpos($s, '_'.BNODE_PREFIX) === 0) {
      $label = array_search($s, $this->bNodeMap, true);
      if ($this->FixBnodes || $label === false) {
        return new BlankNode($ins);
      }
      // restore the label from the document, without the leading '_:'
      return new BlankNode(trim(substr($label, 2)));
    }

    return new Resource($ins);
  }
  974.  
  975.  
  976. } //end: N3Parser
  977.  
  978. ?>

Documentation generated on Fri, 17 Dec 2004 16:16:06 +0100 by phpDocumentor 1.3.0RC3